xref: /titanic_44/usr/src/uts/common/rpc/rpcmod.c (revision 16ba0fac26f672b18447f2e17a2f91f14ed3ce40)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * Kernel RPC filtering module
32  */
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/stream.h>
37 #include <sys/stropts.h>
38 #include <sys/strsubr.h>
39 #include <sys/tihdr.h>
40 #include <sys/timod.h>
41 #include <sys/tiuser.h>
42 #include <sys/debug.h>
43 #include <sys/signal.h>
44 #include <sys/pcb.h>
45 #include <sys/user.h>
46 #include <sys/errno.h>
47 #include <sys/cred.h>
48 #include <sys/policy.h>
49 #include <sys/inline.h>
50 #include <sys/cmn_err.h>
51 #include <sys/kmem.h>
52 #include <sys/file.h>
53 #include <sys/sysmacros.h>
54 #include <sys/systm.h>
55 #include <sys/t_lock.h>
56 #include <sys/ddi.h>
57 #include <sys/vtrace.h>
58 #include <sys/callb.h>
59 #include <sys/strsun.h>
60 
61 #include <sys/strlog.h>
62 #include <rpc/rpc_com.h>
63 #include <inet/common.h>
64 #include <rpc/types.h>
65 #include <sys/time.h>
66 #include <rpc/xdr.h>
67 #include <rpc/auth.h>
68 #include <rpc/clnt.h>
69 #include <rpc/rpc_msg.h>
70 #include <rpc/clnt.h>
71 #include <rpc/svc.h>
72 #include <rpc/rpcsys.h>
73 #include <rpc/rpc_rdma.h>
74 
75 /*
76  * This is the loadable module wrapper.
77  */
78 #include <sys/conf.h>
79 #include <sys/modctl.h>
80 #include <sys/syscall.h>
81 
/* STREAMS entry-point table for this module; defined later in this file. */
extern struct streamtab rpcinfo;

/*
 * STREAMS module switch entry.  Registers the module under the name
 * "rpcmod" with rpcinfo as its streamtab and D_NEW|D_MP as its flags.
 */
static struct fmodsw fsw = {
	"rpcmod",
	&rpcinfo,
	D_NEW|D_MP,
};
89 
90 /*
91  * Module linkage information for the kernel.
92  */
93 
/* Linkage element for the STREAMS module personality (wraps fsw). */
static struct modlstrmod modlstrmod = {
	&mod_strmodops, "rpc interface str mod", &fsw
};
97 
98 /*
99  * For the RPC system call.
100  */
/*
 * System call table entry whose handler is rpcsys().  The leading 2 is
 * presumably the argument count and the SE_* values the entry flags --
 * confirm against the struct sysent definition.
 */
static struct sysent rpcsysent = {
	2,
	SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
	rpcsys
};
106 
/* Linkage element for the native RPC system call. */
static struct modlsys modlsys = {
	&mod_syscallops,
	"RPC syscall",
	&rpcsysent
};

#ifdef _SYSCALL32_IMPL
/*
 * Linkage element for the 32-bit system call; it shares the same
 * rpcsysent entry as the native one.
 */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit RPC syscall",
	&rpcsysent
};
#endif /* _SYSCALL32_IMPL */
120 
/*
 * Top-level module linkage: a NULL-terminated list holding the system call
 * element(s) followed by the STREAMS module element.  Passed to
 * mod_install(), mod_info() and ldi_ident_from_mod() below.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	{
		&modlsys,
#ifdef _SYSCALL32_IMPL
		&modlsys32,
#endif
		&modlstrmod,
		NULL
	}
};
132 
133 int
134 _init(void)
135 {
136 	int error = 0;
137 	callb_id_t cid;
138 	int status;
139 
140 	svc_init();
141 	clnt_init();
142 	cid = callb_add(connmgr_cpr_reset, 0, CB_CL_CPR_RPC, "rpc");
143 
144 	if (error = mod_install(&modlinkage)) {
145 		/*
146 		 * Could not install module, cleanup previous
147 		 * initialization work.
148 		 */
149 		clnt_fini();
150 		if (cid != NULL)
151 			(void) callb_delete(cid);
152 
153 		return (error);
154 	}
155 
156 	/*
157 	 * Load up the RDMA plugins and initialize the stats. Even if the
158 	 * plugins loadup fails, but rpcmod was successfully installed the
159 	 * counters still get initialized.
160 	 */
161 	rw_init(&rdma_lock, NULL, RW_DEFAULT, NULL);
162 	mutex_init(&rdma_modload_lock, NULL, MUTEX_DEFAULT, NULL);
163 
164 	cv_init(&rdma_wait.svc_cv, NULL, CV_DEFAULT, NULL);
165 	mutex_init(&rdma_wait.svc_lock, NULL, MUTEX_DEFAULT, NULL);
166 
167 	mt_kstat_init();
168 
169 	/*
170 	 * Get our identification into ldi.  This is used for loading
171 	 * other modules, e.g. rpcib.
172 	 */
173 	status = ldi_ident_from_mod(&modlinkage, &rpcmod_li);
174 	if (status != 0) {
175 		cmn_err(CE_WARN, "ldi_ident_from_mod fails with %d", status);
176 		rpcmod_li = NULL;
177 	}
178 
179 	return (error);
180 }
181 
/*
 * Module unload entry point.
 *
 * Unloading is always refused with EBUSY, because the rest of kRPC holds
 * pointers to entry points inside rpcmod (e.g. rpcmod_release()).
 */
int
_fini(void)
{
	return (EBUSY);
}
191 
/*
 * Module information entry point: hands the request to mod_info() with
 * this module's linkage and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
197 
/* Used to fill the unused qinit slot in the tables below. */
extern int nulldev();

/* Module id placed in rpcmod_info. */
#define	RPCMOD_ID	2049

/* Common open/close entry points installed in both qinit tables. */
int rmm_open(), rmm_close();

/*
 * To save instructions, since STREAMS ignores the return value
 * from these functions, they are defined as void here. Kind of icky, but...
 */
void rmm_rput(queue_t *, mblk_t *);
void rmm_wput(queue_t *, mblk_t *);
void rmm_rsrv(queue_t *);
void rmm_wsrv(queue_t *);

/*
 * Entry points of the connectionless (CLTS) personality.
 * NOTE(review): rpcmodrsrv is declared here, but xprt_clts_ops leaves its
 * read-service slot NULL -- confirm whether a definition exists.
 */
int rpcmodopen(), rpcmodclose();
void rpcmodrput(), rpcmodwput();
void rpcmodrsrv(), rpcmodwsrv();
216 
/* Handles every non-M_PROTO/M_PCPROTO message for rpcmodwput(). */
static	void	rpcmodwput_other(queue_t *, mblk_t *);
/* Entry points of the connection-oriented (COTS) personality. */
static	int	mir_close(queue_t *q);
static	int	mir_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static	void	mir_rput(queue_t *q, mblk_t *mp);
static	void	mir_rsrv(queue_t *q);
static	void	mir_wput(queue_t *q, mblk_t *mp);
static	void	mir_wsrv(queue_t *q);
225 
/*
 * Module information shared by the read and write qinit tables: module id
 * and name, followed by what are presumably the packet size limits
 * (0, INFPSZ) and the flow-control high/low water marks (256K, 1K) --
 * confirm the field order against struct module_info.
 */
static struct module_info rpcmod_info =
	{RPCMOD_ID, "rpcmod", 0, INFPSZ, 256*1024, 1024};
228 
/*
 * Read side.  All entry points go through the rmm_* dispatchers, which
 * forward to the per-transport-style ops vector.  Note that only the
 * connection-oriented ops vector (xprt_cots_ops) supplies a read service
 * procedure; the connectionless one leaves that slot NULL.
 */
static struct qinit rpcmodrinit = {
	(int (*)())rmm_rput,
	(int (*)())rmm_rsrv,
	rmm_open,
	rmm_close,
	nulldev,
	&rpcmod_info,
	NULL
};
241 
/*
 * Write side.
 * The write put procedure is simply putnext to conserve stack space.
 * The write service procedure is not used to queue data, but instead to
 * synchronize with flow control.
 */
static struct qinit rpcmodwinit = {
	(int (*)())rmm_wput,
	(int (*)())rmm_wsrv,
	rmm_open,
	rmm_close,
	nulldev,
	&rpcmod_info,
	NULL
};
/* Streamtab referenced by fsw: read and write qinit, remaining slots unused. */
struct streamtab rpcinfo = { &rpcmodrinit, &rpcmodwinit, NULL, NULL };
257 
/*
 * Per-transport-style operations vector.  The rmm_* entry points look up
 * one of these through q->q_ptr and call the matching member.  A member
 * may be NULL when the style has no such procedure.
 */
struct xprt_style_ops {
	int (*xo_open)();	/* open */
	int (*xo_close)();	/* close */
	void (*xo_wput)();	/* write put */
	void (*xo_wsrv)();	/* write service */
	void (*xo_rput)();	/* read put */
	void (*xo_rsrv)();	/* read service */
};
266 
/*
 * Operations for connectionless transports; selected by rmm_open() when
 * the transport reports T_CLTS.  There is no read service procedure.
 */
static struct xprt_style_ops xprt_clts_ops = {
	rpcmodopen,
	rpcmodclose,
	rpcmodwput,
	rpcmodwsrv,
	rpcmodrput,
	NULL
};
275 
/*
 * Operations for connection-oriented transports; selected by rmm_open()
 * for every service type other than T_CLTS.
 */
static struct xprt_style_ops xprt_cots_ops = {
	mir_open,
	mir_close,
	mir_wput,
	mir_wsrv,
	mir_rput,
	mir_rsrv
};
284 
/*
 * Per rpcmod "slot" data structure for connectionless transports.
 * q->q_ptr points to one of these.
 *
 * The first two members must stay in this order: the rmm_* dispatchers
 * reach rm_ops by casting q->q_ptr to struct temp_slot, whose leading
 * members are laid out the same way.
 */
struct rpcm {
	void		*rm_krpc_cell;	/* Reserved for use by KRPC */
	struct		xprt_style_ops	*rm_ops;
	int		rm_type;	/* Client or server side stream */
#define	RM_CLOSING	0x1		/* somebody is trying to close slot */
	uint_t		rm_state;	/* state of the slot. see above */
	uint_t		rm_ref;		/* cnt of external references to slot */
	kmutex_t	rm_lock;	/* mutex protecting above fields */
	kcondvar_t	rm_cwait;	/* condition for closing */
	zoneid_t	rm_zoneid;	/* zone which pushed rpcmod */
};
299 
/*
 * Temporary slot used by rmm_open() while it waits for the transport's
 * T_INFO_ACK.  Its leading members (cell, ops, type) mirror the start of
 * struct rpcm and mir_t, so the rmm_* dispatchers can fetch the ops
 * vector through this type whichever structure q->q_ptr really refers to.
 */
struct temp_slot {
	void *cell;			/* placeholder for the kRPC cell */
	struct xprt_style_ops *ops;	/* ops vector used for dispatch */
	int type;			/* placeholder for the stream type */
	mblk_t *info_ack;		/* T_INFO_ACK stored by tmp_rput() */
	kmutex_t lock;			/* protects info_ack */
	kcondvar_t wait;		/* signalled when info_ack is set */
};
308 
/*
 * Per-stream state for connection-oriented transports.  q->q_ptr points
 * to one of these once mir_open() has been selected by rmm_open().
 */
typedef struct mir_s {
	void	*mir_krpc_cell;	/* Reserved for KRPC use. This field */
					/* must be first in the structure. */
	struct xprt_style_ops	*rm_ops;
	int	mir_type;		/* Client or server side stream */

	mblk_t	*mir_head_mp;		/* RPC msg in progress */
		/*
		 * mir_head_mp points to the first mblk being collected in
		 * the current RPC message.  Record headers are removed
		 * before data is linked into mir_head_mp.
		 */
	mblk_t	*mir_tail_mp;		/* Last mblk in mir_head_mp */
		/*
		 * mir_tail_mp points to the last mblk in the message
		 * chain starting at mir_head_mp.  It is only valid
		 * if mir_head_mp is non-NULL and is used to add new
		 * data blocks to the end of chain quickly.
		 */

	int32_t	mir_frag_len;		/* Bytes seen in the current frag */
		/*
		 * mir_frag_len starts at -4 for beginning of each fragment.
		 * When this length is negative, it indicates the number of
		 * bytes that rpcmod needs to complete the record marker
		 * header.  When it is positive or zero, it holds the number
		 * of bytes that have arrived for the current fragment and
		 * are held in mir_head_mp.
		 */

	int32_t	mir_frag_header;
		/*
		 * Fragment header as collected for the current fragment.
		 * It holds the last-fragment indicator and the number
		 * of bytes in the fragment.
		 */

	unsigned int
		mir_ordrel_pending : 1,	/* Sent T_ORDREL_REQ */
		mir_hold_inbound : 1,	/* Hold inbound messages on server */
					/* side until outbound flow control */
					/* is relieved. */
		mir_closing : 1,	/* The stream is being closed */
		mir_inrservice : 1,	/* data queued or rd srv proc running */
		mir_inwservice : 1,	/* data queued or wr srv proc running */
		mir_inwflushdata : 1,	/* flush M_DATAs when srv runs */
		/*
		 * On client streams, mir_clntreq is 0 or 1; it is set
		 * to 1 whenever a new request is sent out (mir_wput)
		 * and cleared when the timer fires (mir_timer).  If
		 * the timer fires with this value equal to 0, then the
		 * stream is considered idle and KRPC is notified.
		 */
		mir_clntreq : 1,
		/*
		 * On server streams, stop accepting messages
		 */
		mir_svc_no_more_msgs : 1,
		mir_listen_stream : 1,	/* listen end point */
		mir_unused : 1,	/* no longer used */
		/*
		 * Set while mir_timer_start()/mir_timer_stop() is in the
		 * middle of changing mir_timer_id; see mir_timer_stop().
		 */
		mir_timer_call : 1,
		mir_junk_fill_thru_bit_31 : 21;

	int	mir_setup_complete;	/* server has initialized everything */
	timeout_id_t mir_timer_id;	/* Timer for idle checks */
	clock_t	mir_idle_timeout;	/* Allowed idle time before shutdown */
		/*
		 * This value is copied from clnt_idle_timeout or
		 * svc_idle_timeout during the appropriate ioctl.
		 * Kept in milliseconds
		 */
	clock_t	mir_use_timestamp;	/* updated on client with each use */
		/*
		 * This value is set to lbolt
		 * every time a client stream sends or receives data.
		 * Even if the timer message arrives, we don't shutdown
		 * client unless:
		 *    lbolt >= MSEC_TO_TICK(mir_idle_timeout)+mir_use_timestamp.
		 * This value is kept in HZ.
		 */

	uint_t	*mir_max_msg_sizep;	/* Reference to sanity check size */
		/*
		 * This pointer is set to &clnt_max_msg_size or
		 * &svc_max_msg_size during the appropriate ioctl.
		 */
	zoneid_t mir_zoneid;	/* zone which pushed rpcmod */
	/* Server-side fields. */
	int	mir_ref_cnt;		/* Reference count: server side only */
					/* counts the number of references */
					/* that a kernel RPC server thread */
					/* (see svc_run()) has on this rpcmod */
					/* slot. Effectively, it is the */
					/* number of unprocessed messages */
					/* that have been passed up to the */
					/* KRPC layer */

	mblk_t	*mir_svc_pend_mp;	/* Pending T_ORDREL_IND or */
					/* T_DISCON_IND */

	/*
	 * these fields are for both client and server, but for debugging,
	 * it is easier to have these last in the structure.
	 */
	kmutex_t	mir_mutex;	/* Mutex and condvar for close */
	kcondvar_t	mir_condvar;	/* synchronization. */
	kcondvar_t	mir_timer_cv;	/* Timer routine sync. */
} mir_t;
417 
void tmp_rput(queue_t *q, mblk_t *mp);

/*
 * Ops vector installed by rmm_open() while the transport style is still
 * unknown.  Only the put procedures are provided: writes are passed
 * straight downstream with putnext() and reads go to tmp_rput().  The
 * open, close and service slots are NULL.
 */
struct xprt_style_ops tmpops = {
	NULL,
	NULL,
	putnext,
	NULL,
	tmp_rput,
	NULL
};
428 
429 void
430 tmp_rput(queue_t *q, mblk_t *mp)
431 {
432 	struct temp_slot *t = (struct temp_slot *)(q->q_ptr);
433 	struct T_info_ack *pptr;
434 
435 	switch (mp->b_datap->db_type) {
436 	case M_PCPROTO:
437 		pptr = (struct T_info_ack *)mp->b_rptr;
438 		switch (pptr->PRIM_type) {
439 		case T_INFO_ACK:
440 			mutex_enter(&t->lock);
441 			t->info_ack = mp;
442 			cv_signal(&t->wait);
443 			mutex_exit(&t->lock);
444 			return;
445 		default:
446 			break;
447 		}
448 	default:
449 		break;
450 	}
451 
452 	/*
453 	 * Not an info-ack, so free it. This is ok because we should
454 	 * not be receiving data until the open finishes: rpcmod
455 	 * is pushed well before the end-point is bound to an address.
456 	 */
457 	freemsg(mp);
458 }
459 
460 int
461 rmm_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
462 {
463 	mblk_t *bp;
464 	struct temp_slot ts, *t;
465 	struct T_info_ack *pptr;
466 	int error = 0;
467 
468 	ASSERT(q != NULL);
469 	/*
470 	 * Check for re-opens.
471 	 */
472 	if (q->q_ptr) {
473 		TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END,
474 		    "rpcmodopen_end:(%s)", "q->qptr");
475 		return (0);
476 	}
477 
478 	t = &ts;
479 	bzero(t, sizeof (*t));
480 	q->q_ptr = (void *)t;
481 	WR(q)->q_ptr = (void *)t;
482 
483 	/*
484 	 * Allocate the required messages upfront.
485 	 */
486 	if ((bp = allocb_cred(sizeof (struct T_info_req) +
487 	    sizeof (struct T_info_ack), crp, curproc->p_pid)) == NULL) {
488 		return (ENOBUFS);
489 	}
490 
491 	mutex_init(&t->lock, NULL, MUTEX_DEFAULT, NULL);
492 	cv_init(&t->wait, NULL, CV_DEFAULT, NULL);
493 
494 	t->ops = &tmpops;
495 
496 	qprocson(q);
497 	bp->b_datap->db_type = M_PCPROTO;
498 	*(int32_t *)bp->b_wptr = (int32_t)T_INFO_REQ;
499 	bp->b_wptr += sizeof (struct T_info_req);
500 	putnext(WR(q), bp);
501 
502 	mutex_enter(&t->lock);
503 	while (t->info_ack == NULL) {
504 		if (cv_wait_sig(&t->wait, &t->lock) == 0) {
505 			error = EINTR;
506 			break;
507 		}
508 	}
509 	mutex_exit(&t->lock);
510 
511 	if (error)
512 		goto out;
513 
514 	pptr = (struct T_info_ack *)t->info_ack->b_rptr;
515 
516 	if (pptr->SERV_type == T_CLTS) {
517 		if ((error = rpcmodopen(q, devp, flag, sflag, crp)) == 0)
518 			((struct rpcm *)q->q_ptr)->rm_ops = &xprt_clts_ops;
519 	} else {
520 		if ((error = mir_open(q, devp, flag, sflag, crp)) == 0)
521 			((mir_t *)q->q_ptr)->rm_ops = &xprt_cots_ops;
522 	}
523 
524 out:
525 	if (error)
526 		qprocsoff(q);
527 
528 	freemsg(t->info_ack);
529 	mutex_destroy(&t->lock);
530 	cv_destroy(&t->wait);
531 
532 	return (error);
533 }
534 
535 void
536 rmm_rput(queue_t *q, mblk_t  *mp)
537 {
538 	(*((struct temp_slot *)q->q_ptr)->ops->xo_rput)(q, mp);
539 }
540 
541 void
542 rmm_rsrv(queue_t *q)
543 {
544 	(*((struct temp_slot *)q->q_ptr)->ops->xo_rsrv)(q);
545 }
546 
547 void
548 rmm_wput(queue_t *q, mblk_t *mp)
549 {
550 	(*((struct temp_slot *)q->q_ptr)->ops->xo_wput)(q, mp);
551 }
552 
553 void
554 rmm_wsrv(queue_t *q)
555 {
556 	(*((struct temp_slot *)q->q_ptr)->ops->xo_wsrv)(q);
557 }
558 
559 int
560 rmm_close(queue_t *q, int flag, cred_t *crp)
561 {
562 	return ((*((struct temp_slot *)q->q_ptr)->ops->xo_close)(q, flag, crp));
563 }
564 
565 static void rpcmod_release(queue_t *, mblk_t *);
566 /*
567  * rpcmodopen -	open routine gets called when the module gets pushed
568  *		onto the stream.
569  */
570 /*ARGSUSED*/
571 int
572 rpcmodopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
573 {
574 	struct rpcm *rmp;
575 
576 	extern void (*rpc_rele)(queue_t *, mblk_t *);
577 
578 	TRACE_0(TR_FAC_KRPC, TR_RPCMODOPEN_START, "rpcmodopen_start:");
579 
580 	/*
581 	 * Initialize entry points to release a rpcmod slot (and an input
582 	 * message if supplied) and to send an output message to the module
583 	 * below rpcmod.
584 	 */
585 	if (rpc_rele == NULL)
586 		rpc_rele = rpcmod_release;
587 
588 	/*
589 	 * Only sufficiently privileged users can use this module, and it
590 	 * is assumed that they will use this module properly, and NOT send
591 	 * bulk data from downstream.
592 	 */
593 	if (secpolicy_rpcmod_open(crp) != 0)
594 		return (EPERM);
595 
596 	/*
597 	 * Allocate slot data structure.
598 	 */
599 	rmp = kmem_zalloc(sizeof (*rmp), KM_SLEEP);
600 
601 	mutex_init(&rmp->rm_lock, NULL, MUTEX_DEFAULT, NULL);
602 	cv_init(&rmp->rm_cwait, NULL, CV_DEFAULT, NULL);
603 	rmp->rm_zoneid = rpc_zoneid();
604 	/*
605 	 * slot type will be set by kRPC client and server ioctl's
606 	 */
607 	rmp->rm_type = 0;
608 
609 	q->q_ptr = (void *)rmp;
610 	WR(q)->q_ptr = (void *)rmp;
611 
612 	TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, "rpcmodopen_end:(%s)", "end");
613 	return (0);
614 }
615 
616 /*
617  * rpcmodclose - This routine gets called when the module gets popped
618  * off of the stream.
619  */
620 /*ARGSUSED*/
621 int
622 rpcmodclose(queue_t *q, int flag, cred_t *crp)
623 {
624 	struct rpcm *rmp;
625 
626 	ASSERT(q != NULL);
627 	rmp = (struct rpcm *)q->q_ptr;
628 
629 	/*
630 	 * Mark our state as closing.
631 	 */
632 	mutex_enter(&rmp->rm_lock);
633 	rmp->rm_state |= RM_CLOSING;
634 
635 	/*
636 	 * Check and see if there are any messages on the queue.  If so, send
637 	 * the messages, regardless whether the downstream module is ready to
638 	 * accept data.
639 	 */
640 	if (rmp->rm_type == RPC_SERVER) {
641 		flushq(q, FLUSHDATA);
642 
643 		qenable(WR(q));
644 
645 		if (rmp->rm_ref) {
646 			mutex_exit(&rmp->rm_lock);
647 			/*
648 			 * call into SVC to clean the queue
649 			 */
650 			svc_queueclean(q);
651 			mutex_enter(&rmp->rm_lock);
652 
653 			/*
654 			 * Block while there are kRPC threads with a reference
655 			 * to this message.
656 			 */
657 			while (rmp->rm_ref)
658 				cv_wait(&rmp->rm_cwait, &rmp->rm_lock);
659 		}
660 
661 		mutex_exit(&rmp->rm_lock);
662 
663 		/*
664 		 * It is now safe to remove this queue from the stream. No kRPC
665 		 * threads have a reference to the stream, and none ever will,
666 		 * because RM_CLOSING is set.
667 		 */
668 		qprocsoff(q);
669 
670 		/* Notify kRPC that this stream is going away. */
671 		svc_queueclose(q);
672 	} else {
673 		mutex_exit(&rmp->rm_lock);
674 		qprocsoff(q);
675 	}
676 
677 	q->q_ptr = NULL;
678 	WR(q)->q_ptr = NULL;
679 	mutex_destroy(&rmp->rm_lock);
680 	cv_destroy(&rmp->rm_cwait);
681 	kmem_free(rmp, sizeof (*rmp));
682 	return (0);
683 }
684 
#ifdef	DEBUG
/*
 * Fault-injection knobs read by rpcmodrput() in DEBUG kernels.
 */
/* While > 0, a copy of each inbound message is also passed upstream. */
int	rpcmod_send_msg_up = 0;
/* While > 0, a T_UDERROR_IND is fabricated from each T_UNITDATA_IND. */
int	rpcmod_send_uderr = 0;
/* When non-zero, controls how often server requests are duplicated. */
int	rpcmod_send_dup = 0;
/* Running counter used together with rpcmod_send_dup. */
int	rpcmod_send_dup_cnt = 0;
#endif
691 
/*
 * rpcmodrput -	Module read put procedure for connectionless transports.
 *		This is called from the module, driver, or stream head
 *		downstream.
 *
 * Messages are dropped until the stream type has been set by ioctl.
 * Afterwards:
 *   - T_UNITDATA_IND on a client stream is sanity-checked and handed to
 *     clnt_clts_dispatch_notify();
 *   - T_UNITDATA_IND on a server stream with a kRPC cell takes a slot
 *     reference and is queued to kRPC with svc_queuereq(); without a
 *     cell it is freed;
 *   - T_UNITDATA_IND while the slot is closing is passed upstream;
 *   - T_UDERROR_IND is sanity-checked, logged and freed;
 *   - everything else is passed upstream with putnext().
 */
void
rpcmodrput(queue_t *q, mblk_t *mp)
{
	struct rpcm *rmp;
	union T_primitives *pptr;
	int hdrsz;

	TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_START, "rpcmodrput_start:");

	ASSERT(q != NULL);
	rmp = (struct rpcm *)q->q_ptr;

	/* Stream type not configured yet: nothing can consume the message. */
	if (rmp->rm_type == 0) {
		freemsg(mp);
		return;
	}

#ifdef DEBUG
	/* Fault injection: also send a copy of the message upstream. */
	if (rpcmod_send_msg_up > 0) {
		mblk_t *nmp = copymsg(mp);
		if (nmp) {
			putnext(q, nmp);
			rpcmod_send_msg_up--;
		}
	}
	/*
	 * Fault injection: build a T_UDERROR_IND (ENETDOWN) out of an
	 * incoming T_UNITDATA_IND and send it upstream.  The address and
	 * option offsets are shifted by the size difference (d) between
	 * the two headers.
	 */
	if ((rpcmod_send_uderr > 0) && mp->b_datap->db_type == M_PROTO) {
		mblk_t *nmp;
		struct T_unitdata_ind *data;
		struct T_uderror_ind *ud;
		int d;
		data = (struct T_unitdata_ind *)mp->b_rptr;
		if (data->PRIM_type == T_UNITDATA_IND) {
			d = sizeof (*ud) - sizeof (*data);
			nmp = allocb(mp->b_wptr - mp->b_rptr + d, BPRI_HI);
			if (nmp) {
				ud = (struct T_uderror_ind *)nmp->b_rptr;
				ud->PRIM_type = T_UDERROR_IND;
				ud->DEST_length = data->SRC_length;
				ud->DEST_offset = data->SRC_offset + d;
				ud->OPT_length = data->OPT_length;
				ud->OPT_offset = data->OPT_offset + d;
				ud->ERROR_type = ENETDOWN;
				if (data->SRC_length) {
					bcopy(mp->b_rptr +
					    data->SRC_offset,
					    nmp->b_rptr +
					    ud->DEST_offset,
					    data->SRC_length);
				}
				if (data->OPT_length) {
					bcopy(mp->b_rptr +
					    data->OPT_offset,
					    nmp->b_rptr +
					    ud->OPT_offset,
					    data->OPT_length);
				}
				nmp->b_wptr += d;
				nmp->b_wptr += (mp->b_wptr - mp->b_rptr);
				nmp->b_datap->db_type = M_PROTO;
				putnext(q, nmp);
				rpcmod_send_uderr--;
			}
		}
	}
#endif
	switch (mp->b_datap->db_type) {
	default:
		putnext(q, mp);
		break;

	case M_PROTO:
	case M_PCPROTO:
		/* NOTE(review): checked only in DEBUG kernels (ASSERT). */
		ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (int32_t));
		pptr = (union T_primitives *)mp->b_rptr;

		/*
		 * Forward this message to krpc if it is data.
		 */
		if (pptr->type == T_UNITDATA_IND) {
			/* Only assigned and used in DEBUG kernels. */
			mblk_t *nmp;

			/*
			 * Check if the module is being popped.
			 */
			mutex_enter(&rmp->rm_lock);
			if (rmp->rm_state & RM_CLOSING) {
				mutex_exit(&rmp->rm_lock);
				putnext(q, mp);
				break;
			}

			switch (rmp->rm_type) {
			case RPC_CLIENT:
				mutex_exit(&rmp->rm_lock);
				hdrsz = mp->b_wptr - mp->b_rptr;

				/*
				 * Make sure the header is sane.
				 */
				if (hdrsz < TUNITDATAINDSZ ||
				    hdrsz < (pptr->unitdata_ind.OPT_length +
				    pptr->unitdata_ind.OPT_offset) ||
				    hdrsz < (pptr->unitdata_ind.SRC_length +
				    pptr->unitdata_ind.SRC_offset)) {
					freemsg(mp);
					return;
				}

				/*
				 * Call clnt_clts_dispatch_notify, so that it
				 * can pass the message to the proper caller.
				 * Don't discard the header just yet since the
				 * client may need the sender's address.
				 */
				clnt_clts_dispatch_notify(mp, hdrsz,
				    rmp->rm_zoneid);
				return;
			case RPC_SERVER:
				/*
				 * rm_krpc_cell is exclusively used by the kRPC
				 * CLTS server
				 *
				 * NOTE(review): unlike the client path, the
				 * header is not length-checked here before
				 * the message is queued -- confirm that
				 * the consumer validates it.
				 */
				if (rmp->rm_krpc_cell) {
#ifdef DEBUG
					/*
					 * Test duplicate request cache and
					 * rm_ref count handling by sending a
					 * duplicate every so often, if
					 * desired.
					 */
					if (rpcmod_send_dup &&
					    rpcmod_send_dup_cnt++ %
					    rpcmod_send_dup)
						nmp = copymsg(mp);
					else
						nmp = NULL;
#endif
					/*
					 * Raise the reference count on this
					 * module to prevent it from being
					 * popped before krpc generates the
					 * reply.
					 */
					rmp->rm_ref++;
					mutex_exit(&rmp->rm_lock);

					/*
					 * Submit the message to krpc.
					 */
					svc_queuereq(q, mp);
#ifdef DEBUG
					/*
					 * Send duplicate if we created one.
					 */
					if (nmp) {
						mutex_enter(&rmp->rm_lock);
						rmp->rm_ref++;
						mutex_exit(&rmp->rm_lock);
						svc_queuereq(q, nmp);
					}
#endif
				} else {
					mutex_exit(&rmp->rm_lock);
					freemsg(mp);
				}
				return;
			default:
				mutex_exit(&rmp->rm_lock);
				freemsg(mp);
				return;
			} /* end switch(rmp->rm_type) */
		} else if (pptr->type == T_UDERROR_IND) {
			mutex_enter(&rmp->rm_lock);
			hdrsz = mp->b_wptr - mp->b_rptr;

			/*
			 * Make sure the header is sane
			 */
			if (hdrsz < TUDERRORINDSZ ||
			    hdrsz < (pptr->uderror_ind.OPT_length +
			    pptr->uderror_ind.OPT_offset) ||
			    hdrsz < (pptr->uderror_ind.DEST_length +
			    pptr->uderror_ind.DEST_offset)) {
				mutex_exit(&rmp->rm_lock);
				freemsg(mp);
				return;
			}

			/*
			 * In the case where a unit data error has been
			 * received, all we need to do is clear the message from
			 * the queue.
			 */
			mutex_exit(&rmp->rm_lock);
			freemsg(mp);
			RPCLOG(32, "rpcmodrput: unitdata error received at "
			    "%ld\n", gethrestime_sec());
			return;
		} /* end else if (pptr->type == T_UDERROR_IND) */

		/* Any other TPI primitive is passed upstream untouched. */
		putnext(q, mp);
		break;
	} /* end switch (mp->b_datap->db_type) */

	TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_END,
	    "rpcmodrput_end:");
	/*
	 * Return codes are not looked at by the STREAMS framework.
	 */
}
906 
907 /*
908  * write put procedure
909  */
910 void
911 rpcmodwput(queue_t *q, mblk_t *mp)
912 {
913 	struct rpcm	*rmp;
914 
915 	ASSERT(q != NULL);
916 
917 	switch (mp->b_datap->db_type) {
918 		case M_PROTO:
919 		case M_PCPROTO:
920 			break;
921 		default:
922 			rpcmodwput_other(q, mp);
923 			return;
924 	}
925 
926 	/*
927 	 * Check to see if we can send the message downstream.
928 	 */
929 	if (canputnext(q)) {
930 		putnext(q, mp);
931 		return;
932 	}
933 
934 	rmp = (struct rpcm *)q->q_ptr;
935 	ASSERT(rmp != NULL);
936 
937 	/*
938 	 * The first canputnext failed.  Try again except this time with the
939 	 * lock held, so that we can check the state of the stream to see if
940 	 * it is closing.  If either of these conditions evaluate to true
941 	 * then send the meesage.
942 	 */
943 	mutex_enter(&rmp->rm_lock);
944 	if (canputnext(q) || (rmp->rm_state & RM_CLOSING)) {
945 		mutex_exit(&rmp->rm_lock);
946 		putnext(q, mp);
947 	} else {
948 		/*
949 		 * canputnext failed again and the stream is not closing.
950 		 * Place the message on the queue and let the service
951 		 * procedure handle the message.
952 		 */
953 		mutex_exit(&rmp->rm_lock);
954 		(void) putq(q, mp);
955 	}
956 }
957 
958 static void
959 rpcmodwput_other(queue_t *q, mblk_t *mp)
960 {
961 	struct rpcm	*rmp;
962 	struct iocblk	*iocp;
963 
964 	rmp = (struct rpcm *)q->q_ptr;
965 	ASSERT(rmp != NULL);
966 
967 	switch (mp->b_datap->db_type) {
968 		case M_IOCTL:
969 			iocp = (struct iocblk *)mp->b_rptr;
970 			ASSERT(iocp != NULL);
971 			switch (iocp->ioc_cmd) {
972 				case RPC_CLIENT:
973 				case RPC_SERVER:
974 					mutex_enter(&rmp->rm_lock);
975 					rmp->rm_type = iocp->ioc_cmd;
976 					mutex_exit(&rmp->rm_lock);
977 					mp->b_datap->db_type = M_IOCACK;
978 					qreply(q, mp);
979 					return;
980 				default:
981 				/*
982 				 * pass the ioctl downstream and hope someone
983 				 * down there knows how to handle it.
984 				 */
985 					putnext(q, mp);
986 					return;
987 			}
988 		default:
989 			break;
990 	}
991 	/*
992 	 * This is something we definitely do not know how to handle, just
993 	 * pass the message downstream
994 	 */
995 	putnext(q, mp);
996 }
997 
998 /*
999  * Module write service procedure. This is called by downstream modules
1000  * for back enabling during flow control.
1001  */
1002 void
1003 rpcmodwsrv(queue_t *q)
1004 {
1005 	struct rpcm	*rmp;
1006 	mblk_t		*mp = NULL;
1007 
1008 	rmp = (struct rpcm *)q->q_ptr;
1009 	ASSERT(rmp != NULL);
1010 
1011 	/*
1012 	 * Get messages that may be queued and send them down stream
1013 	 */
1014 	while ((mp = getq(q)) != NULL) {
1015 		/*
1016 		 * Optimize the service procedure for the server-side, by
1017 		 * avoiding a call to canputnext().
1018 		 */
1019 		if (rmp->rm_type == RPC_SERVER || canputnext(q)) {
1020 			putnext(q, mp);
1021 			continue;
1022 		}
1023 		(void) putbq(q, mp);
1024 		return;
1025 	}
1026 }
1027 
1028 static void
1029 rpcmod_release(queue_t *q, mblk_t *bp)
1030 {
1031 	struct rpcm *rmp;
1032 
1033 	/*
1034 	 * For now, just free the message.
1035 	 */
1036 	if (bp)
1037 		freemsg(bp);
1038 	rmp = (struct rpcm *)q->q_ptr;
1039 
1040 	mutex_enter(&rmp->rm_lock);
1041 	rmp->rm_ref--;
1042 
1043 	if (rmp->rm_ref == 0 && (rmp->rm_state & RM_CLOSING)) {
1044 		cv_broadcast(&rmp->rm_cwait);
1045 	}
1046 
1047 	mutex_exit(&rmp->rm_lock);
1048 }
1049 
1050 /*
1051  * This part of rpcmod is pushed on a connection-oriented transport for use
1052  * by RPC.  It serves to bypass the Stream head, implements
1053  * the record marking protocol, and dispatches incoming RPC messages.
1054  */
1055 
/* Default idle timer values (milliseconds) */
#define	MIR_CLNT_IDLE_TIMEOUT	(5 * (60 * 1000L))	/* 5 minutes */
#define	MIR_SVC_IDLE_TIMEOUT	(6 * (60 * 1000L))	/* 6 minutes */
#define	MIR_SVC_ORDREL_TIMEOUT	(10 * (60 * 1000L))	/* 10 minutes */
#define	MIR_LASTFRAG	0x80000000	/* Record marker */

/*
 * Number of data bytes in a message: msgdsize() for a chain, otherwise
 * the length of the single block.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	DLEN(mp) \
	((mp)->b_cont ? msgdsize(mp) : ((mp)->b_wptr - (mp)->b_rptr))

/*
 * True when no kRPC thread references the server slot and neither data
 * is queued on, nor the service procedure is running for, the read side.
 */
#define	MIR_SVC_QUIESCED(mir)	\
	((mir)->mir_ref_cnt == 0 && (mir)->mir_inrservice == 0)

/*
 * Clear mir_inrservice and, on a closing server stream, wake up the
 * thread waiting on mir_condvar.  This expands to a bare { } block, so
 * use it only as a full statement.
 */
#define	MIR_CLEAR_INRSRV(mir_ptr)	{	\
	(mir_ptr)->mir_inrservice = 0;	\
	if ((mir_ptr)->mir_type == RPC_SERVER &&	\
		(mir_ptr)->mir_closing)	\
		cv_signal(&(mir_ptr)->mir_condvar);	\
}

/*
 * Don't block service procedure (and mir_close) if
 * we are in the process of closing.
 */
#define	MIR_WCANPUTNEXT(mir_ptr, write_q)	\
	(canputnext(write_q) || ((mir_ptr)->mir_svc_no_more_msgs == 1))
1080 
/*
 * Forward declarations for the connection-oriented (mir_*) code.
 * mir_wput and mir_wsrv repeat declarations made near the top of the file.
 */
static int	mir_clnt_dup_request(queue_t *q, mblk_t *mp);
static void	mir_rput_proto(queue_t *q, mblk_t *mp);
static int	mir_svc_policy_notify(queue_t *q, int event);
static void	mir_svc_release(queue_t *wq, mblk_t *mp);
static void	mir_svc_start(queue_t *wq);
static void	mir_svc_idle_start(queue_t *, mir_t *);
static void	mir_svc_idle_stop(queue_t *, mir_t *);
static void	mir_svc_start_close(queue_t *, mir_t *);
static void	mir_clnt_idle_do_stop(queue_t *);
static void	mir_clnt_idle_stop(queue_t *, mir_t *);
static void	mir_clnt_idle_start(queue_t *, mir_t *);
static void	mir_wput(queue_t *q, mblk_t *mp);
static void	mir_wput_other(queue_t *q, mblk_t *mp);
static void	mir_wsrv(queue_t *q);
static	void	mir_disconnect(queue_t *, mir_t *ir);
static	int	mir_check_len(queue_t *, int32_t, mblk_t *);
static	void	mir_timer(void *);
1098 
/*
 * Function-pointer hooks defined elsewhere in kRPC.
 * NOTE(review): presumably filled in by the mir_* code, the way
 * rpcmodopen() fills in rpc_rele -- confirm.
 */
extern void	(*mir_rele)(queue_t *, mblk_t *);
extern void	(*mir_start)(queue_t *);
extern void	(*clnt_stop_idle)(queue_t *);

/*
 * Idle timeouts, in milliseconds; copied into mir_idle_timeout during the
 * appropriate ioctl (see the mir_t comments).
 */
clock_t	clnt_idle_timeout = MIR_CLNT_IDLE_TIMEOUT;
clock_t	svc_idle_timeout = MIR_SVC_IDLE_TIMEOUT;

/*
 * Timeout for subsequent notifications of idle connection.  This is
 * typically used to clean up after a wedged orderly release.
 */
clock_t	svc_ordrel_timeout = MIR_SVC_ORDREL_TIMEOUT; /* milliseconds */

/*
 * Message size sanity limits.  mir_max_msg_sizep in mir_t is pointed at
 * clnt_max_msg_size or svc_max_msg_size during the appropriate ioctl.
 */
extern	uint_t	*clnt_max_msg_sizep;
extern	uint_t	*svc_max_msg_sizep;
uint_t	clnt_max_msg_size = RPC_MAXDATASIZE;
uint_t	svc_max_msg_size = RPC_MAXDATASIZE;
/* NOTE(review): not used in the reviewed portion; looks like a counter. */
uint_t	mir_krpc_cell_null;
1117 
1118 static void
1119 mir_timer_stop(mir_t *mir)
1120 {
1121 	timeout_id_t tid;
1122 
1123 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
1124 
1125 	/*
1126 	 * Since the mir_mutex lock needs to be released to call
1127 	 * untimeout(), we need to make sure that no other thread
1128 	 * can start/stop the timer (changing mir_timer_id) during
1129 	 * that time.  The mir_timer_call bit and the mir_timer_cv
1130 	 * condition variable are used to synchronize this.  Setting
1131 	 * mir_timer_call also tells mir_timer() (refer to the comments
1132 	 * in mir_timer()) that it does not need to do anything.
1133 	 */
1134 	while (mir->mir_timer_call)
1135 		cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
1136 	mir->mir_timer_call = B_TRUE;
1137 
1138 	if ((tid = mir->mir_timer_id) != 0) {
1139 		mir->mir_timer_id = 0;
1140 		mutex_exit(&mir->mir_mutex);
1141 		(void) untimeout(tid);
1142 		mutex_enter(&mir->mir_mutex);
1143 	}
1144 	mir->mir_timer_call = B_FALSE;
1145 	cv_broadcast(&mir->mir_timer_cv);
1146 }
1147 
1148 static void
1149 mir_timer_start(queue_t *q, mir_t *mir, clock_t intrvl)
1150 {
1151 	timeout_id_t tid;
1152 
1153 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
1154 
1155 	while (mir->mir_timer_call)
1156 		cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
1157 	mir->mir_timer_call = B_TRUE;
1158 
1159 	if ((tid = mir->mir_timer_id) != 0) {
1160 		mutex_exit(&mir->mir_mutex);
1161 		(void) untimeout(tid);
1162 		mutex_enter(&mir->mir_mutex);
1163 	}
1164 	/* Only start the timer when it is not closing. */
1165 	if (!mir->mir_closing) {
1166 		mir->mir_timer_id = timeout(mir_timer, q,
1167 		    MSEC_TO_TICK(intrvl));
1168 	}
1169 	mir->mir_timer_call = B_FALSE;
1170 	cv_broadcast(&mir->mir_timer_cv);
1171 }
1172 
1173 static int
1174 mir_clnt_dup_request(queue_t *q, mblk_t *mp)
1175 {
1176 	mblk_t  *mp1;
1177 	uint32_t  new_xid;
1178 	uint32_t  old_xid;
1179 
1180 	ASSERT(MUTEX_HELD(&((mir_t *)q->q_ptr)->mir_mutex));
1181 	new_xid = BE32_TO_U32(&mp->b_rptr[4]);
1182 	/*
1183 	 * This loop is a bit tacky -- it walks the STREAMS list of
1184 	 * flow-controlled messages.
1185 	 */
1186 	if ((mp1 = q->q_first) != NULL) {
1187 		do {
1188 			old_xid = BE32_TO_U32(&mp1->b_rptr[4]);
1189 			if (new_xid == old_xid)
1190 				return (1);
1191 		} while ((mp1 = mp1->b_next) != NULL);
1192 	}
1193 	return (0);
1194 }
1195 
1196 static int
1197 mir_close(queue_t *q)
1198 {
1199 	mir_t	*mir = q->q_ptr;
1200 	mblk_t	*mp;
1201 	bool_t queue_cleaned = FALSE;
1202 
1203 	RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q);
1204 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1205 	mutex_enter(&mir->mir_mutex);
1206 	if ((mp = mir->mir_head_mp) != NULL) {
1207 		mir->mir_head_mp = NULL;
1208 		mir->mir_tail_mp = NULL;
1209 		freemsg(mp);
1210 	}
1211 	/*
1212 	 * Set mir_closing so we get notified when MIR_SVC_QUIESCED()
1213 	 * is TRUE.  And mir_timer_start() won't start the timer again.
1214 	 */
1215 	mir->mir_closing = B_TRUE;
1216 	mir_timer_stop(mir);
1217 
1218 	if (mir->mir_type == RPC_SERVER) {
1219 		flushq(q, FLUSHDATA);	/* Ditch anything waiting on read q */
1220 
1221 		/*
1222 		 * This will prevent more requests from arriving and
1223 		 * will force rpcmod to ignore flow control.
1224 		 */
1225 		mir_svc_start_close(WR(q), mir);
1226 
1227 		while ((!MIR_SVC_QUIESCED(mir)) || mir->mir_inwservice == 1) {
1228 
1229 			if (mir->mir_ref_cnt && !mir->mir_inrservice &&
1230 			    (queue_cleaned == FALSE)) {
1231 				/*
1232 				 * call into SVC to clean the queue
1233 				 */
1234 				mutex_exit(&mir->mir_mutex);
1235 				svc_queueclean(q);
1236 				queue_cleaned = TRUE;
1237 				mutex_enter(&mir->mir_mutex);
1238 				continue;
1239 			}
1240 
1241 			/*
1242 			 * Bugid 1253810 - Force the write service
1243 			 * procedure to send its messages, regardless
1244 			 * whether the downstream  module is ready
1245 			 * to accept data.
1246 			 */
1247 			if (mir->mir_inwservice == 1)
1248 				qenable(WR(q));
1249 
1250 			cv_wait(&mir->mir_condvar, &mir->mir_mutex);
1251 		}
1252 
1253 		mutex_exit(&mir->mir_mutex);
1254 		qprocsoff(q);
1255 
1256 		/* Notify KRPC that this stream is going away. */
1257 		svc_queueclose(q);
1258 	} else {
1259 		mutex_exit(&mir->mir_mutex);
1260 		qprocsoff(q);
1261 	}
1262 
1263 	mutex_destroy(&mir->mir_mutex);
1264 	cv_destroy(&mir->mir_condvar);
1265 	cv_destroy(&mir->mir_timer_cv);
1266 	kmem_free(mir, sizeof (mir_t));
1267 	return (0);
1268 }
1269 
1270 /*
1271  * This is server side only (RPC_SERVER).
1272  *
1273  * Exit idle mode.
1274  */
1275 static void
1276 mir_svc_idle_stop(queue_t *q, mir_t *mir)
1277 {
1278 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
1279 	ASSERT((q->q_flag & QREADR) == 0);
1280 	ASSERT(mir->mir_type == RPC_SERVER);
1281 	RPCLOG(16, "rpcmod: mir_svc_idle_stop of q 0x%p\n", (void *)q);
1282 
1283 	mir_timer_stop(mir);
1284 }
1285 
1286 /*
1287  * This is server side only (RPC_SERVER).
1288  *
1289  * Start idle processing, which will include setting idle timer if the
1290  * stream is not being closed.
1291  */
1292 static void
1293 mir_svc_idle_start(queue_t *q, mir_t *mir)
1294 {
1295 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
1296 	ASSERT((q->q_flag & QREADR) == 0);
1297 	ASSERT(mir->mir_type == RPC_SERVER);
1298 	RPCLOG(16, "rpcmod: mir_svc_idle_start q 0x%p\n", (void *)q);
1299 
1300 	/*
1301 	 * Don't re-start idle timer if we are closing queues.
1302 	 */
1303 	if (mir->mir_closing) {
1304 		RPCLOG(16, "mir_svc_idle_start - closing: 0x%p\n",
1305 		    (void *)q);
1306 
1307 		/*
1308 		 * We will call mir_svc_idle_start() whenever MIR_SVC_QUIESCED()
1309 		 * is true.  When it is true, and we are in the process of
1310 		 * closing the stream, signal any thread waiting in
1311 		 * mir_close().
1312 		 */
1313 		if (mir->mir_inwservice == 0)
1314 			cv_signal(&mir->mir_condvar);
1315 
1316 	} else {
1317 		RPCLOG(16, "mir_svc_idle_start - reset %s timer\n",
1318 		    mir->mir_ordrel_pending ? "ordrel" : "normal");
1319 		/*
1320 		 * Normal condition, start the idle timer.  If an orderly
1321 		 * release has been sent, set the timeout to wait for the
1322 		 * client to close its side of the connection.  Otherwise,
1323 		 * use the normal idle timeout.
1324 		 */
1325 		mir_timer_start(q, mir, mir->mir_ordrel_pending ?
1326 		    svc_ordrel_timeout : mir->mir_idle_timeout);
1327 	}
1328 }
1329 
1330 /* ARGSUSED */
1331 static int
1332 mir_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1333 {
1334 	mir_t	*mir;
1335 
1336 	RPCLOG(32, "rpcmod: mir_open of q 0x%p\n", (void *)q);
1337 	/* Set variables used directly by KRPC. */
1338 	if (!mir_rele)
1339 		mir_rele = mir_svc_release;
1340 	if (!mir_start)
1341 		mir_start = mir_svc_start;
1342 	if (!clnt_stop_idle)
1343 		clnt_stop_idle = mir_clnt_idle_do_stop;
1344 	if (!clnt_max_msg_sizep)
1345 		clnt_max_msg_sizep = &clnt_max_msg_size;
1346 	if (!svc_max_msg_sizep)
1347 		svc_max_msg_sizep = &svc_max_msg_size;
1348 
1349 	/* Allocate a zero'ed out mir structure for this stream. */
1350 	mir = kmem_zalloc(sizeof (mir_t), KM_SLEEP);
1351 
1352 	/*
1353 	 * We set hold inbound here so that incoming messages will
1354 	 * be held on the read-side queue until the stream is completely
1355 	 * initialized with a RPC_CLIENT or RPC_SERVER ioctl.  During
1356 	 * the ioctl processing, the flag is cleared and any messages that
1357 	 * arrived between the open and the ioctl are delivered to KRPC.
1358 	 *
1359 	 * Early data should never arrive on a client stream since
1360 	 * servers only respond to our requests and we do not send any.
1361 	 * until after the stream is initialized.  Early data is
1362 	 * very common on a server stream where the client will start
1363 	 * sending data as soon as the connection is made (and this
1364 	 * is especially true with TCP where the protocol accepts the
1365 	 * connection before nfsd or KRPC is notified about it).
1366 	 */
1367 
1368 	mir->mir_hold_inbound = 1;
1369 
1370 	/*
1371 	 * Start the record marker looking for a 4-byte header.  When
1372 	 * this length is negative, it indicates that rpcmod is looking
1373 	 * for bytes to consume for the record marker header.  When it
1374 	 * is positive, it holds the number of bytes that have arrived
1375 	 * for the current fragment and are being held in mir_header_mp.
1376 	 */
1377 
1378 	mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
1379 
1380 	mir->mir_zoneid = rpc_zoneid();
1381 	mutex_init(&mir->mir_mutex, NULL, MUTEX_DEFAULT, NULL);
1382 	cv_init(&mir->mir_condvar, NULL, CV_DRIVER, NULL);
1383 	cv_init(&mir->mir_timer_cv, NULL, CV_DRIVER, NULL);
1384 
1385 	q->q_ptr = (char *)mir;
1386 	WR(q)->q_ptr = (char *)mir;
1387 
1388 	/*
1389 	 * We noenable the read-side queue because we don't want it
1390 	 * automatically enabled by putq.  We enable it explicitly
1391 	 * in mir_wsrv when appropriate. (See additional comments on
1392 	 * flow control at the beginning of mir_rsrv.)
1393 	 */
1394 	noenable(q);
1395 
1396 	qprocson(q);
1397 	return (0);
1398 }
1399 
1400 /*
1401  * Read-side put routine for both the client and server side.  Does the
1402  * record marking for incoming RPC messages, and when complete, dispatches
1403  * the message to either the client or server.
1404  */
1405 static void
1406 mir_rput(queue_t *q, mblk_t *mp)
1407 {
1408 	int	excess;
1409 	int32_t	frag_len, frag_header;
1410 	mblk_t	*cont_mp, *head_mp, *tail_mp, *mp1;
1411 	mir_t	*mir = q->q_ptr;
1412 	boolean_t stop_timer = B_FALSE;
1413 
1414 	ASSERT(mir != NULL);
1415 
1416 	/*
1417 	 * If the stream has not been set up as a RPC_CLIENT or RPC_SERVER
1418 	 * with the corresponding ioctl, then don't accept
1419 	 * any inbound data.  This should never happen for streams
1420 	 * created by nfsd or client-side KRPC because they are careful
1421 	 * to set the mode of the stream before doing anything else.
1422 	 */
1423 	if (mir->mir_type == 0) {
1424 		freemsg(mp);
1425 		return;
1426 	}
1427 
1428 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1429 
1430 	switch (mp->b_datap->db_type) {
1431 	case M_DATA:
1432 		break;
1433 	case M_PROTO:
1434 	case M_PCPROTO:
1435 		if (MBLKL(mp) < sizeof (t_scalar_t)) {
1436 			RPCLOG(1, "mir_rput: runt TPI message (%d bytes)\n",
1437 			    (int)MBLKL(mp));
1438 			freemsg(mp);
1439 			return;
1440 		}
1441 		if (((union T_primitives *)mp->b_rptr)->type != T_DATA_IND) {
1442 			mir_rput_proto(q, mp);
1443 			return;
1444 		}
1445 
1446 		/* Throw away the T_DATA_IND block and continue with data. */
1447 		mp1 = mp;
1448 		mp = mp->b_cont;
1449 		freeb(mp1);
1450 		break;
1451 	case M_SETOPTS:
1452 		/*
1453 		 * If a module on the stream is trying set the Stream head's
1454 		 * high water mark, then set our hiwater to the requested
1455 		 * value.  We are the "stream head" for all inbound
1456 		 * data messages since messages are passed directly to KRPC.
1457 		 */
1458 		if (MBLKL(mp) >= sizeof (struct stroptions)) {
1459 			struct stroptions	*stropts;
1460 
1461 			stropts = (struct stroptions *)mp->b_rptr;
1462 			if ((stropts->so_flags & SO_HIWAT) &&
1463 			    !(stropts->so_flags & SO_BAND)) {
1464 				(void) strqset(q, QHIWAT, 0, stropts->so_hiwat);
1465 			}
1466 		}
1467 		putnext(q, mp);
1468 		return;
1469 	case M_FLUSH:
1470 		RPCLOG(32, "mir_rput: ignoring M_FLUSH %x ", *mp->b_rptr);
1471 		RPCLOG(32, "on q 0x%p\n", (void *)q);
1472 		putnext(q, mp);
1473 		return;
1474 	default:
1475 		putnext(q, mp);
1476 		return;
1477 	}
1478 
1479 	mutex_enter(&mir->mir_mutex);
1480 
1481 	/*
1482 	 * If this connection is closing, don't accept any new messages.
1483 	 */
1484 	if (mir->mir_svc_no_more_msgs) {
1485 		ASSERT(mir->mir_type == RPC_SERVER);
1486 		mutex_exit(&mir->mir_mutex);
1487 		freemsg(mp);
1488 		return;
1489 	}
1490 
1491 	/* Get local copies for quicker access. */
1492 	frag_len = mir->mir_frag_len;
1493 	frag_header = mir->mir_frag_header;
1494 	head_mp = mir->mir_head_mp;
1495 	tail_mp = mir->mir_tail_mp;
1496 
1497 	/* Loop, processing each message block in the mp chain separately. */
1498 	do {
1499 		cont_mp = mp->b_cont;
1500 		mp->b_cont = NULL;
1501 
1502 		/*
1503 		 * Drop zero-length mblks to prevent unbounded kernel memory
1504 		 * consumption.
1505 		 */
1506 		if (MBLKL(mp) == 0) {
1507 			freeb(mp);
1508 			continue;
1509 		}
1510 
1511 		/*
1512 		 * If frag_len is negative, we're still in the process of
1513 		 * building frag_header -- try to complete it with this mblk.
1514 		 */
1515 		while (frag_len < 0 && mp->b_rptr < mp->b_wptr) {
1516 			frag_len++;
1517 			frag_header <<= 8;
1518 			frag_header += *mp->b_rptr++;
1519 		}
1520 
1521 		if (MBLKL(mp) == 0 && frag_len < 0) {
1522 			/*
1523 			 * We consumed this mblk while trying to complete the
1524 			 * fragment header.  Free it and move on.
1525 			 */
1526 			freeb(mp);
1527 			continue;
1528 		}
1529 
1530 		ASSERT(frag_len >= 0);
1531 
1532 		/*
1533 		 * Now frag_header has the number of bytes in this fragment
1534 		 * and we're just waiting to collect them all.  Chain our
1535 		 * latest mblk onto the list and see if we now have enough
1536 		 * bytes to complete the fragment.
1537 		 */
1538 		if (head_mp == NULL) {
1539 			ASSERT(tail_mp == NULL);
1540 			head_mp = tail_mp = mp;
1541 		} else {
1542 			tail_mp->b_cont = mp;
1543 			tail_mp = mp;
1544 		}
1545 
1546 		frag_len += MBLKL(mp);
1547 		excess = frag_len - (frag_header & ~MIR_LASTFRAG);
1548 		if (excess < 0) {
1549 			/*
1550 			 * We still haven't received enough data to complete
1551 			 * the fragment, so continue on to the next mblk.
1552 			 */
1553 			continue;
1554 		}
1555 
1556 		/*
1557 		 * We've got a complete fragment.  If there are excess bytes,
1558 		 * then they're part of the next fragment's header (of either
1559 		 * this RPC message or the next RPC message).  Split that part
1560 		 * into its own mblk so that we can safely freeb() it when
1561 		 * building frag_header above.
1562 		 */
1563 		if (excess > 0) {
1564 			if ((mp1 = dupb(mp)) == NULL &&
1565 			    (mp1 = copyb(mp)) == NULL) {
1566 				freemsg(head_mp);
1567 				freemsg(cont_mp);
1568 				RPCLOG0(1, "mir_rput: dupb/copyb failed\n");
1569 				mir->mir_frag_header = 0;
1570 				mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
1571 				mir->mir_head_mp = NULL;
1572 				mir->mir_tail_mp = NULL;
1573 				mir_disconnect(q, mir);	/* drops mir_mutex */
1574 				return;
1575 			}
1576 
1577 			/*
1578 			 * Relink the message chain so that the next mblk is
1579 			 * the next fragment header, followed by the rest of
1580 			 * the message chain.
1581 			 */
1582 			mp1->b_cont = cont_mp;
1583 			cont_mp = mp1;
1584 
1585 			/*
1586 			 * Data in the new mblk begins at the next fragment,
1587 			 * and data in the old mblk ends at the next fragment.
1588 			 */
1589 			mp1->b_rptr = mp1->b_wptr - excess;
1590 			mp->b_wptr -= excess;
1591 		}
1592 
1593 		/*
1594 		 * Reset frag_len and frag_header for the next fragment.
1595 		 */
1596 		frag_len = -(int32_t)sizeof (uint32_t);
1597 		if (!(frag_header & MIR_LASTFRAG)) {
1598 			/*
1599 			 * The current fragment is complete, but more
1600 			 * fragments need to be processed before we can
1601 			 * pass along the RPC message headed at head_mp.
1602 			 */
1603 			frag_header = 0;
1604 			continue;
1605 		}
1606 		frag_header = 0;
1607 
1608 		/*
1609 		 * We've got a complete RPC message; pass it to the
1610 		 * appropriate consumer.
1611 		 */
1612 		switch (mir->mir_type) {
1613 		case RPC_CLIENT:
1614 			if (clnt_dispatch_notify(head_mp, mir->mir_zoneid)) {
1615 				/*
1616 				 * Mark this stream as active.  This marker
1617 				 * is used in mir_timer().
1618 				 */
1619 				mir->mir_clntreq = 1;
1620 				mir->mir_use_timestamp = ddi_get_lbolt();
1621 			} else {
1622 				freemsg(head_mp);
1623 			}
1624 			break;
1625 
1626 		case RPC_SERVER:
1627 			/*
1628 			 * Check for flow control before passing the
1629 			 * message to KRPC.
1630 			 */
1631 			if (!mir->mir_hold_inbound) {
1632 				if (mir->mir_krpc_cell) {
1633 					/*
1634 					 * If the reference count is 0
1635 					 * (not including this request),
1636 					 * then the stream is transitioning
1637 					 * from idle to non-idle.  In this case,
1638 					 * we cancel the idle timer.
1639 					 */
1640 					if (mir->mir_ref_cnt++ == 0)
1641 						stop_timer = B_TRUE;
1642 					if (mir_check_len(q,
1643 					    (int32_t)msgdsize(mp), mp))
1644 						return;
1645 					svc_queuereq(q, head_mp); /* to KRPC */
1646 				} else {
1647 					/*
1648 					 * Count # of times this happens. Should
1649 					 * be never, but experience shows
1650 					 * otherwise.
1651 					 */
1652 					mir_krpc_cell_null++;
1653 					freemsg(head_mp);
1654 				}
1655 			} else {
1656 				/*
1657 				 * If the outbound side of the stream is
1658 				 * flow controlled, then hold this message
1659 				 * until client catches up. mir_hold_inbound
1660 				 * is set in mir_wput and cleared in mir_wsrv.
1661 				 */
1662 				(void) putq(q, head_mp);
1663 				mir->mir_inrservice = B_TRUE;
1664 			}
1665 			break;
1666 		default:
1667 			RPCLOG(1, "mir_rput: unknown mir_type %d\n",
1668 			    mir->mir_type);
1669 			freemsg(head_mp);
1670 			break;
1671 		}
1672 
1673 		/*
1674 		 * Reset the chain since we're starting on a new RPC message.
1675 		 */
1676 		head_mp = tail_mp = NULL;
1677 	} while ((mp = cont_mp) != NULL);
1678 
1679 	/*
1680 	 * Sanity check the message length; if it's too large mir_check_len()
1681 	 * will shutdown the connection, drop mir_mutex, and return non-zero.
1682 	 */
1683 	if (head_mp != NULL && mir->mir_setup_complete &&
1684 	    mir_check_len(q, frag_len, head_mp))
1685 		return;
1686 
1687 	/* Save our local copies back in the mir structure. */
1688 	mir->mir_frag_header = frag_header;
1689 	mir->mir_frag_len = frag_len;
1690 	mir->mir_head_mp = head_mp;
1691 	mir->mir_tail_mp = tail_mp;
1692 
1693 	/*
1694 	 * The timer is stopped after the whole message chain is processed.
1695 	 * The reason is that stopping the timer releases the mir_mutex
1696 	 * lock temporarily.  This means that the request can be serviced
1697 	 * while we are still processing the message chain.  This is not
1698 	 * good.  So we stop the timer here instead.
1699 	 *
1700 	 * Note that if the timer fires before we stop it, it will not
1701 	 * do any harm as MIR_SVC_QUIESCED() is false and mir_timer()
1702 	 * will just return.
1703 	 */
1704 	if (stop_timer) {
1705 		RPCLOG(16, "mir_rput: stopping idle timer on 0x%p because "
1706 		    "ref cnt going to non zero\n", (void *)WR(q));
1707 		mir_svc_idle_stop(WR(q), mir);
1708 	}
1709 	mutex_exit(&mir->mir_mutex);
1710 }
1711 
1712 static void
1713 mir_rput_proto(queue_t *q, mblk_t *mp)
1714 {
1715 	mir_t	*mir = (mir_t *)q->q_ptr;
1716 	uint32_t	type;
1717 	uint32_t reason = 0;
1718 
1719 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1720 
1721 	type = ((union T_primitives *)mp->b_rptr)->type;
1722 	switch (mir->mir_type) {
1723 	case RPC_CLIENT:
1724 		switch (type) {
1725 		case T_DISCON_IND:
1726 			reason = ((struct T_discon_ind *)
1727 			    (mp->b_rptr))->DISCON_reason;
1728 			/*FALLTHROUGH*/
1729 		case T_ORDREL_IND:
1730 			mutex_enter(&mir->mir_mutex);
1731 			if (mir->mir_head_mp) {
1732 				freemsg(mir->mir_head_mp);
1733 				mir->mir_head_mp = (mblk_t *)0;
1734 				mir->mir_tail_mp = (mblk_t *)0;
1735 			}
1736 			/*
1737 			 * We are disconnecting, but not necessarily
1738 			 * closing. By not closing, we will fail to
1739 			 * pick up a possibly changed global timeout value,
1740 			 * unless we store it now.
1741 			 */
1742 			mir->mir_idle_timeout = clnt_idle_timeout;
1743 			mir_clnt_idle_stop(WR(q), mir);
1744 
1745 			/*
1746 			 * Even though we are unconnected, we still
1747 			 * leave the idle timer going on the client. The
1748 			 * reason for is that if we've disconnected due
1749 			 * to a server-side disconnect, reset, or connection
1750 			 * timeout, there is a possibility the client may
1751 			 * retry the RPC request. This retry needs to done on
1752 			 * the same bound address for the server to interpret
1753 			 * it as such. However, we don't want
1754 			 * to wait forever for that possibility. If the
1755 			 * end-point stays unconnected for mir_idle_timeout
1756 			 * units of time, then that is a signal to the
1757 			 * connection manager to give up waiting for the
1758 			 * application (eg. NFS) to send a retry.
1759 			 */
1760 			mir_clnt_idle_start(WR(q), mir);
1761 			mutex_exit(&mir->mir_mutex);
1762 			clnt_dispatch_notifyall(WR(q), type, reason);
1763 			freemsg(mp);
1764 			return;
1765 		case T_ERROR_ACK:
1766 		{
1767 			struct T_error_ack	*terror;
1768 
1769 			terror = (struct T_error_ack *)mp->b_rptr;
1770 			RPCLOG(1, "mir_rput_proto T_ERROR_ACK for queue 0x%p",
1771 			    (void *)q);
1772 			RPCLOG(1, " ERROR_prim: %s,",
1773 			    rpc_tpiprim2name(terror->ERROR_prim));
1774 			RPCLOG(1, " TLI_error: %s,",
1775 			    rpc_tpierr2name(terror->TLI_error));
1776 			RPCLOG(1, " UNIX_error: %d\n", terror->UNIX_error);
1777 			if (terror->ERROR_prim == T_DISCON_REQ)  {
1778 				clnt_dispatch_notifyall(WR(q), type, reason);
1779 				freemsg(mp);
1780 				return;
1781 			} else {
1782 				if (clnt_dispatch_notifyconn(WR(q), mp))
1783 					return;
1784 			}
1785 			break;
1786 		}
1787 		case T_OK_ACK:
1788 		{
1789 			struct T_ok_ack	*tok = (struct T_ok_ack *)mp->b_rptr;
1790 
1791 			if (tok->CORRECT_prim == T_DISCON_REQ) {
1792 				clnt_dispatch_notifyall(WR(q), type, reason);
1793 				freemsg(mp);
1794 				return;
1795 			} else {
1796 				if (clnt_dispatch_notifyconn(WR(q), mp))
1797 					return;
1798 			}
1799 			break;
1800 		}
1801 		case T_CONN_CON:
1802 		case T_INFO_ACK:
1803 		case T_OPTMGMT_ACK:
1804 			if (clnt_dispatch_notifyconn(WR(q), mp))
1805 				return;
1806 			break;
1807 		case T_BIND_ACK:
1808 			break;
1809 		default:
1810 			RPCLOG(1, "mir_rput: unexpected message %d "
1811 			    "for KRPC client\n",
1812 			    ((union T_primitives *)mp->b_rptr)->type);
1813 			break;
1814 		}
1815 		break;
1816 
1817 	case RPC_SERVER:
1818 		switch (type) {
1819 		case T_BIND_ACK:
1820 		{
1821 			struct T_bind_ack	*tbind;
1822 
1823 			/*
1824 			 * If this is a listening stream, then shut
1825 			 * off the idle timer.
1826 			 */
1827 			tbind = (struct T_bind_ack *)mp->b_rptr;
1828 			if (tbind->CONIND_number > 0) {
1829 				mutex_enter(&mir->mir_mutex);
1830 				mir_svc_idle_stop(WR(q), mir);
1831 
1832 				/*
1833 				 * mark this as a listen endpoint
1834 				 * for special handling.
1835 				 */
1836 
1837 				mir->mir_listen_stream = 1;
1838 				mutex_exit(&mir->mir_mutex);
1839 			}
1840 			break;
1841 		}
1842 		case T_DISCON_IND:
1843 		case T_ORDREL_IND:
1844 			RPCLOG(16, "mir_rput_proto: got %s indication\n",
1845 			    type == T_DISCON_IND ? "disconnect"
1846 			    : "orderly release");
1847 
1848 			/*
1849 			 * For listen endpoint just pass
1850 			 * on the message.
1851 			 */
1852 
1853 			if (mir->mir_listen_stream)
1854 				break;
1855 
1856 			mutex_enter(&mir->mir_mutex);
1857 
1858 			/*
1859 			 * If client wants to break off connection, record
1860 			 * that fact.
1861 			 */
1862 			mir_svc_start_close(WR(q), mir);
1863 
1864 			/*
1865 			 * If we are idle, then send the orderly release
1866 			 * or disconnect indication to nfsd.
1867 			 */
1868 			if (MIR_SVC_QUIESCED(mir)) {
1869 				mutex_exit(&mir->mir_mutex);
1870 				break;
1871 			}
1872 
1873 			RPCLOG(16, "mir_rput_proto: not idle, so "
1874 			    "disconnect/ord rel indication not passed "
1875 			    "upstream on 0x%p\n", (void *)q);
1876 
1877 			/*
1878 			 * Hold the indication until we get idle
1879 			 * If there already is an indication stored,
1880 			 * replace it if the new one is a disconnect. The
1881 			 * reasoning is that disconnection takes less time
1882 			 * to process, and once a client decides to
1883 			 * disconnect, we should do that.
1884 			 */
1885 			if (mir->mir_svc_pend_mp) {
1886 				if (type == T_DISCON_IND) {
1887 					RPCLOG(16, "mir_rput_proto: replacing"
1888 					    " held disconnect/ord rel"
1889 					    " indication with disconnect on"
1890 					    " 0x%p\n", (void *)q);
1891 
1892 					freemsg(mir->mir_svc_pend_mp);
1893 					mir->mir_svc_pend_mp = mp;
1894 				} else {
1895 					RPCLOG(16, "mir_rput_proto: already "
1896 					    "held a disconnect/ord rel "
1897 					    "indication. freeing ord rel "
1898 					    "ind on 0x%p\n", (void *)q);
1899 					freemsg(mp);
1900 				}
1901 			} else
1902 				mir->mir_svc_pend_mp = mp;
1903 
1904 			mutex_exit(&mir->mir_mutex);
1905 			return;
1906 
1907 		default:
1908 			/* nfsd handles server-side non-data messages. */
1909 			break;
1910 		}
1911 		break;
1912 
1913 	default:
1914 		break;
1915 	}
1916 
1917 	putnext(q, mp);
1918 }
1919 
1920 /*
1921  * The server-side read queues are used to hold inbound messages while
1922  * outbound flow control is exerted.  When outbound flow control is
1923  * relieved, mir_wsrv qenables the read-side queue.  Read-side queues
1924  * are not enabled by STREAMS and are explicitly noenable'ed in mir_open.
1925  *
1926  * For the server side,  we have two types of messages queued. The first type
1927  * are messages that are ready to be XDR decoded and and then sent to the
1928  * RPC program's dispatch routine. The second type are "raw" messages that
1929  * haven't been processed, i.e. assembled from rpc record fragements into
1930  * full requests. The only time we will see the second type of message
1931  * queued is if we have a memory allocation failure while processing a
1932  * a raw message. The field mir_first_non_processed_mblk will mark the
1933  * first such raw message. So the flow for server side is:
1934  *
1935  *	- send processed queued messages to kRPC until we run out or find
1936  *	  one that needs additional processing because we were short on memory
1937  *	  earlier
1938  *	- process a message that was deferred because of lack of
1939  *	  memory
1940  *	- continue processing messages until the queue empties or we
1941  *	  have to stop because of lack of memory
1942  *	- during each of the above phase, if the queue is empty and
1943  *	  there are no pending messages that were passed to the RPC
1944  *	  layer, send upstream the pending disconnect/ordrel indication if
1945  *	  there is one
1946  *
1947  * The read-side queue is also enabled by a bufcall callback if dupmsg
1948  * fails in mir_rput.
1949  */
1950 static void
1951 mir_rsrv(queue_t *q)
1952 {
1953 	mir_t	*mir;
1954 	mblk_t	*mp;
1955 	mblk_t	*cmp = NULL;
1956 	boolean_t stop_timer = B_FALSE;
1957 
1958 	mir = (mir_t *)q->q_ptr;
1959 	mutex_enter(&mir->mir_mutex);
1960 
1961 	mp = NULL;
1962 	switch (mir->mir_type) {
1963 	case RPC_SERVER:
1964 		if (mir->mir_ref_cnt == 0)
1965 			mir->mir_hold_inbound = 0;
1966 		if (mir->mir_hold_inbound) {
1967 
1968 			ASSERT(cmp == NULL);
1969 			if (q->q_first == NULL) {
1970 
1971 				MIR_CLEAR_INRSRV(mir);
1972 
1973 				if (MIR_SVC_QUIESCED(mir)) {
1974 					cmp = mir->mir_svc_pend_mp;
1975 					mir->mir_svc_pend_mp = NULL;
1976 				}
1977 			}
1978 
1979 			mutex_exit(&mir->mir_mutex);
1980 
1981 			if (cmp != NULL) {
1982 				RPCLOG(16, "mir_rsrv: line %d: sending a held "
1983 				    "disconnect/ord rel indication upstream\n",
1984 				    __LINE__);
1985 				putnext(q, cmp);
1986 			}
1987 
1988 			return;
1989 		}
1990 		while (mp = getq(q)) {
1991 			if (mir->mir_krpc_cell &&
1992 			    (mir->mir_svc_no_more_msgs == 0)) {
1993 				/*
1994 				 * If we were idle, turn off idle timer since
1995 				 * we aren't idle any more.
1996 				 */
1997 				if (mir->mir_ref_cnt++ == 0)
1998 					stop_timer = B_TRUE;
1999 				if (mir_check_len(q,
2000 				    (int32_t)msgdsize(mp), mp))
2001 					return;
2002 				svc_queuereq(q, mp);
2003 			} else {
2004 				/*
2005 				 * Count # of times this happens. Should be
2006 				 * never, but experience shows otherwise.
2007 				 */
2008 				if (mir->mir_krpc_cell == NULL)
2009 					mir_krpc_cell_null++;
2010 				freemsg(mp);
2011 			}
2012 		}
2013 		break;
2014 	case RPC_CLIENT:
2015 		break;
2016 	default:
2017 		RPCLOG(1, "mir_rsrv: unexpected mir_type %d\n", mir->mir_type);
2018 
2019 		if (q->q_first == NULL)
2020 			MIR_CLEAR_INRSRV(mir);
2021 
2022 		mutex_exit(&mir->mir_mutex);
2023 
2024 		return;
2025 	}
2026 
2027 	/*
2028 	 * The timer is stopped after all the messages are processed.
2029 	 * The reason is that stopping the timer releases the mir_mutex
2030 	 * lock temporarily.  This means that the request can be serviced
2031 	 * while we are still processing the message queue.  This is not
2032 	 * good.  So we stop the timer here instead.
2033 	 */
2034 	if (stop_timer)  {
2035 		RPCLOG(16, "mir_rsrv stopping idle timer on 0x%p because ref "
2036 		    "cnt going to non zero\n", (void *)WR(q));
2037 		mir_svc_idle_stop(WR(q), mir);
2038 	}
2039 
2040 	if (q->q_first == NULL) {
2041 
2042 		MIR_CLEAR_INRSRV(mir);
2043 
2044 		ASSERT(cmp == NULL);
2045 		if (mir->mir_type == RPC_SERVER && MIR_SVC_QUIESCED(mir)) {
2046 			cmp = mir->mir_svc_pend_mp;
2047 			mir->mir_svc_pend_mp = NULL;
2048 		}
2049 
2050 		mutex_exit(&mir->mir_mutex);
2051 
2052 		if (cmp != NULL) {
2053 			RPCLOG(16, "mir_rsrv: line %d: sending a held "
2054 			    "disconnect/ord rel indication upstream\n",
2055 			    __LINE__);
2056 			putnext(q, cmp);
2057 		}
2058 
2059 		return;
2060 	}
2061 	mutex_exit(&mir->mir_mutex);
2062 }
2063 
2064 static int mir_svc_policy_fails;
2065 
2066 /*
2067  * Called to send an event code to nfsd/lockd so that it initiates
2068  * connection close.
2069  */
2070 static int
2071 mir_svc_policy_notify(queue_t *q, int event)
2072 {
2073 	mblk_t	*mp;
2074 #ifdef DEBUG
2075 	mir_t *mir = (mir_t *)q->q_ptr;
2076 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2077 #endif
2078 	ASSERT(q->q_flag & QREADR);
2079 
2080 	/*
2081 	 * Create an M_DATA message with the event code and pass it to the
2082 	 * Stream head (nfsd or whoever created the stream will consume it).
2083 	 */
2084 	mp = allocb(sizeof (int), BPRI_HI);
2085 
2086 	if (!mp) {
2087 
2088 		mir_svc_policy_fails++;
2089 		RPCLOG(16, "mir_svc_policy_notify: could not allocate event "
2090 		    "%d\n", event);
2091 		return (ENOMEM);
2092 	}
2093 
2094 	U32_TO_BE32(event, mp->b_rptr);
2095 	mp->b_wptr = mp->b_rptr + sizeof (int);
2096 	putnext(q, mp);
2097 	return (0);
2098 }
2099 
2100 /*
2101  * Server side: start the close phase. We want to get this rpcmod slot in an
2102  * idle state before mir_close() is called.
2103  */
2104 static void
2105 mir_svc_start_close(queue_t *wq, mir_t *mir)
2106 {
2107 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
2108 	ASSERT((wq->q_flag & QREADR) == 0);
2109 	ASSERT(mir->mir_type == RPC_SERVER);
2110 
2111 
2112 	/*
2113 	 * Do not accept any more messages.
2114 	 */
2115 	mir->mir_svc_no_more_msgs = 1;
2116 
2117 	/*
2118 	 * Next two statements will make the read service procedure invoke
2119 	 * svc_queuereq() on everything stuck in the streams read queue.
2120 	 * It's not necessary because enabling the write queue will
2121 	 * have the same effect, but why not speed the process along?
2122 	 */
2123 	mir->mir_hold_inbound = 0;
2124 	qenable(RD(wq));
2125 
2126 	/*
2127 	 * Meanwhile force the write service procedure to send the
2128 	 * responses downstream, regardless of flow control.
2129 	 */
2130 	qenable(wq);
2131 }
2132 
2133 /*
2134  * This routine is called directly by KRPC after a request is completed,
2135  * whether a reply was sent or the request was dropped.
2136  */
2137 static void
2138 mir_svc_release(queue_t *wq, mblk_t *mp)
2139 {
2140 	mir_t   *mir = (mir_t *)wq->q_ptr;
2141 	mblk_t	*cmp = NULL;
2142 
2143 	ASSERT((wq->q_flag & QREADR) == 0);
2144 	if (mp)
2145 		freemsg(mp);
2146 
2147 	mutex_enter(&mir->mir_mutex);
2148 
2149 	/*
2150 	 * Start idle processing if this is the last reference.
2151 	 */
2152 	if ((mir->mir_ref_cnt == 1) && (mir->mir_inrservice == 0)) {
2153 		cmp = mir->mir_svc_pend_mp;
2154 		mir->mir_svc_pend_mp = NULL;
2155 	}
2156 
2157 	if (cmp) {
2158 		RPCLOG(16, "mir_svc_release: sending a held "
2159 		    "disconnect/ord rel indication upstream on queue 0x%p\n",
2160 		    (void *)RD(wq));
2161 
2162 		mutex_exit(&mir->mir_mutex);
2163 
2164 		putnext(RD(wq), cmp);
2165 
2166 		mutex_enter(&mir->mir_mutex);
2167 	}
2168 
2169 	/*
2170 	 * Start idle processing if this is the last reference.
2171 	 */
2172 	if (mir->mir_ref_cnt == 1 && mir->mir_inrservice == 0) {
2173 
2174 		RPCLOG(16, "mir_svc_release starting idle timer on 0x%p "
2175 		    "because ref cnt is zero\n", (void *) wq);
2176 
2177 		mir_svc_idle_start(wq, mir);
2178 	}
2179 
2180 	mir->mir_ref_cnt--;
2181 	ASSERT(mir->mir_ref_cnt >= 0);
2182 
2183 	/*
2184 	 * Wake up the thread waiting to close.
2185 	 */
2186 
2187 	if ((mir->mir_ref_cnt == 0) && mir->mir_closing)
2188 		cv_signal(&mir->mir_condvar);
2189 
2190 	mutex_exit(&mir->mir_mutex);
2191 }
2192 
2193 /*
2194  * This routine is called by server-side KRPC when it is ready to
2195  * handle inbound messages on the stream.
2196  */
2197 static void
2198 mir_svc_start(queue_t *wq)
2199 {
2200 	mir_t   *mir = (mir_t *)wq->q_ptr;
2201 
2202 	/*
2203 	 * no longer need to take the mir_mutex because the
2204 	 * mir_setup_complete field has been moved out of
2205 	 * the binary field protected by the mir_mutex.
2206 	 */
2207 
2208 	mir->mir_setup_complete = 1;
2209 	qenable(RD(wq));
2210 }
2211 
2212 /*
2213  * client side wrapper for stopping timer with normal idle timeout.
2214  */
2215 static void
2216 mir_clnt_idle_stop(queue_t *wq, mir_t *mir)
2217 {
2218 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
2219 	ASSERT((wq->q_flag & QREADR) == 0);
2220 	ASSERT(mir->mir_type == RPC_CLIENT);
2221 
2222 	mir_timer_stop(mir);
2223 }
2224 
2225 /*
2226  * client side wrapper for stopping timer with normal idle timeout.
2227  */
2228 static void
2229 mir_clnt_idle_start(queue_t *wq, mir_t *mir)
2230 {
2231 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
2232 	ASSERT((wq->q_flag & QREADR) == 0);
2233 	ASSERT(mir->mir_type == RPC_CLIENT);
2234 
2235 	mir_timer_start(wq, mir, mir->mir_idle_timeout);
2236 }
2237 
2238 /*
2239  * client side only. Forces rpcmod to stop sending T_ORDREL_REQs on
2240  * end-points that aren't connected.
2241  */
2242 static void
2243 mir_clnt_idle_do_stop(queue_t *wq)
2244 {
2245 	mir_t   *mir = (mir_t *)wq->q_ptr;
2246 
2247 	RPCLOG(1, "mir_clnt_idle_do_stop: wq 0x%p\n", (void *)wq);
2248 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2249 	mutex_enter(&mir->mir_mutex);
2250 	mir_clnt_idle_stop(wq, mir);
2251 	mutex_exit(&mir->mir_mutex);
2252 }
2253 
2254 /*
2255  * Timer handler.  It handles idle timeout and memory shortage problem.
2256  */
2257 static void
2258 mir_timer(void *arg)
2259 {
2260 	queue_t *wq = (queue_t *)arg;
2261 	mir_t *mir = (mir_t *)wq->q_ptr;
2262 	boolean_t notify;
2263 	clock_t now;
2264 
2265 	mutex_enter(&mir->mir_mutex);
2266 
2267 	/*
2268 	 * mir_timer_call is set only when either mir_timer_[start|stop]
2269 	 * is progressing.  And mir_timer() can only be run while they
2270 	 * are progressing if the timer is being stopped.  So just
2271 	 * return.
2272 	 */
2273 	if (mir->mir_timer_call) {
2274 		mutex_exit(&mir->mir_mutex);
2275 		return;
2276 	}
2277 	mir->mir_timer_id = 0;
2278 
2279 	switch (mir->mir_type) {
2280 	case RPC_CLIENT:
2281 
2282 		/*
2283 		 * For clients, the timer fires at clnt_idle_timeout
2284 		 * intervals.  If the activity marker (mir_clntreq) is
2285 		 * zero, then the stream has been idle since the last
2286 		 * timer event and we notify KRPC.  If mir_clntreq is
2287 		 * non-zero, then the stream is active and we just
2288 		 * restart the timer for another interval.  mir_clntreq
2289 		 * is set to 1 in mir_wput for every request passed
2290 		 * downstream.
2291 		 *
2292 		 * If this was a memory shortage timer reset the idle
2293 		 * timeout regardless; the mir_clntreq will not be a
2294 		 * valid indicator.
2295 		 *
2296 		 * The timer is initially started in mir_wput during
2297 		 * RPC_CLIENT ioctl processing.
2298 		 *
2299 		 * The timer interval can be changed for individual
2300 		 * streams with the ND variable "mir_idle_timeout".
2301 		 */
2302 		now = ddi_get_lbolt();
2303 		if (mir->mir_clntreq > 0 && mir->mir_use_timestamp +
2304 		    MSEC_TO_TICK(mir->mir_idle_timeout) - now >= 0) {
2305 			clock_t tout;
2306 
2307 			tout = mir->mir_idle_timeout -
2308 			    TICK_TO_MSEC(now - mir->mir_use_timestamp);
2309 			if (tout < 0)
2310 				tout = 1000;
2311 #if 0
2312 			printf("mir_timer[%d < %d + %d]: reset client timer "
2313 			    "to %d (ms)\n", TICK_TO_MSEC(now),
2314 			    TICK_TO_MSEC(mir->mir_use_timestamp),
2315 			    mir->mir_idle_timeout, tout);
2316 #endif
2317 			mir->mir_clntreq = 0;
2318 			mir_timer_start(wq, mir, tout);
2319 			mutex_exit(&mir->mir_mutex);
2320 			return;
2321 		}
2322 #if 0
2323 printf("mir_timer[%d]: doing client timeout\n", now / hz);
2324 #endif
2325 		/*
2326 		 * We are disconnecting, but not necessarily
2327 		 * closing. By not closing, we will fail to
2328 		 * pick up a possibly changed global timeout value,
2329 		 * unless we store it now.
2330 		 */
2331 		mir->mir_idle_timeout = clnt_idle_timeout;
2332 		mir_clnt_idle_start(wq, mir);
2333 
2334 		mutex_exit(&mir->mir_mutex);
2335 		/*
2336 		 * We pass T_ORDREL_REQ as an integer value
2337 		 * to KRPC as the indication that the stream
2338 		 * is idle.  This is not a T_ORDREL_REQ message,
2339 		 * it is just a convenient value since we call
2340 		 * the same KRPC routine for T_ORDREL_INDs and
2341 		 * T_DISCON_INDs.
2342 		 */
2343 		clnt_dispatch_notifyall(wq, T_ORDREL_REQ, 0);
2344 		return;
2345 
2346 	case RPC_SERVER:
2347 
2348 		/*
2349 		 * For servers, the timer is only running when the stream
2350 		 * is really idle or memory is short.  The timer is started
2351 		 * by mir_wput when mir_type is set to RPC_SERVER and
2352 		 * by mir_svc_idle_start whenever the stream goes idle
2353 		 * (mir_ref_cnt == 0).  The timer is cancelled in
2354 		 * mir_rput whenever a new inbound request is passed to KRPC
2355 		 * and the stream was previously idle.
2356 		 *
2357 		 * The timer interval can be changed for individual
2358 		 * streams with the ND variable "mir_idle_timeout".
2359 		 *
2360 		 * If the stream is not idle do nothing.
2361 		 */
2362 		if (!MIR_SVC_QUIESCED(mir)) {
2363 			mutex_exit(&mir->mir_mutex);
2364 			return;
2365 		}
2366 
2367 		notify = !mir->mir_inrservice;
2368 		mutex_exit(&mir->mir_mutex);
2369 
2370 		/*
2371 		 * If there is no packet queued up in read queue, the stream
2372 		 * is really idle so notify nfsd to close it.
2373 		 */
2374 		if (notify) {
2375 			RPCLOG(16, "mir_timer: telling stream head listener "
2376 			    "to close stream (0x%p)\n", (void *) RD(wq));
2377 			(void) mir_svc_policy_notify(RD(wq), 1);
2378 		}
2379 		return;
2380 	default:
2381 		RPCLOG(1, "mir_timer: unexpected mir_type %d\n",
2382 		    mir->mir_type);
2383 		mutex_exit(&mir->mir_mutex);
2384 		return;
2385 	}
2386 }
2387 
2388 /*
2389  * Called by the RPC package to send either a call or a return, or a
2390  * transport connection request.  Adds the record marking header.
2391  */
2392 static void
2393 mir_wput(queue_t *q, mblk_t *mp)
2394 {
2395 	uint_t	frag_header;
2396 	mir_t	*mir = (mir_t *)q->q_ptr;
2397 	uchar_t	*rptr = mp->b_rptr;
2398 
2399 	if (!mir) {
2400 		freemsg(mp);
2401 		return;
2402 	}
2403 
2404 	if (mp->b_datap->db_type != M_DATA) {
2405 		mir_wput_other(q, mp);
2406 		return;
2407 	}
2408 
2409 	if (mir->mir_ordrel_pending == 1) {
2410 		freemsg(mp);
2411 		RPCLOG(16, "mir_wput wq 0x%p: got data after T_ORDREL_REQ\n",
2412 		    (void *)q);
2413 		return;
2414 	}
2415 
2416 	frag_header = (uint_t)DLEN(mp);
2417 	frag_header |= MIR_LASTFRAG;
2418 
2419 	/* Stick in the 4 byte record marking header. */
2420 	if ((rptr - mp->b_datap->db_base) < sizeof (uint32_t) ||
2421 	    !IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
2422 		/*
2423 		 * Since we know that M_DATA messages are created exclusively
2424 		 * by KRPC, we expect that KRPC will leave room for our header
2425 		 * and 4 byte align which is normal for XDR.
2426 		 * If KRPC (or someone else) does not cooperate, then we
2427 		 * just throw away the message.
2428 		 */
2429 		RPCLOG(1, "mir_wput: KRPC did not leave space for record "
2430 		    "fragment header (%d bytes left)\n",
2431 		    (int)(rptr - mp->b_datap->db_base));
2432 		freemsg(mp);
2433 		return;
2434 	}
2435 	rptr -= sizeof (uint32_t);
2436 	*(uint32_t *)rptr = htonl(frag_header);
2437 	mp->b_rptr = rptr;
2438 
2439 	mutex_enter(&mir->mir_mutex);
2440 	if (mir->mir_type == RPC_CLIENT) {
2441 		/*
2442 		 * For the client, set mir_clntreq to indicate that the
2443 		 * connection is active.
2444 		 */
2445 		mir->mir_clntreq = 1;
2446 		mir->mir_use_timestamp = ddi_get_lbolt();
2447 	}
2448 
2449 	/*
2450 	 * If we haven't already queued some data and the downstream module
2451 	 * can accept more data, send it on, otherwise we queue the message
2452 	 * and take other actions depending on mir_type.
2453 	 */
2454 	if (!mir->mir_inwservice && MIR_WCANPUTNEXT(mir, q)) {
2455 		mutex_exit(&mir->mir_mutex);
2456 
2457 		/*
2458 		 * Now we pass the RPC message downstream.
2459 		 */
2460 		putnext(q, mp);
2461 		return;
2462 	}
2463 
2464 	switch (mir->mir_type) {
2465 	case RPC_CLIENT:
2466 		/*
2467 		 * Check for a previous duplicate request on the
2468 		 * queue.  If there is one, then we throw away
2469 		 * the current message and let the previous one
2470 		 * go through.  If we can't find a duplicate, then
2471 		 * send this one.  This tap dance is an effort
2472 		 * to reduce traffic and processing requirements
2473 		 * under load conditions.
2474 		 */
2475 		if (mir_clnt_dup_request(q, mp)) {
2476 			mutex_exit(&mir->mir_mutex);
2477 			freemsg(mp);
2478 			return;
2479 		}
2480 		break;
2481 	case RPC_SERVER:
2482 		/*
2483 		 * Set mir_hold_inbound so that new inbound RPC
2484 		 * messages will be held until the client catches
2485 		 * up on the earlier replies.  This flag is cleared
2486 		 * in mir_wsrv after flow control is relieved;
2487 		 * the read-side queue is also enabled at that time.
2488 		 */
2489 		mir->mir_hold_inbound = 1;
2490 		break;
2491 	default:
2492 		RPCLOG(1, "mir_wput: unexpected mir_type %d\n", mir->mir_type);
2493 		break;
2494 	}
2495 	mir->mir_inwservice = 1;
2496 	(void) putq(q, mp);
2497 	mutex_exit(&mir->mir_mutex);
2498 }
2499 
2500 static void
2501 mir_wput_other(queue_t *q, mblk_t *mp)
2502 {
2503 	mir_t	*mir = (mir_t *)q->q_ptr;
2504 	struct iocblk	*iocp;
2505 	uchar_t	*rptr = mp->b_rptr;
2506 	bool_t	flush_in_svc = FALSE;
2507 
2508 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2509 	switch (mp->b_datap->db_type) {
2510 	case M_IOCTL:
2511 		iocp = (struct iocblk *)rptr;
2512 		switch (iocp->ioc_cmd) {
2513 		case RPC_CLIENT:
2514 			mutex_enter(&mir->mir_mutex);
2515 			if (mir->mir_type != 0 &&
2516 			    mir->mir_type != iocp->ioc_cmd) {
2517 ioc_eperm:
2518 				mutex_exit(&mir->mir_mutex);
2519 				iocp->ioc_error = EPERM;
2520 				iocp->ioc_count = 0;
2521 				mp->b_datap->db_type = M_IOCACK;
2522 				qreply(q, mp);
2523 				return;
2524 			}
2525 
2526 			mir->mir_type = iocp->ioc_cmd;
2527 
2528 			/*
2529 			 * Clear mir_hold_inbound which was set to 1 by
2530 			 * mir_open.  This flag is not used on client
2531 			 * streams.
2532 			 */
2533 			mir->mir_hold_inbound = 0;
2534 			mir->mir_max_msg_sizep = &clnt_max_msg_size;
2535 
2536 			/*
2537 			 * Start the idle timer.  See mir_timer() for more
2538 			 * information on how client timers work.
2539 			 */
2540 			mir->mir_idle_timeout = clnt_idle_timeout;
2541 			mir_clnt_idle_start(q, mir);
2542 			mutex_exit(&mir->mir_mutex);
2543 
2544 			mp->b_datap->db_type = M_IOCACK;
2545 			qreply(q, mp);
2546 			return;
2547 		case RPC_SERVER:
2548 			mutex_enter(&mir->mir_mutex);
2549 			if (mir->mir_type != 0 &&
2550 			    mir->mir_type != iocp->ioc_cmd)
2551 				goto ioc_eperm;
2552 
2553 			/*
2554 			 * We don't clear mir_hold_inbound here because
2555 			 * mir_hold_inbound is used in the flow control
2556 			 * model. If we cleared it here, then we'd commit
2557 			 * a small violation to the model where the transport
2558 			 * might immediately block downstream flow.
2559 			 */
2560 
2561 			mir->mir_type = iocp->ioc_cmd;
2562 			mir->mir_max_msg_sizep = &svc_max_msg_size;
2563 
2564 			/*
2565 			 * Start the idle timer.  See mir_timer() for more
2566 			 * information on how server timers work.
2567 			 *
2568 			 * Note that it is important to start the idle timer
2569 			 * here so that connections time out even if we
2570 			 * never receive any data on them.
2571 			 */
2572 			mir->mir_idle_timeout = svc_idle_timeout;
2573 			RPCLOG(16, "mir_wput_other starting idle timer on 0x%p "
2574 			    "because we got RPC_SERVER ioctl\n", (void *)q);
2575 			mir_svc_idle_start(q, mir);
2576 			mutex_exit(&mir->mir_mutex);
2577 
2578 			mp->b_datap->db_type = M_IOCACK;
2579 			qreply(q, mp);
2580 			return;
2581 		default:
2582 			break;
2583 		}
2584 		break;
2585 
2586 	case M_PROTO:
2587 		if (mir->mir_type == RPC_CLIENT) {
2588 			/*
2589 			 * We are likely being called from the context of a
2590 			 * service procedure. So we need to enqueue. However
2591 			 * enqueing may put our message behind data messages.
2592 			 * So flush the data first.
2593 			 */
2594 			flush_in_svc = TRUE;
2595 		}
2596 		if ((mp->b_wptr - rptr) < sizeof (uint32_t) ||
2597 		    !IS_P2ALIGNED(rptr, sizeof (uint32_t)))
2598 			break;
2599 
2600 		switch (((union T_primitives *)rptr)->type) {
2601 		case T_DATA_REQ:
2602 			/* Don't pass T_DATA_REQ messages downstream. */
2603 			freemsg(mp);
2604 			return;
2605 		case T_ORDREL_REQ:
2606 			RPCLOG(8, "mir_wput_other wq 0x%p: got T_ORDREL_REQ\n",
2607 			    (void *)q);
2608 			mutex_enter(&mir->mir_mutex);
2609 			if (mir->mir_type != RPC_SERVER) {
2610 				/*
2611 				 * We are likely being called from
2612 				 * clnt_dispatch_notifyall(). Sending
2613 				 * a T_ORDREL_REQ will result in
2614 				 * a some kind of _IND message being sent,
2615 				 * will be another call to
2616 				 * clnt_dispatch_notifyall(). To keep the stack
2617 				 * lean, queue this message.
2618 				 */
2619 				mir->mir_inwservice = 1;
2620 				(void) putq(q, mp);
2621 				mutex_exit(&mir->mir_mutex);
2622 				return;
2623 			}
2624 
2625 			/*
2626 			 * Mark the structure such that we don't accept any
2627 			 * more requests from client. We could defer this
2628 			 * until we actually send the orderly release
2629 			 * request downstream, but all that does is delay
2630 			 * the closing of this stream.
2631 			 */
2632 			RPCLOG(16, "mir_wput_other wq 0x%p: got T_ORDREL_REQ "
2633 			    " so calling mir_svc_start_close\n", (void *)q);
2634 
2635 			mir_svc_start_close(q, mir);
2636 
2637 			/*
2638 			 * If we have sent down a T_ORDREL_REQ, don't send
2639 			 * any more.
2640 			 */
2641 			if (mir->mir_ordrel_pending) {
2642 				freemsg(mp);
2643 				mutex_exit(&mir->mir_mutex);
2644 				return;
2645 			}
2646 
2647 			/*
2648 			 * If the stream is not idle, then we hold the
2649 			 * orderly release until it becomes idle.  This
2650 			 * ensures that KRPC will be able to reply to
2651 			 * all requests that we have passed to it.
2652 			 *
2653 			 * We also queue the request if there is data already
2654 			 * queued, because we cannot allow the T_ORDREL_REQ
2655 			 * to go before data. When we had a separate reply
2656 			 * count, this was not a problem, because the
2657 			 * reply count was reconciled when mir_wsrv()
2658 			 * completed.
2659 			 */
2660 			if (!MIR_SVC_QUIESCED(mir) ||
2661 			    mir->mir_inwservice == 1) {
2662 				mir->mir_inwservice = 1;
2663 				(void) putq(q, mp);
2664 
2665 				RPCLOG(16, "mir_wput_other: queuing "
2666 				    "T_ORDREL_REQ on 0x%p\n", (void *)q);
2667 
2668 				mutex_exit(&mir->mir_mutex);
2669 				return;
2670 			}
2671 
2672 			/*
2673 			 * Mark the structure so that we know we sent
2674 			 * an orderly release request, and reset the idle timer.
2675 			 */
2676 			mir->mir_ordrel_pending = 1;
2677 
2678 			RPCLOG(16, "mir_wput_other: calling mir_svc_idle_start"
2679 			    " on 0x%p because we got T_ORDREL_REQ\n",
2680 			    (void *)q);
2681 
2682 			mir_svc_idle_start(q, mir);
2683 			mutex_exit(&mir->mir_mutex);
2684 
2685 			/*
2686 			 * When we break, we will putnext the T_ORDREL_REQ.
2687 			 */
2688 			break;
2689 
2690 		case T_CONN_REQ:
2691 			mutex_enter(&mir->mir_mutex);
2692 			if (mir->mir_head_mp != NULL) {
2693 				freemsg(mir->mir_head_mp);
2694 				mir->mir_head_mp = NULL;
2695 				mir->mir_tail_mp = NULL;
2696 			}
2697 			mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
2698 			/*
2699 			 * Restart timer in case mir_clnt_idle_do_stop() was
2700 			 * called.
2701 			 */
2702 			mir->mir_idle_timeout = clnt_idle_timeout;
2703 			mir_clnt_idle_stop(q, mir);
2704 			mir_clnt_idle_start(q, mir);
2705 			mutex_exit(&mir->mir_mutex);
2706 			break;
2707 
2708 		default:
2709 			/*
2710 			 * T_DISCON_REQ is one of the interesting default
2711 			 * cases here. Ideally, an M_FLUSH is done before
2712 			 * T_DISCON_REQ is done. However, that is somewhat
2713 			 * cumbersome for clnt_cots.c to do. So we queue
2714 			 * T_DISCON_REQ, and let the service procedure
2715 			 * flush all M_DATA.
2716 			 */
2717 			break;
2718 		}
2719 		/* fallthru */;
2720 	default:
2721 		if (mp->b_datap->db_type >= QPCTL) {
2722 			if (mp->b_datap->db_type == M_FLUSH) {
2723 				if (mir->mir_type == RPC_CLIENT &&
2724 				    *mp->b_rptr & FLUSHW) {
2725 					RPCLOG(32, "mir_wput_other: flushing "
2726 					    "wq 0x%p\n", (void *)q);
2727 					if (*mp->b_rptr & FLUSHBAND) {
2728 						flushband(q, *(mp->b_rptr + 1),
2729 						    FLUSHDATA);
2730 					} else {
2731 						flushq(q, FLUSHDATA);
2732 					}
2733 				} else {
2734 					RPCLOG(32, "mir_wput_other: ignoring "
2735 					    "M_FLUSH on wq 0x%p\n", (void *)q);
2736 				}
2737 			}
2738 			break;
2739 		}
2740 
2741 		mutex_enter(&mir->mir_mutex);
2742 		if (mir->mir_inwservice == 0 && MIR_WCANPUTNEXT(mir, q)) {
2743 			mutex_exit(&mir->mir_mutex);
2744 			break;
2745 		}
2746 		mir->mir_inwservice = 1;
2747 		mir->mir_inwflushdata = flush_in_svc;
2748 		(void) putq(q, mp);
2749 		mutex_exit(&mir->mir_mutex);
2750 		qenable(q);
2751 
2752 		return;
2753 	}
2754 	putnext(q, mp);
2755 }
2756 
2757 static void
2758 mir_wsrv(queue_t *q)
2759 {
2760 	mblk_t	*mp;
2761 	mir_t	*mir;
2762 	bool_t flushdata;
2763 
2764 	mir = (mir_t *)q->q_ptr;
2765 	mutex_enter(&mir->mir_mutex);
2766 
2767 	flushdata = mir->mir_inwflushdata;
2768 	mir->mir_inwflushdata = 0;
2769 
2770 	while (mp = getq(q)) {
2771 		if (mp->b_datap->db_type == M_DATA) {
2772 			/*
2773 			 * Do not send any more data if we have sent
2774 			 * a T_ORDREL_REQ.
2775 			 */
2776 			if (flushdata || mir->mir_ordrel_pending == 1) {
2777 				freemsg(mp);
2778 				continue;
2779 			}
2780 
2781 			/*
2782 			 * Make sure that the stream can really handle more
2783 			 * data.
2784 			 */
2785 			if (!MIR_WCANPUTNEXT(mir, q)) {
2786 				(void) putbq(q, mp);
2787 				mutex_exit(&mir->mir_mutex);
2788 				return;
2789 			}
2790 
2791 			/*
2792 			 * Now we pass the RPC message downstream.
2793 			 */
2794 			mutex_exit(&mir->mir_mutex);
2795 			putnext(q, mp);
2796 			mutex_enter(&mir->mir_mutex);
2797 			continue;
2798 		}
2799 
2800 		/*
2801 		 * This is not an RPC message, pass it downstream
2802 		 * (ignoring flow control) if the server side is not sending a
2803 		 * T_ORDREL_REQ downstream.
2804 		 */
2805 		if (mir->mir_type != RPC_SERVER ||
2806 		    ((union T_primitives *)mp->b_rptr)->type !=
2807 		    T_ORDREL_REQ) {
2808 			mutex_exit(&mir->mir_mutex);
2809 			putnext(q, mp);
2810 			mutex_enter(&mir->mir_mutex);
2811 			continue;
2812 		}
2813 
2814 		if (mir->mir_ordrel_pending == 1) {
2815 			/*
2816 			 * Don't send two T_ORDRELs
2817 			 */
2818 			freemsg(mp);
2819 			continue;
2820 		}
2821 
2822 		/*
2823 		 * Mark the structure so that we know we sent an orderly
2824 		 * release request.  We will check to see slot is idle at the
2825 		 * end of this routine, and if so, reset the idle timer to
2826 		 * handle orderly release timeouts.
2827 		 */
2828 		mir->mir_ordrel_pending = 1;
2829 		RPCLOG(16, "mir_wsrv: sending ordrel req on q 0x%p\n",
2830 		    (void *)q);
2831 		/*
2832 		 * Send the orderly release downstream. If there are other
2833 		 * pending replies we won't be able to send them.  However,
2834 		 * the only reason we should send the orderly release is if
2835 		 * we were idle, or if an unusual event occurred.
2836 		 */
2837 		mutex_exit(&mir->mir_mutex);
2838 		putnext(q, mp);
2839 		mutex_enter(&mir->mir_mutex);
2840 	}
2841 
2842 	if (q->q_first == NULL)
2843 		/*
2844 		 * If we call mir_svc_idle_start() below, then
2845 		 * clearing mir_inwservice here will also result in
2846 		 * any thread waiting in mir_close() to be signaled.
2847 		 */
2848 		mir->mir_inwservice = 0;
2849 
2850 	if (mir->mir_type != RPC_SERVER) {
2851 		mutex_exit(&mir->mir_mutex);
2852 		return;
2853 	}
2854 
2855 	/*
2856 	 * If idle we call mir_svc_idle_start to start the timer (or wakeup
2857 	 * a close). Also make sure not to start the idle timer on the
2858 	 * listener stream. This can cause nfsd to send an orderly release
2859 	 * command on the listener stream.
2860 	 */
2861 	if (MIR_SVC_QUIESCED(mir) && !(mir->mir_listen_stream)) {
2862 		RPCLOG(16, "mir_wsrv: calling mir_svc_idle_start on 0x%p "
2863 		    "because mir slot is idle\n", (void *)q);
2864 		mir_svc_idle_start(q, mir);
2865 	}
2866 
2867 	/*
2868 	 * If outbound flow control has been relieved, then allow new
2869 	 * inbound requests to be processed.
2870 	 */
2871 	if (mir->mir_hold_inbound) {
2872 		mir->mir_hold_inbound = 0;
2873 		qenable(RD(q));
2874 	}
2875 	mutex_exit(&mir->mir_mutex);
2876 }
2877 
2878 static void
2879 mir_disconnect(queue_t *q, mir_t *mir)
2880 {
2881 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
2882 
2883 	switch (mir->mir_type) {
2884 	case RPC_CLIENT:
2885 		/*
2886 		 * We are disconnecting, but not necessarily
2887 		 * closing. By not closing, we will fail to
2888 		 * pick up a possibly changed global timeout value,
2889 		 * unless we store it now.
2890 		 */
2891 		mir->mir_idle_timeout = clnt_idle_timeout;
2892 		mir_clnt_idle_start(WR(q), mir);
2893 		mutex_exit(&mir->mir_mutex);
2894 
2895 		/*
2896 		 * T_DISCON_REQ is passed to KRPC as an integer value
2897 		 * (this is not a TPI message).  It is used as a
2898 		 * convenient value to indicate a sanity check
2899 		 * failure -- the same KRPC routine is also called
2900 		 * for T_DISCON_INDs and T_ORDREL_INDs.
2901 		 */
2902 		clnt_dispatch_notifyall(WR(q), T_DISCON_REQ, 0);
2903 		break;
2904 
2905 	case RPC_SERVER:
2906 		mir->mir_svc_no_more_msgs = 1;
2907 		mir_svc_idle_stop(WR(q), mir);
2908 		mutex_exit(&mir->mir_mutex);
2909 		RPCLOG(16, "mir_disconnect: telling "
2910 		    "stream head listener to disconnect stream "
2911 		    "(0x%p)\n", (void *) q);
2912 		(void) mir_svc_policy_notify(q, 2);
2913 		break;
2914 
2915 	default:
2916 		mutex_exit(&mir->mir_mutex);
2917 		break;
2918 	}
2919 }
2920 
2921 /*
2922  * Sanity check the message length, and if it's too large, shutdown the
2923  * connection.  Returns 1 if the connection is shutdown; 0 otherwise.
2924  */
2925 static int
2926 mir_check_len(queue_t *q, int32_t frag_len, mblk_t *head_mp)
2927 {
2928 	mir_t *mir = q->q_ptr;
2929 	uint_t maxsize = 0;
2930 
2931 	if (mir->mir_max_msg_sizep != NULL)
2932 		maxsize = *mir->mir_max_msg_sizep;
2933 
2934 	if (maxsize == 0 || frag_len <= (int)maxsize)
2935 		return (0);
2936 
2937 	freemsg(head_mp);
2938 	mir->mir_head_mp = NULL;
2939 	mir->mir_tail_mp = NULL;
2940 	mir->mir_frag_header = 0;
2941 	mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
2942 	if (mir->mir_type != RPC_SERVER || mir->mir_setup_complete) {
2943 		cmn_err(CE_NOTE,
2944 		    "KRPC: record fragment from %s of size(%d) exceeds "
2945 		    "maximum (%u). Disconnecting",
2946 		    (mir->mir_type == RPC_CLIENT) ? "server" :
2947 		    (mir->mir_type == RPC_SERVER) ? "client" :
2948 		    "test tool", frag_len, maxsize);
2949 	}
2950 
2951 	mir_disconnect(q, mir);
2952 	return (1);
2953 }
2954