xref: /illumos-gate/usr/src/uts/common/rpc/rpcmod.c (revision 4d633836fc25186ed4c118b2072d2b88cf87a700)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2012 Milan Jurik. All rights reserved.
27  * Copyright 2012 Marcel Telka <marcel@telka.sk>
28  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
29  * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
30  */
31 /* Copyright (c) 1990 Mentat Inc. */
32 
33 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
34 /*	All Rights Reserved	*/
35 
36 /*
37  * Kernel RPC filtering module
38  */
39 
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/stream.h>
43 #include <sys/stropts.h>
44 #include <sys/strsubr.h>
45 #include <sys/tihdr.h>
46 #include <sys/timod.h>
47 #include <sys/tiuser.h>
48 #include <sys/debug.h>
49 #include <sys/signal.h>
50 #include <sys/pcb.h>
51 #include <sys/user.h>
52 #include <sys/errno.h>
53 #include <sys/cred.h>
54 #include <sys/policy.h>
55 #include <sys/inline.h>
56 #include <sys/cmn_err.h>
57 #include <sys/kmem.h>
58 #include <sys/file.h>
59 #include <sys/sysmacros.h>
60 #include <sys/systm.h>
61 #include <sys/t_lock.h>
62 #include <sys/ddi.h>
63 #include <sys/vtrace.h>
64 #include <sys/callb.h>
65 #include <sys/strsun.h>
66 
67 #include <sys/strlog.h>
68 #include <rpc/rpc_com.h>
69 #include <inet/common.h>
70 #include <rpc/types.h>
71 #include <sys/time.h>
72 #include <rpc/xdr.h>
73 #include <rpc/auth.h>
74 #include <rpc/clnt.h>
75 #include <rpc/rpc_msg.h>
76 #include <rpc/clnt.h>
77 #include <rpc/svc.h>
78 #include <rpc/rpcsys.h>
79 #include <rpc/rpc_rdma.h>
80 
81 /*
82  * This is the loadable module wrapper.
83  */
84 #include <sys/conf.h>
85 #include <sys/modctl.h>
86 #include <sys/syscall.h>
87 
88 extern struct streamtab rpcinfo;
89 
90 static struct fmodsw fsw = {
91 	"rpcmod",
92 	&rpcinfo,
93 	D_NEW|D_MP,
94 };
95 
96 /*
97  * Module linkage information for the kernel.
98  */
99 
100 static struct modlstrmod modlstrmod = {
101 	&mod_strmodops, "rpc interface str mod", &fsw
102 };
103 
104 /*
105  * For the RPC system call.
106  */
107 static struct sysent rpcsysent = {
108 	2,
109 	SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD,
110 	rpcsys
111 };
112 
113 static struct modlsys modlsys = {
114 	&mod_syscallops,
115 	"RPC syscall",
116 	&rpcsysent
117 };
118 
119 #ifdef _SYSCALL32_IMPL
120 static struct modlsys modlsys32 = {
121 	&mod_syscallops32,
122 	"32-bit RPC syscall",
123 	&rpcsysent
124 };
125 #endif /* _SYSCALL32_IMPL */
126 
127 static struct modlinkage modlinkage = {
128 	MODREV_1,
129 	{
130 		&modlsys,
131 #ifdef _SYSCALL32_IMPL
132 		&modlsys32,
133 #endif
134 		&modlstrmod,
135 		NULL
136 	}
137 };
138 
139 int
_init(void)140 _init(void)
141 {
142 	int error = 0;
143 	callb_id_t cid;
144 	int status;
145 
146 	svc_init();
147 	clnt_init();
148 	cid = callb_add(connmgr_cpr_reset, 0, CB_CL_CPR_RPC, "rpc");
149 
150 	if (error = mod_install(&modlinkage)) {
151 		/*
152 		 * Could not install module, cleanup previous
153 		 * initialization work.
154 		 */
155 		clnt_fini();
156 		if (cid != NULL)
157 			(void) callb_delete(cid);
158 
159 		return (error);
160 	}
161 
162 	/*
163 	 * Load up the RDMA plugins and initialize the stats. Even if the
164 	 * plugins loadup fails, but rpcmod was successfully installed the
165 	 * counters still get initialized.
166 	 */
167 	rw_init(&rdma_lock, NULL, RW_DEFAULT, NULL);
168 	mutex_init(&rdma_modload_lock, NULL, MUTEX_DEFAULT, NULL);
169 
170 	cv_init(&rdma_wait.svc_cv, NULL, CV_DEFAULT, NULL);
171 	mutex_init(&rdma_wait.svc_lock, NULL, MUTEX_DEFAULT, NULL);
172 
173 	mt_kstat_init();
174 
175 	/*
176 	 * Get our identification into ldi.  This is used for loading
177 	 * other modules, e.g. rpcib.
178 	 */
179 	status = ldi_ident_from_mod(&modlinkage, &rpcmod_li);
180 	if (status != 0) {
181 		cmn_err(CE_WARN, "ldi_ident_from_mod fails with %d", status);
182 		rpcmod_li = NULL;
183 	}
184 
185 	return (error);
186 }
187 
188 /*
189  * The unload entry point fails, because we advertise entry points into
190  * rpcmod from the rest of kRPC: rpcmod_release().
191  */
192 int
_fini(void)193 _fini(void)
194 {
195 	return (EBUSY);
196 }
197 
198 int
_info(struct modinfo * modinfop)199 _info(struct modinfo *modinfop)
200 {
201 	return (mod_info(&modlinkage, modinfop));
202 }
203 
204 extern int nulldev();
205 
206 #define	RPCMOD_ID	2049
207 
208 int rmm_open(queue_t *, dev_t *, int, int, cred_t *);
209 int rmm_close(queue_t *, int, cred_t *);
210 
211 /*
212  * To save instructions, since STREAMS ignores the return value
213  * from these functions, they are defined as void here. Kind of icky, but...
214  */
215 int rmm_rput(queue_t *, mblk_t *);
216 int rmm_wput(queue_t *, mblk_t *);
217 int rmm_rsrv(queue_t *);
218 int rmm_wsrv(queue_t *);
219 
220 int rpcmodopen(queue_t *, dev_t *, int, int, cred_t *);
221 int rpcmodclose(queue_t *, int, cred_t *);
222 void rpcmodrput(queue_t *, mblk_t *);
223 void rpcmodwput(queue_t *, mblk_t *);
224 void rpcmodrsrv();
225 void rpcmodwsrv(queue_t *);
226 
227 static	void	rpcmodwput_other(queue_t *, mblk_t *);
228 static	int	mir_close(queue_t *q);
229 static	int	mir_open(queue_t *q, dev_t *devp, int flag, int sflag,
230 		    cred_t *credp);
231 static	void	mir_rput(queue_t *q, mblk_t *mp);
232 static	void	mir_rsrv(queue_t *q);
233 static	void	mir_wput(queue_t *q, mblk_t *mp);
234 static	void	mir_wsrv(queue_t *q);
235 
236 static struct module_info rpcmod_info =
237 	{RPCMOD_ID, "rpcmod", 0, INFPSZ, 256*1024, 1024};
238 
239 static struct qinit rpcmodrinit = {
240 	rmm_rput,
241 	rmm_rsrv,
242 	rmm_open,
243 	rmm_close,
244 	nulldev,
245 	&rpcmod_info,
246 	NULL
247 };
248 
249 /*
250  * The write put procedure is simply putnext to conserve stack space.
251  * The write service procedure is not used to queue data, but instead to
252  * synchronize with flow control.
253  */
254 static struct qinit rpcmodwinit = {
255 	rmm_wput,
256 	rmm_wsrv,
257 	rmm_open,
258 	rmm_close,
259 	nulldev,
260 	&rpcmod_info,
261 	NULL
262 };
263 struct streamtab rpcinfo = { &rpcmodrinit, &rpcmodwinit, NULL, NULL };
264 
/*
 * Per-transport-style operations vector.  The rmm_* entry points look
 * up the vector of the slot attached to the queue and call through it.
 * An entry may be NULL when the transport style has no such procedure.
 */
struct xprt_style_ops {
	int (*xo_open)();	/* open */
	int (*xo_close)();	/* close */
	void (*xo_wput)();	/* write put procedure */
	void (*xo_wsrv)();	/* write service procedure */
	void (*xo_rput)();	/* read put procedure */
	void (*xo_rsrv)();	/* read service procedure */
};
273 
274 /*
275  * Read side has no service procedure.
276  */
277 static struct xprt_style_ops xprt_clts_ops = {
278 	rpcmodopen,
279 	rpcmodclose,
280 	rpcmodwput,
281 	rpcmodwsrv,
282 	rpcmodrput,
283 	NULL
284 };
285 
286 static struct xprt_style_ops xprt_cots_ops = {
287 	mir_open,
288 	mir_close,
289 	mir_wput,
290 	mir_wsrv,
291 	mir_rput,
292 	mir_rsrv
293 };
294 
295 /*
296  * Per rpcmod "slot" data structure. q->q_ptr points to one of these.
297  */
298 struct rpcm {
299 	void		*rm_krpc_cell;	/* Reserved for use by kRPC */
300 	struct		xprt_style_ops	*rm_ops;
301 	int		rm_type;	/* Client or server side stream */
302 #define	RM_CLOSING	0x1		/* somebody is trying to close slot */
303 	uint_t		rm_state;	/* state of the slot. see above */
304 	uint_t		rm_ref;		/* cnt of external references to slot */
305 	kmutex_t	rm_lock;	/* mutex protecting above fields */
306 	kcondvar_t	rm_cwait;	/* condition for closing */
307 	zoneid_t	rm_zoneid;	/* zone which pushed rpcmod */
308 };
309 
310 struct temp_slot {
311 	void *cell;
312 	struct xprt_style_ops *ops;
313 	int type;
314 	mblk_t *info_ack;
315 	kmutex_t lock;
316 	kcondvar_t wait;
317 };
318 
319 typedef struct mir_s {
320 	void	*mir_krpc_cell;	/* Reserved for kRPC use. This field */
321 					/* must be first in the structure. */
322 	struct xprt_style_ops	*rm_ops;
323 	int	mir_type;		/* Client or server side stream */
324 
325 	mblk_t	*mir_head_mp;		/* RPC msg in progress */
326 		/*
327 		 * mir_head_mp points the first mblk being collected in
328 		 * the current RPC message.  Record headers are removed
329 		 * before data is linked into mir_head_mp.
330 		 */
331 	mblk_t	*mir_tail_mp;		/* Last mblk in mir_head_mp */
332 		/*
333 		 * mir_tail_mp points to the last mblk in the message
334 		 * chain starting at mir_head_mp.  It is only valid
335 		 * if mir_head_mp is non-NULL and is used to add new
336 		 * data blocks to the end of chain quickly.
337 		 */
338 
339 	int32_t	mir_frag_len;		/* Bytes seen in the current frag */
340 		/*
341 		 * mir_frag_len starts at -4 for beginning of each fragment.
342 		 * When this length is negative, it indicates the number of
343 		 * bytes that rpcmod needs to complete the record marker
344 		 * header.  When it is positive or zero, it holds the number
345 		 * of bytes that have arrived for the current fragment and
346 		 * are held in mir_header_mp.
347 		 */
348 
349 	int32_t	mir_frag_header;
350 		/*
351 		 * Fragment header as collected for the current fragment.
352 		 * It holds the last-fragment indicator and the number
353 		 * of bytes in the fragment.
354 		 */
355 
356 	unsigned int
357 		mir_ordrel_pending : 1,	/* Sent T_ORDREL_REQ */
358 		mir_hold_inbound : 1,	/* Hold inbound messages on server */
359 					/* side until outbound flow control */
360 					/* is relieved. */
361 		mir_closing : 1,	/* The stream is being closed */
362 		mir_inrservice : 1,	/* data queued or rd srv proc running */
363 		mir_inwservice : 1,	/* data queued or wr srv proc running */
364 		mir_inwflushdata : 1,	/* flush M_DATAs when srv runs */
365 		/*
366 		 * On client streams, mir_clntreq is 0 or 1; it is set
367 		 * to 1 whenever a new request is sent out (mir_wput)
368 		 * and cleared when the timer fires (mir_timer).  If
369 		 * the timer fires with this value equal to 0, then the
370 		 * stream is considered idle and kRPC is notified.
371 		 */
372 		mir_clntreq : 1,
373 		/*
374 		 * On server streams, stop accepting messages
375 		 */
376 		mir_svc_no_more_msgs : 1,
377 		mir_listen_stream : 1,	/* listen end point */
378 		mir_unused : 1,	/* no longer used */
379 		mir_timer_call : 1,
380 		mir_junk_fill_thru_bit_31 : 21;
381 
382 	int	mir_setup_complete;	/* server has initialized everything */
383 	timeout_id_t mir_timer_id;	/* Timer for idle checks */
384 	clock_t	mir_idle_timeout;	/* Allowed idle time before shutdown */
385 		/*
386 		 * This value is copied from clnt_idle_timeout or
387 		 * svc_idle_timeout during the appropriate ioctl.
388 		 * Kept in milliseconds
389 		 */
390 	clock_t	mir_use_timestamp;	/* updated on client with each use */
391 		/*
392 		 * This value is set to lbolt
393 		 * every time a client stream sends or receives data.
394 		 * Even if the timer message arrives, we don't shutdown
395 		 * client unless:
396 		 *    lbolt >= MSEC_TO_TICK(mir_idle_timeout)+mir_use_timestamp.
397 		 * This value is kept in HZ.
398 		 */
399 
400 	uint_t	*mir_max_msg_sizep;	/* Reference to sanity check size */
401 		/*
402 		 * This pointer is set to &clnt_max_msg_size or
403 		 * &svc_max_msg_size during the appropriate ioctl.
404 		 */
405 	zoneid_t mir_zoneid;	/* zone which pushed rpcmod */
406 	/* Server-side fields. */
407 	int	mir_ref_cnt;		/* Reference count: server side only */
408 					/* counts the number of references */
409 					/* that a kernel RPC server thread */
410 					/* (see svc_run()) has on this rpcmod */
411 					/* slot. Effectively, it is the */
412 					/* number of unprocessed messages */
413 					/* that have been passed up to the */
414 					/* kRPC layer */
415 
416 	mblk_t	*mir_svc_pend_mp;	/* Pending T_ORDREL_IND or */
417 					/* T_DISCON_IND */
418 
419 	/*
420 	 * these fields are for both client and server, but for debugging,
421 	 * it is easier to have these last in the structure.
422 	 */
423 	kmutex_t	mir_mutex;	/* Mutex and condvar for close */
424 	kcondvar_t	mir_condvar;	/* synchronization. */
425 	kcondvar_t	mir_timer_cv;	/* Timer routine sync. */
426 } mir_t;
427 
428 void tmp_rput(queue_t *q, mblk_t *mp);
429 
430 struct xprt_style_ops tmpops = {
431 	NULL,
432 	NULL,
433 	putnext,
434 	NULL,
435 	tmp_rput,
436 	NULL
437 };
438 
439 void
tmp_rput(queue_t * q,mblk_t * mp)440 tmp_rput(queue_t *q, mblk_t *mp)
441 {
442 	struct temp_slot *t = (struct temp_slot *)(q->q_ptr);
443 	struct T_info_ack *pptr;
444 
445 	switch (mp->b_datap->db_type) {
446 	case M_PCPROTO:
447 		pptr = (struct T_info_ack *)mp->b_rptr;
448 		switch (pptr->PRIM_type) {
449 		case T_INFO_ACK:
450 			mutex_enter(&t->lock);
451 			t->info_ack = mp;
452 			cv_signal(&t->wait);
453 			mutex_exit(&t->lock);
454 			return;
455 		default:
456 			break;
457 		}
458 	default:
459 		break;
460 	}
461 
462 	/*
463 	 * Not an info-ack, so free it. This is ok because we should
464 	 * not be receiving data until the open finishes: rpcmod
465 	 * is pushed well before the end-point is bound to an address.
466 	 */
467 	freemsg(mp);
468 }
469 
470 int
rmm_open(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * crp)471 rmm_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
472 {
473 	mblk_t *bp;
474 	struct temp_slot ts, *t;
475 	struct T_info_ack *pptr;
476 	int error = 0;
477 
478 	ASSERT(q != NULL);
479 	/*
480 	 * Check for re-opens.
481 	 */
482 	if (q->q_ptr) {
483 		TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END,
484 		    "rpcmodopen_end:(%s)", "q->qptr");
485 		return (0);
486 	}
487 
488 	t = &ts;
489 	bzero(t, sizeof (*t));
490 	q->q_ptr = (void *)t;
491 	WR(q)->q_ptr = (void *)t;
492 
493 	/*
494 	 * Allocate the required messages upfront.
495 	 */
496 	if ((bp = allocb_cred(sizeof (struct T_info_req) +
497 	    sizeof (struct T_info_ack), crp, curproc->p_pid)) == NULL) {
498 		return (ENOBUFS);
499 	}
500 
501 	mutex_init(&t->lock, NULL, MUTEX_DEFAULT, NULL);
502 	cv_init(&t->wait, NULL, CV_DEFAULT, NULL);
503 
504 	t->ops = &tmpops;
505 
506 	qprocson(q);
507 	bp->b_datap->db_type = M_PCPROTO;
508 	*(int32_t *)bp->b_wptr = (int32_t)T_INFO_REQ;
509 	bp->b_wptr += sizeof (struct T_info_req);
510 	putnext(WR(q), bp);
511 
512 	mutex_enter(&t->lock);
513 	while (t->info_ack == NULL) {
514 		if (cv_wait_sig(&t->wait, &t->lock) == 0) {
515 			error = EINTR;
516 			break;
517 		}
518 	}
519 	mutex_exit(&t->lock);
520 
521 	if (error)
522 		goto out;
523 
524 	pptr = (struct T_info_ack *)t->info_ack->b_rptr;
525 
526 	if (pptr->SERV_type == T_CLTS) {
527 		if ((error = rpcmodopen(q, devp, flag, sflag, crp)) == 0)
528 			((struct rpcm *)q->q_ptr)->rm_ops = &xprt_clts_ops;
529 	} else {
530 		if ((error = mir_open(q, devp, flag, sflag, crp)) == 0)
531 			((mir_t *)q->q_ptr)->rm_ops = &xprt_cots_ops;
532 	}
533 
534 out:
535 	if (error)
536 		qprocsoff(q);
537 
538 	freemsg(t->info_ack);
539 	mutex_destroy(&t->lock);
540 	cv_destroy(&t->wait);
541 
542 	return (error);
543 }
544 
545 int
rmm_rput(queue_t * q,mblk_t * mp)546 rmm_rput(queue_t *q, mblk_t  *mp)
547 {
548 	(*((struct temp_slot *)q->q_ptr)->ops->xo_rput)(q, mp);
549 	return (0);
550 }
551 
552 int
rmm_rsrv(queue_t * q)553 rmm_rsrv(queue_t *q)
554 {
555 	(*((struct temp_slot *)q->q_ptr)->ops->xo_rsrv)(q);
556 	return (0);
557 }
558 
559 int
rmm_wput(queue_t * q,mblk_t * mp)560 rmm_wput(queue_t *q, mblk_t *mp)
561 {
562 	(*((struct temp_slot *)q->q_ptr)->ops->xo_wput)(q, mp);
563 	return (0);
564 }
565 
566 int
rmm_wsrv(queue_t * q)567 rmm_wsrv(queue_t *q)
568 {
569 	(*((struct temp_slot *)q->q_ptr)->ops->xo_wsrv)(q);
570 	return (0);
571 }
572 
573 int
rmm_close(queue_t * q,int flag,cred_t * crp)574 rmm_close(queue_t *q, int flag, cred_t *crp)
575 {
576 	return ((*((struct temp_slot *)q->q_ptr)->ops->xo_close)(q, flag, crp));
577 }
578 
579 /*
580  * rpcmodopen -	open routine gets called when the module gets pushed
581  *		onto the stream.
582  */
583 /*ARGSUSED*/
584 int
rpcmodopen(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * crp)585 rpcmodopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *crp)
586 {
587 	struct rpcm *rmp;
588 
589 	TRACE_0(TR_FAC_KRPC, TR_RPCMODOPEN_START, "rpcmodopen_start:");
590 
591 	/*
592 	 * Only sufficiently privileged users can use this module, and it
593 	 * is assumed that they will use this module properly, and NOT send
594 	 * bulk data from downstream.
595 	 */
596 	if (secpolicy_rpcmod_open(crp) != 0)
597 		return (EPERM);
598 
599 	/*
600 	 * Allocate slot data structure.
601 	 */
602 	rmp = kmem_zalloc(sizeof (*rmp), KM_SLEEP);
603 
604 	mutex_init(&rmp->rm_lock, NULL, MUTEX_DEFAULT, NULL);
605 	cv_init(&rmp->rm_cwait, NULL, CV_DEFAULT, NULL);
606 	rmp->rm_zoneid = rpc_zoneid();
607 	/*
608 	 * slot type will be set by kRPC client and server ioctl's
609 	 */
610 	rmp->rm_type = 0;
611 
612 	q->q_ptr = (void *)rmp;
613 	WR(q)->q_ptr = (void *)rmp;
614 
615 	TRACE_1(TR_FAC_KRPC, TR_RPCMODOPEN_END, "rpcmodopen_end:(%s)", "end");
616 	return (0);
617 }
618 
619 /*
620  * rpcmodclose - This routine gets called when the module gets popped
621  * off of the stream.
622  */
623 /*ARGSUSED*/
624 int
rpcmodclose(queue_t * q,int flag,cred_t * crp)625 rpcmodclose(queue_t *q, int flag, cred_t *crp)
626 {
627 	struct rpcm *rmp;
628 
629 	ASSERT(q != NULL);
630 	rmp = (struct rpcm *)q->q_ptr;
631 
632 	/*
633 	 * Mark our state as closing.
634 	 */
635 	mutex_enter(&rmp->rm_lock);
636 	rmp->rm_state |= RM_CLOSING;
637 
638 	/*
639 	 * Check and see if there are any messages on the queue.  If so, send
640 	 * the messages, regardless whether the downstream module is ready to
641 	 * accept data.
642 	 */
643 	if (rmp->rm_type == RPC_SERVER) {
644 		flushq(q, FLUSHDATA);
645 
646 		qenable(WR(q));
647 
648 		if (rmp->rm_ref) {
649 			mutex_exit(&rmp->rm_lock);
650 			/*
651 			 * call into SVC to clean the queue
652 			 */
653 			svc_queueclean(q);
654 			mutex_enter(&rmp->rm_lock);
655 
656 			/*
657 			 * Block while there are kRPC threads with a reference
658 			 * to this message.
659 			 */
660 			while (rmp->rm_ref)
661 				cv_wait(&rmp->rm_cwait, &rmp->rm_lock);
662 		}
663 
664 		mutex_exit(&rmp->rm_lock);
665 
666 		/*
667 		 * It is now safe to remove this queue from the stream. No kRPC
668 		 * threads have a reference to the stream, and none ever will,
669 		 * because RM_CLOSING is set.
670 		 */
671 		qprocsoff(q);
672 
673 		/* Notify kRPC that this stream is going away. */
674 		svc_queueclose(q);
675 	} else {
676 		mutex_exit(&rmp->rm_lock);
677 		qprocsoff(q);
678 	}
679 
680 	q->q_ptr = NULL;
681 	WR(q)->q_ptr = NULL;
682 	mutex_destroy(&rmp->rm_lock);
683 	cv_destroy(&rmp->rm_cwait);
684 	kmem_free(rmp, sizeof (*rmp));
685 	return (0);
686 }
687 
688 /*
689  * rpcmodrput -	Module read put procedure.  This is called from
690  *		the module, driver, or stream head downstream.
691  */
692 void
rpcmodrput(queue_t * q,mblk_t * mp)693 rpcmodrput(queue_t *q, mblk_t *mp)
694 {
695 	struct rpcm *rmp;
696 	union T_primitives *pptr;
697 	int hdrsz;
698 
699 	TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_START, "rpcmodrput_start:");
700 
701 	ASSERT(q != NULL);
702 	rmp = (struct rpcm *)q->q_ptr;
703 
704 	if (rmp->rm_type == 0) {
705 		freemsg(mp);
706 		return;
707 	}
708 
709 	switch (mp->b_datap->db_type) {
710 	default:
711 		putnext(q, mp);
712 		break;
713 
714 	case M_PROTO:
715 	case M_PCPROTO:
716 		ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (int32_t));
717 		pptr = (union T_primitives *)mp->b_rptr;
718 
719 		/*
720 		 * Forward this message to kRPC if it is data.
721 		 */
722 		if (pptr->type == T_UNITDATA_IND) {
723 			/*
724 			 * Check if the module is being popped.
725 			 */
726 			mutex_enter(&rmp->rm_lock);
727 			if (rmp->rm_state & RM_CLOSING) {
728 				mutex_exit(&rmp->rm_lock);
729 				putnext(q, mp);
730 				break;
731 			}
732 
733 			switch (rmp->rm_type) {
734 			case RPC_CLIENT:
735 				mutex_exit(&rmp->rm_lock);
736 				hdrsz = mp->b_wptr - mp->b_rptr;
737 
738 				/*
739 				 * Make sure the header is sane.
740 				 */
741 				if (hdrsz < TUNITDATAINDSZ ||
742 				    hdrsz < (pptr->unitdata_ind.OPT_length +
743 				    pptr->unitdata_ind.OPT_offset) ||
744 				    hdrsz < (pptr->unitdata_ind.SRC_length +
745 				    pptr->unitdata_ind.SRC_offset)) {
746 					freemsg(mp);
747 					return;
748 				}
749 
750 				/*
751 				 * Call clnt_clts_dispatch_notify, so that it
752 				 * can pass the message to the proper caller.
753 				 * Don't discard the header just yet since the
754 				 * client may need the sender's address.
755 				 */
756 				clnt_clts_dispatch_notify(mp, hdrsz,
757 				    rmp->rm_zoneid);
758 				return;
759 			case RPC_SERVER:
760 				/*
761 				 * rm_krpc_cell is exclusively used by the kRPC
762 				 * CLTS server. Try to submit the message to
763 				 * kRPC. Since this is an unreliable channel, we
764 				 * can just free the message in case the kRPC
765 				 * does not accept new messages.
766 				 */
767 				if (rmp->rm_krpc_cell &&
768 				    svc_queuereq(q, mp, TRUE)) {
769 					/*
770 					 * Raise the reference count on this
771 					 * module to prevent it from being
772 					 * popped before kRPC generates the
773 					 * reply.
774 					 */
775 					rmp->rm_ref++;
776 					mutex_exit(&rmp->rm_lock);
777 				} else {
778 					mutex_exit(&rmp->rm_lock);
779 					freemsg(mp);
780 				}
781 				return;
782 			default:
783 				mutex_exit(&rmp->rm_lock);
784 				freemsg(mp);
785 				return;
786 			} /* end switch(rmp->rm_type) */
787 		} else if (pptr->type == T_UDERROR_IND) {
788 			mutex_enter(&rmp->rm_lock);
789 			hdrsz = mp->b_wptr - mp->b_rptr;
790 
791 			/*
792 			 * Make sure the header is sane
793 			 */
794 			if (hdrsz < TUDERRORINDSZ ||
795 			    hdrsz < (pptr->uderror_ind.OPT_length +
796 			    pptr->uderror_ind.OPT_offset) ||
797 			    hdrsz < (pptr->uderror_ind.DEST_length +
798 			    pptr->uderror_ind.DEST_offset)) {
799 				mutex_exit(&rmp->rm_lock);
800 				freemsg(mp);
801 				return;
802 			}
803 
804 			/*
805 			 * In the case where a unit data error has been
806 			 * received, all we need to do is clear the message from
807 			 * the queue.
808 			 */
809 			mutex_exit(&rmp->rm_lock);
810 			freemsg(mp);
811 			RPCLOG(32, "rpcmodrput: unitdata error received at "
812 			    "%ld\n", gethrestime_sec());
813 			return;
814 		} /* end else if (pptr->type == T_UDERROR_IND) */
815 
816 		putnext(q, mp);
817 		break;
818 	} /* end switch (mp->b_datap->db_type) */
819 
820 	TRACE_0(TR_FAC_KRPC, TR_RPCMODRPUT_END,
821 	    "rpcmodrput_end:");
822 	/*
823 	 * Return codes are not looked at by the STREAMS framework.
824 	 */
825 }
826 
827 /*
828  * write put procedure
829  */
830 void
rpcmodwput(queue_t * q,mblk_t * mp)831 rpcmodwput(queue_t *q, mblk_t *mp)
832 {
833 	struct rpcm	*rmp;
834 
835 	ASSERT(q != NULL);
836 
837 	switch (mp->b_datap->db_type) {
838 		case M_PROTO:
839 		case M_PCPROTO:
840 			break;
841 		default:
842 			rpcmodwput_other(q, mp);
843 			return;
844 	}
845 
846 	/*
847 	 * Check to see if we can send the message downstream.
848 	 */
849 	if (canputnext(q)) {
850 		putnext(q, mp);
851 		return;
852 	}
853 
854 	rmp = (struct rpcm *)q->q_ptr;
855 	ASSERT(rmp != NULL);
856 
857 	/*
858 	 * The first canputnext failed.  Try again except this time with the
859 	 * lock held, so that we can check the state of the stream to see if
860 	 * it is closing.  If either of these conditions evaluate to true
861 	 * then send the meesage.
862 	 */
863 	mutex_enter(&rmp->rm_lock);
864 	if (canputnext(q) || (rmp->rm_state & RM_CLOSING)) {
865 		mutex_exit(&rmp->rm_lock);
866 		putnext(q, mp);
867 	} else {
868 		/*
869 		 * canputnext failed again and the stream is not closing.
870 		 * Place the message on the queue and let the service
871 		 * procedure handle the message.
872 		 */
873 		mutex_exit(&rmp->rm_lock);
874 		(void) putq(q, mp);
875 	}
876 }
877 
878 static void
rpcmodwput_other(queue_t * q,mblk_t * mp)879 rpcmodwput_other(queue_t *q, mblk_t *mp)
880 {
881 	struct rpcm	*rmp;
882 	struct iocblk	*iocp;
883 
884 	rmp = (struct rpcm *)q->q_ptr;
885 	ASSERT(rmp != NULL);
886 
887 	switch (mp->b_datap->db_type) {
888 		case M_IOCTL:
889 			iocp = (struct iocblk *)mp->b_rptr;
890 			ASSERT(iocp != NULL);
891 			switch (iocp->ioc_cmd) {
892 				case RPC_CLIENT:
893 				case RPC_SERVER:
894 					mutex_enter(&rmp->rm_lock);
895 					rmp->rm_type = iocp->ioc_cmd;
896 					mutex_exit(&rmp->rm_lock);
897 					mp->b_datap->db_type = M_IOCACK;
898 					qreply(q, mp);
899 					return;
900 				default:
901 				/*
902 				 * pass the ioctl downstream and hope someone
903 				 * down there knows how to handle it.
904 				 */
905 					putnext(q, mp);
906 					return;
907 			}
908 		default:
909 			break;
910 	}
911 	/*
912 	 * This is something we definitely do not know how to handle, just
913 	 * pass the message downstream
914 	 */
915 	putnext(q, mp);
916 }
917 
918 /*
919  * Module write service procedure. This is called by downstream modules
920  * for back enabling during flow control.
921  */
922 void
rpcmodwsrv(queue_t * q)923 rpcmodwsrv(queue_t *q)
924 {
925 	struct rpcm	*rmp;
926 	mblk_t		*mp = NULL;
927 
928 	rmp = (struct rpcm *)q->q_ptr;
929 	ASSERT(rmp != NULL);
930 
931 	/*
932 	 * Get messages that may be queued and send them down stream
933 	 */
934 	while ((mp = getq(q)) != NULL) {
935 		/*
936 		 * Optimize the service procedure for the server-side, by
937 		 * avoiding a call to canputnext().
938 		 */
939 		if (rmp->rm_type == RPC_SERVER || canputnext(q)) {
940 			putnext(q, mp);
941 			continue;
942 		}
943 		(void) putbq(q, mp);
944 		return;
945 	}
946 }
947 
948 void
rpcmod_hold(queue_t * q)949 rpcmod_hold(queue_t *q)
950 {
951 	struct rpcm *rmp = (struct rpcm *)q->q_ptr;
952 
953 	mutex_enter(&rmp->rm_lock);
954 	rmp->rm_ref++;
955 	mutex_exit(&rmp->rm_lock);
956 }
957 
958 void
rpcmod_release(queue_t * q,mblk_t * bp,bool_t enable __unused)959 rpcmod_release(queue_t *q, mblk_t *bp, bool_t enable __unused)
960 {
961 	struct rpcm *rmp;
962 
963 	/*
964 	 * For now, just free the message.
965 	 */
966 	if (bp)
967 		freemsg(bp);
968 	rmp = (struct rpcm *)q->q_ptr;
969 
970 	mutex_enter(&rmp->rm_lock);
971 	rmp->rm_ref--;
972 
973 	if (rmp->rm_ref == 0 && (rmp->rm_state & RM_CLOSING)) {
974 		cv_broadcast(&rmp->rm_cwait);
975 	}
976 
977 	mutex_exit(&rmp->rm_lock);
978 }
979 
980 /*
981  * This part of rpcmod is pushed on a connection-oriented transport for use
982  * by RPC.  It serves to bypass the Stream head, implements
983  * the record marking protocol, and dispatches incoming RPC messages.
984  */
985 
/* Default idle timer values, in milliseconds */
#define	MIR_CLNT_IDLE_TIMEOUT	(5 * (60 * 1000L))	/* 5 minutes */
#define	MIR_SVC_IDLE_TIMEOUT	(6 * (60 * 1000L))	/* 6 minutes */
#define	MIR_SVC_ORDREL_TIMEOUT	(10 * (60 * 1000L))	/* 10 minutes */
#define	MIR_LASTFRAG	0x80000000	/* Record marker */

/*
 * True when a server stream has no outstanding references and its read
 * service is not active.  The argument is parenthesized so that any
 * pointer expression (e.g. `base + i' or `&array[i]') may be passed.
 */
#define	MIR_SVC_QUIESCED(mir)	\
	((mir)->mir_ref_cnt == 0 && (mir)->mir_inrservice == 0)

/*
 * Clear the read-service flag and, on a server stream that is closing,
 * wake up the thread waiting on mir_condvar.
 */
#define	MIR_CLEAR_INRSRV(mir_ptr)	{	\
	(mir_ptr)->mir_inrservice = 0;	\
	if ((mir_ptr)->mir_type == RPC_SERVER &&	\
		(mir_ptr)->mir_closing)	\
		cv_signal(&(mir_ptr)->mir_condvar);	\
}

/*
 * Don't block service procedure (and mir_close) if
 * we are in the process of closing.
 */
#define	MIR_WCANPUTNEXT(mir_ptr, write_q)	\
	(canputnext(write_q) || ((mir_ptr)->mir_svc_no_more_msgs == 1))
1008 
1009 static int	mir_clnt_dup_request(queue_t *q, mblk_t *mp);
1010 static void	mir_rput_proto(queue_t *q, mblk_t *mp);
1011 static int	mir_svc_policy_notify(queue_t *q, int event);
1012 static void	mir_svc_start(queue_t *wq);
1013 static void	mir_svc_idle_start(queue_t *, mir_t *);
1014 static void	mir_svc_idle_stop(queue_t *, mir_t *);
1015 static void	mir_svc_start_close(queue_t *, mir_t *);
1016 static void	mir_clnt_idle_do_stop(queue_t *);
1017 static void	mir_clnt_idle_stop(queue_t *, mir_t *);
1018 static void	mir_clnt_idle_start(queue_t *, mir_t *);
1019 static void	mir_wput(queue_t *q, mblk_t *mp);
1020 static void	mir_wput_other(queue_t *q, mblk_t *mp);
1021 static void	mir_wsrv(queue_t *q);
1022 static	void	mir_disconnect(queue_t *, mir_t *ir);
1023 static	int	mir_check_len(queue_t *, mblk_t *);
1024 static	void	mir_timer(void *);
1025 
1026 extern void	(*mir_start)(queue_t *);
1027 extern void	(*clnt_stop_idle)(queue_t *);
1028 
1029 clock_t	clnt_idle_timeout = MIR_CLNT_IDLE_TIMEOUT;
1030 clock_t	svc_idle_timeout = MIR_SVC_IDLE_TIMEOUT;
1031 
1032 /*
1033  * Timeout for subsequent notifications of idle connection.  This is
1034  * typically used to clean up after a wedged orderly release.
1035  */
1036 clock_t	svc_ordrel_timeout = MIR_SVC_ORDREL_TIMEOUT; /* milliseconds */
1037 
1038 extern	uint_t	*clnt_max_msg_sizep;
1039 extern	uint_t	*svc_max_msg_sizep;
1040 uint_t	clnt_max_msg_size = RPC_MAXDATASIZE;
1041 uint_t	svc_max_msg_size = RPC_MAXDATASIZE;
1042 uint_t	mir_krpc_cell_null;
1043 
1044 static void
mir_timer_stop(mir_t * mir)1045 mir_timer_stop(mir_t *mir)
1046 {
1047 	timeout_id_t tid;
1048 
1049 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
1050 
1051 	/*
1052 	 * Since the mir_mutex lock needs to be released to call
1053 	 * untimeout(), we need to make sure that no other thread
1054 	 * can start/stop the timer (changing mir_timer_id) during
1055 	 * that time.  The mir_timer_call bit and the mir_timer_cv
1056 	 * condition variable are used to synchronize this.  Setting
1057 	 * mir_timer_call also tells mir_timer() (refer to the comments
1058 	 * in mir_timer()) that it does not need to do anything.
1059 	 */
1060 	while (mir->mir_timer_call)
1061 		cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
1062 	mir->mir_timer_call = B_TRUE;
1063 
1064 	if ((tid = mir->mir_timer_id) != 0) {
1065 		mir->mir_timer_id = 0;
1066 		mutex_exit(&mir->mir_mutex);
1067 		(void) untimeout(tid);
1068 		mutex_enter(&mir->mir_mutex);
1069 	}
1070 	mir->mir_timer_call = B_FALSE;
1071 	cv_broadcast(&mir->mir_timer_cv);
1072 }
1073 
1074 static void
mir_timer_start(queue_t * q,mir_t * mir,clock_t intrvl)1075 mir_timer_start(queue_t *q, mir_t *mir, clock_t intrvl)
1076 {
1077 	timeout_id_t tid;
1078 
1079 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
1080 
1081 	while (mir->mir_timer_call)
1082 		cv_wait(&mir->mir_timer_cv, &mir->mir_mutex);
1083 	mir->mir_timer_call = B_TRUE;
1084 
1085 	if ((tid = mir->mir_timer_id) != 0) {
1086 		mutex_exit(&mir->mir_mutex);
1087 		(void) untimeout(tid);
1088 		mutex_enter(&mir->mir_mutex);
1089 	}
1090 	/* Only start the timer when it is not closing. */
1091 	if (!mir->mir_closing) {
1092 		mir->mir_timer_id = timeout(mir_timer, q,
1093 		    MSEC_TO_TICK(intrvl));
1094 	}
1095 	mir->mir_timer_call = B_FALSE;
1096 	cv_broadcast(&mir->mir_timer_cv);
1097 }
1098 
1099 static int
mir_clnt_dup_request(queue_t * q,mblk_t * mp)1100 mir_clnt_dup_request(queue_t *q, mblk_t *mp)
1101 {
1102 	mblk_t  *mp1;
1103 	uint32_t  new_xid;
1104 	uint32_t  old_xid;
1105 
1106 	ASSERT(MUTEX_HELD(&((mir_t *)q->q_ptr)->mir_mutex));
1107 	new_xid = BE32_TO_U32(&mp->b_rptr[4]);
1108 	/*
1109 	 * This loop is a bit tacky -- it walks the STREAMS list of
1110 	 * flow-controlled messages.
1111 	 */
1112 	if ((mp1 = q->q_first) != NULL) {
1113 		do {
1114 			old_xid = BE32_TO_U32(&mp1->b_rptr[4]);
1115 			if (new_xid == old_xid)
1116 				return (1);
1117 		} while ((mp1 = mp1->b_next) != NULL);
1118 	}
1119 	return (0);
1120 }
1121 
1122 static int
mir_close(queue_t * q)1123 mir_close(queue_t *q)
1124 {
1125 	mir_t	*mir = q->q_ptr;
1126 	mblk_t	*mp;
1127 	bool_t queue_cleaned = FALSE;
1128 
1129 	RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q);
1130 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1131 	mutex_enter(&mir->mir_mutex);
1132 	if ((mp = mir->mir_head_mp) != NULL) {
1133 		mir->mir_head_mp = NULL;
1134 		mir->mir_tail_mp = NULL;
1135 		freemsg(mp);
1136 	}
1137 	/*
1138 	 * Set mir_closing so we get notified when MIR_SVC_QUIESCED()
1139 	 * is TRUE.  And mir_timer_start() won't start the timer again.
1140 	 */
1141 	mir->mir_closing = B_TRUE;
1142 	mir_timer_stop(mir);
1143 
1144 	if (mir->mir_type == RPC_SERVER) {
1145 		flushq(q, FLUSHDATA);	/* Ditch anything waiting on read q */
1146 
1147 		/*
1148 		 * This will prevent more requests from arriving and
1149 		 * will force rpcmod to ignore flow control.
1150 		 */
1151 		mir_svc_start_close(WR(q), mir);
1152 
1153 		while ((!MIR_SVC_QUIESCED(mir)) || mir->mir_inwservice == 1) {
1154 
1155 			if (mir->mir_ref_cnt && !mir->mir_inrservice &&
1156 			    (queue_cleaned == FALSE)) {
1157 				/*
1158 				 * call into SVC to clean the queue
1159 				 */
1160 				mutex_exit(&mir->mir_mutex);
1161 				svc_queueclean(q);
1162 				queue_cleaned = TRUE;
1163 				mutex_enter(&mir->mir_mutex);
1164 				continue;
1165 			}
1166 
1167 			/*
1168 			 * Bugid 1253810 - Force the write service
1169 			 * procedure to send its messages, regardless
1170 			 * whether the downstream  module is ready
1171 			 * to accept data.
1172 			 */
1173 			if (mir->mir_inwservice == 1)
1174 				qenable(WR(q));
1175 
1176 			cv_wait(&mir->mir_condvar, &mir->mir_mutex);
1177 		}
1178 
1179 		mutex_exit(&mir->mir_mutex);
1180 		qprocsoff(q);
1181 
1182 		/* Notify kRPC that this stream is going away. */
1183 		svc_queueclose(q);
1184 	} else {
1185 		mutex_exit(&mir->mir_mutex);
1186 		qprocsoff(q);
1187 	}
1188 
1189 	mutex_destroy(&mir->mir_mutex);
1190 	cv_destroy(&mir->mir_condvar);
1191 	cv_destroy(&mir->mir_timer_cv);
1192 	kmem_free(mir, sizeof (mir_t));
1193 	return (0);
1194 }
1195 
/*
 * This is server side only (RPC_SERVER).
 *
 * Exit idle mode by stopping the stream's timer through mir_timer_stop().
 *
 * q must be the write-side queue and the caller must hold mir_mutex.
 * Note that mir_timer_stop() may drop and retake mir_mutex while it
 * cancels a pending timeout.
 */
static void
mir_svc_idle_stop(queue_t *q, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((q->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);
	RPCLOG(16, "rpcmod: mir_svc_idle_stop of q 0x%p\n", (void *)q);

	mir_timer_stop(mir);
}
1211 
1212 /*
1213  * This is server side only (RPC_SERVER).
1214  *
1215  * Start idle processing, which will include setting idle timer if the
1216  * stream is not being closed.
1217  */
1218 static void
mir_svc_idle_start(queue_t * q,mir_t * mir)1219 mir_svc_idle_start(queue_t *q, mir_t *mir)
1220 {
1221 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
1222 	ASSERT((q->q_flag & QREADR) == 0);
1223 	ASSERT(mir->mir_type == RPC_SERVER);
1224 	RPCLOG(16, "rpcmod: mir_svc_idle_start q 0x%p\n", (void *)q);
1225 
1226 	/*
1227 	 * Don't re-start idle timer if we are closing queues.
1228 	 */
1229 	if (mir->mir_closing) {
1230 		RPCLOG(16, "mir_svc_idle_start - closing: 0x%p\n",
1231 		    (void *)q);
1232 
1233 		/*
1234 		 * We will call mir_svc_idle_start() whenever MIR_SVC_QUIESCED()
1235 		 * is true.  When it is true, and we are in the process of
1236 		 * closing the stream, signal any thread waiting in
1237 		 * mir_close().
1238 		 */
1239 		if (mir->mir_inwservice == 0)
1240 			cv_signal(&mir->mir_condvar);
1241 
1242 	} else {
1243 		RPCLOG(16, "mir_svc_idle_start - reset %s timer\n",
1244 		    mir->mir_ordrel_pending ? "ordrel" : "normal");
1245 		/*
1246 		 * Normal condition, start the idle timer.  If an orderly
1247 		 * release has been sent, set the timeout to wait for the
1248 		 * client to close its side of the connection.  Otherwise,
1249 		 * use the normal idle timeout.
1250 		 */
1251 		mir_timer_start(q, mir, mir->mir_ordrel_pending ?
1252 		    svc_ordrel_timeout : mir->mir_idle_timeout);
1253 	}
1254 }
1255 
1256 /* ARGSUSED */
1257 static int
mir_open(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp)1258 mir_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1259 {
1260 	mir_t	*mir;
1261 
1262 	RPCLOG(32, "rpcmod: mir_open of q 0x%p\n", (void *)q);
1263 	/* Set variables used directly by kRPC. */
1264 	if (!mir_start)
1265 		mir_start = mir_svc_start;
1266 	if (!clnt_stop_idle)
1267 		clnt_stop_idle = mir_clnt_idle_do_stop;
1268 	if (!clnt_max_msg_sizep)
1269 		clnt_max_msg_sizep = &clnt_max_msg_size;
1270 	if (!svc_max_msg_sizep)
1271 		svc_max_msg_sizep = &svc_max_msg_size;
1272 
1273 	/* Allocate a zero'ed out mir structure for this stream. */
1274 	mir = kmem_zalloc(sizeof (mir_t), KM_SLEEP);
1275 
1276 	/*
1277 	 * We set hold inbound here so that incoming messages will
1278 	 * be held on the read-side queue until the stream is completely
1279 	 * initialized with a RPC_CLIENT or RPC_SERVER ioctl.  During
1280 	 * the ioctl processing, the flag is cleared and any messages that
1281 	 * arrived between the open and the ioctl are delivered to kRPC.
1282 	 *
1283 	 * Early data should never arrive on a client stream since
1284 	 * servers only respond to our requests and we do not send any.
1285 	 * until after the stream is initialized.  Early data is
1286 	 * very common on a server stream where the client will start
1287 	 * sending data as soon as the connection is made (and this
1288 	 * is especially true with TCP where the protocol accepts the
1289 	 * connection before nfsd or kRPC is notified about it).
1290 	 */
1291 
1292 	mir->mir_hold_inbound = 1;
1293 
1294 	/*
1295 	 * Start the record marker looking for a 4-byte header.  When
1296 	 * this length is negative, it indicates that rpcmod is looking
1297 	 * for bytes to consume for the record marker header.  When it
1298 	 * is positive, it holds the number of bytes that have arrived
1299 	 * for the current fragment and are being held in mir_header_mp.
1300 	 */
1301 
1302 	mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
1303 
1304 	mir->mir_zoneid = rpc_zoneid();
1305 	mutex_init(&mir->mir_mutex, NULL, MUTEX_DEFAULT, NULL);
1306 	cv_init(&mir->mir_condvar, NULL, CV_DRIVER, NULL);
1307 	cv_init(&mir->mir_timer_cv, NULL, CV_DRIVER, NULL);
1308 
1309 	q->q_ptr = (char *)mir;
1310 	WR(q)->q_ptr = (char *)mir;
1311 
1312 	/*
1313 	 * We noenable the read-side queue because we don't want it
1314 	 * automatically enabled by putq.  We enable it explicitly
1315 	 * in mir_wsrv when appropriate. (See additional comments on
1316 	 * flow control at the beginning of mir_rsrv.)
1317 	 */
1318 	noenable(q);
1319 
1320 	qprocson(q);
1321 	return (0);
1322 }
1323 
1324 /*
1325  * Read-side put routine for both the client and server side.  Does the
1326  * record marking for incoming RPC messages, and when complete, dispatches
1327  * the message to either the client or server.
1328  */
1329 static void
mir_rput(queue_t * q,mblk_t * mp)1330 mir_rput(queue_t *q, mblk_t *mp)
1331 {
1332 	int	excess;
1333 	int32_t	frag_len, frag_header;
1334 	mblk_t	*cont_mp, *head_mp, *tail_mp, *mp1;
1335 	mir_t	*mir = q->q_ptr;
1336 	boolean_t stop_timer = B_FALSE;
1337 
1338 	ASSERT(mir != NULL);
1339 
1340 	/*
1341 	 * If the stream has not been set up as a RPC_CLIENT or RPC_SERVER
1342 	 * with the corresponding ioctl, then don't accept
1343 	 * any inbound data.  This should never happen for streams
1344 	 * created by nfsd or client-side kRPC because they are careful
1345 	 * to set the mode of the stream before doing anything else.
1346 	 */
1347 	if (mir->mir_type == 0) {
1348 		freemsg(mp);
1349 		return;
1350 	}
1351 
1352 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1353 
1354 	switch (mp->b_datap->db_type) {
1355 	case M_DATA:
1356 		break;
1357 	case M_PROTO:
1358 	case M_PCPROTO:
1359 		if (MBLKL(mp) < sizeof (t_scalar_t)) {
1360 			RPCLOG(1, "mir_rput: runt TPI message (%d bytes)\n",
1361 			    (int)MBLKL(mp));
1362 			freemsg(mp);
1363 			return;
1364 		}
1365 		if (((union T_primitives *)mp->b_rptr)->type != T_DATA_IND) {
1366 			mir_rput_proto(q, mp);
1367 			return;
1368 		}
1369 
1370 		/* Throw away the T_DATA_IND block and continue with data. */
1371 		mp1 = mp;
1372 		mp = mp->b_cont;
1373 		freeb(mp1);
1374 		break;
1375 	case M_SETOPTS:
1376 		/*
1377 		 * If a module on the stream is trying set the Stream head's
1378 		 * high water mark, then set our hiwater to the requested
1379 		 * value.  We are the "stream head" for all inbound
1380 		 * data messages since messages are passed directly to kRPC.
1381 		 */
1382 		if (MBLKL(mp) >= sizeof (struct stroptions)) {
1383 			struct stroptions	*stropts;
1384 
1385 			stropts = (struct stroptions *)mp->b_rptr;
1386 			if ((stropts->so_flags & SO_HIWAT) &&
1387 			    !(stropts->so_flags & SO_BAND)) {
1388 				(void) strqset(q, QHIWAT, 0, stropts->so_hiwat);
1389 			}
1390 		}
1391 		putnext(q, mp);
1392 		return;
1393 	case M_FLUSH:
1394 		RPCLOG(32, "mir_rput: ignoring M_FLUSH %x ", *mp->b_rptr);
1395 		RPCLOG(32, "on q 0x%p\n", (void *)q);
1396 		putnext(q, mp);
1397 		return;
1398 	default:
1399 		putnext(q, mp);
1400 		return;
1401 	}
1402 
1403 	mutex_enter(&mir->mir_mutex);
1404 
1405 	/*
1406 	 * If this connection is closing, don't accept any new messages.
1407 	 */
1408 	if (mir->mir_svc_no_more_msgs) {
1409 		ASSERT(mir->mir_type == RPC_SERVER);
1410 		mutex_exit(&mir->mir_mutex);
1411 		freemsg(mp);
1412 		return;
1413 	}
1414 
1415 	/* Get local copies for quicker access. */
1416 	frag_len = mir->mir_frag_len;
1417 	frag_header = mir->mir_frag_header;
1418 	head_mp = mir->mir_head_mp;
1419 	tail_mp = mir->mir_tail_mp;
1420 
1421 	/* Loop, processing each message block in the mp chain separately. */
1422 	do {
1423 		cont_mp = mp->b_cont;
1424 		mp->b_cont = NULL;
1425 
1426 		/*
1427 		 * Drop zero-length mblks to prevent unbounded kernel memory
1428 		 * consumption.
1429 		 */
1430 		if (MBLKL(mp) == 0) {
1431 			freeb(mp);
1432 			continue;
1433 		}
1434 
1435 		/*
1436 		 * If frag_len is negative, we're still in the process of
1437 		 * building frag_header -- try to complete it with this mblk.
1438 		 */
1439 		while (frag_len < 0 && mp->b_rptr < mp->b_wptr) {
1440 			frag_len++;
1441 			frag_header <<= 8;
1442 			frag_header += *mp->b_rptr++;
1443 		}
1444 
1445 		if (MBLKL(mp) == 0 && frag_len < 0) {
1446 			/*
1447 			 * We consumed this mblk while trying to complete the
1448 			 * fragment header.  Free it and move on.
1449 			 */
1450 			freeb(mp);
1451 			continue;
1452 		}
1453 
1454 		ASSERT(frag_len >= 0);
1455 
1456 		/*
1457 		 * Now frag_header has the number of bytes in this fragment
1458 		 * and we're just waiting to collect them all.  Chain our
1459 		 * latest mblk onto the list and see if we now have enough
1460 		 * bytes to complete the fragment.
1461 		 */
1462 		if (head_mp == NULL) {
1463 			ASSERT(tail_mp == NULL);
1464 			head_mp = tail_mp = mp;
1465 		} else {
1466 			tail_mp->b_cont = mp;
1467 			tail_mp = mp;
1468 		}
1469 
1470 		frag_len += MBLKL(mp);
1471 		excess = frag_len - (frag_header & ~MIR_LASTFRAG);
1472 		if (excess < 0) {
1473 			/*
1474 			 * We still haven't received enough data to complete
1475 			 * the fragment, so continue on to the next mblk.
1476 			 */
1477 			continue;
1478 		}
1479 
1480 		/*
1481 		 * We've got a complete fragment.  If there are excess bytes,
1482 		 * then they're part of the next fragment's header (of either
1483 		 * this RPC message or the next RPC message).  Split that part
1484 		 * into its own mblk so that we can safely freeb() it when
1485 		 * building frag_header above.
1486 		 */
1487 		if (excess > 0) {
1488 			if ((mp1 = dupb(mp)) == NULL &&
1489 			    (mp1 = copyb(mp)) == NULL) {
1490 				freemsg(head_mp);
1491 				freemsg(cont_mp);
1492 				RPCLOG0(1, "mir_rput: dupb/copyb failed\n");
1493 				mir->mir_frag_header = 0;
1494 				mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
1495 				mir->mir_head_mp = NULL;
1496 				mir->mir_tail_mp = NULL;
1497 				mir_disconnect(q, mir);	/* drops mir_mutex */
1498 				return;
1499 			}
1500 
1501 			/*
1502 			 * Relink the message chain so that the next mblk is
1503 			 * the next fragment header, followed by the rest of
1504 			 * the message chain.
1505 			 */
1506 			mp1->b_cont = cont_mp;
1507 			cont_mp = mp1;
1508 
1509 			/*
1510 			 * Data in the new mblk begins at the next fragment,
1511 			 * and data in the old mblk ends at the next fragment.
1512 			 */
1513 			mp1->b_rptr = mp1->b_wptr - excess;
1514 			mp->b_wptr -= excess;
1515 		}
1516 
1517 		/*
1518 		 * Reset frag_len and frag_header for the next fragment.
1519 		 */
1520 		frag_len = -(int32_t)sizeof (uint32_t);
1521 		if (!(frag_header & MIR_LASTFRAG)) {
1522 			/*
1523 			 * The current fragment is complete, but more
1524 			 * fragments need to be processed before we can
1525 			 * pass along the RPC message headed at head_mp.
1526 			 */
1527 			frag_header = 0;
1528 			continue;
1529 		}
1530 		frag_header = 0;
1531 
1532 		/*
1533 		 * We've got a complete RPC message; pass it to the
1534 		 * appropriate consumer.
1535 		 */
1536 		switch (mir->mir_type) {
1537 		case RPC_CLIENT:
1538 			if (clnt_dispatch_notify(head_mp, mir->mir_zoneid)) {
1539 				/*
1540 				 * Mark this stream as active.  This marker
1541 				 * is used in mir_timer().
1542 				 */
1543 				mir->mir_clntreq = 1;
1544 				mir->mir_use_timestamp = ddi_get_lbolt();
1545 			} else {
1546 				freemsg(head_mp);
1547 			}
1548 			break;
1549 
1550 		case RPC_SERVER:
1551 			/*
1552 			 * Check for flow control before passing the
1553 			 * message to kRPC.
1554 			 */
1555 			if (!mir->mir_hold_inbound) {
1556 				if (mir->mir_krpc_cell) {
1557 
1558 					if (mir_check_len(q, head_mp))
1559 						return;
1560 
1561 					if (q->q_first == NULL &&
1562 					    svc_queuereq(q, head_mp, TRUE)) {
1563 						/*
1564 						 * If the reference count is 0
1565 						 * (not including this
1566 						 * request), then the stream is
1567 						 * transitioning from idle to
1568 						 * non-idle.  In this case, we
1569 						 * cancel the idle timer.
1570 						 */
1571 						if (mir->mir_ref_cnt++ == 0)
1572 							stop_timer = B_TRUE;
1573 					} else {
1574 						(void) putq(q, head_mp);
1575 						mir->mir_inrservice = B_TRUE;
1576 					}
1577 				} else {
1578 					/*
1579 					 * Count # of times this happens. Should
1580 					 * be never, but experience shows
1581 					 * otherwise.
1582 					 */
1583 					mir_krpc_cell_null++;
1584 					freemsg(head_mp);
1585 				}
1586 			} else {
1587 				/*
1588 				 * If the outbound side of the stream is
1589 				 * flow controlled, then hold this message
1590 				 * until client catches up. mir_hold_inbound
1591 				 * is set in mir_wput and cleared in mir_wsrv.
1592 				 */
1593 				(void) putq(q, head_mp);
1594 				mir->mir_inrservice = B_TRUE;
1595 			}
1596 			break;
1597 		default:
1598 			RPCLOG(1, "mir_rput: unknown mir_type %d\n",
1599 			    mir->mir_type);
1600 			freemsg(head_mp);
1601 			break;
1602 		}
1603 
1604 		/*
1605 		 * Reset the chain since we're starting on a new RPC message.
1606 		 */
1607 		head_mp = tail_mp = NULL;
1608 	} while ((mp = cont_mp) != NULL);
1609 
1610 	/*
1611 	 * Sanity check the message length; if it's too large mir_check_len()
1612 	 * will shutdown the connection, drop mir_mutex, and return non-zero.
1613 	 */
1614 	if (head_mp != NULL && mir->mir_setup_complete &&
1615 	    mir_check_len(q, head_mp))
1616 		return;
1617 
1618 	/* Save our local copies back in the mir structure. */
1619 	mir->mir_frag_header = frag_header;
1620 	mir->mir_frag_len = frag_len;
1621 	mir->mir_head_mp = head_mp;
1622 	mir->mir_tail_mp = tail_mp;
1623 
1624 	/*
1625 	 * The timer is stopped after the whole message chain is processed.
1626 	 * The reason is that stopping the timer releases the mir_mutex
1627 	 * lock temporarily.  This means that the request can be serviced
1628 	 * while we are still processing the message chain.  This is not
1629 	 * good.  So we stop the timer here instead.
1630 	 *
1631 	 * Note that if the timer fires before we stop it, it will not
1632 	 * do any harm as MIR_SVC_QUIESCED() is false and mir_timer()
1633 	 * will just return.
1634 	 */
1635 	if (stop_timer) {
1636 		RPCLOG(16, "mir_rput: stopping idle timer on 0x%p because "
1637 		    "ref cnt going to non zero\n", (void *)WR(q));
1638 		mir_svc_idle_stop(WR(q), mir);
1639 	}
1640 	mutex_exit(&mir->mir_mutex);
1641 }
1642 
/*
 * Read-side handling of TPI protocol messages other than T_DATA_IND;
 * mir_rput() calls this for such M_PROTO/M_PCPROTO messages.
 *
 * Depending on the stream type and the primitive, the message is either
 * consumed here (freed, handed to a kRPC client notification routine, or
 * held in mir_svc_pend_mp) or passed upstream with putnext().
 *
 * Must be called without mir_mutex held; the mutex is taken internally
 * where mir state is modified.
 */
static void
mir_rput_proto(queue_t *q, mblk_t *mp)
{
	mir_t	*mir = (mir_t *)q->q_ptr;
	uint32_t	type;
	uint32_t reason = 0;

	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));

	type = ((union T_primitives *)mp->b_rptr)->type;
	switch (mir->mir_type) {
	case RPC_CLIENT:
		switch (type) {
		case T_DISCON_IND:
			/* Only a disconnect carries a reason; else it stays 0. */
			reason = ((struct T_discon_ind *)
			    (mp->b_rptr))->DISCON_reason;
			/*FALLTHROUGH*/
		case T_ORDREL_IND:
			mutex_enter(&mir->mir_mutex);
			/* Discard any partially assembled inbound record. */
			if (mir->mir_head_mp) {
				freemsg(mir->mir_head_mp);
				mir->mir_head_mp = (mblk_t *)0;
				mir->mir_tail_mp = (mblk_t *)0;
			}
			/*
			 * We are disconnecting, but not necessarily
			 * closing. By not closing, we will fail to
			 * pick up a possibly changed global timeout value,
			 * unless we store it now.
			 */
			mir->mir_idle_timeout = clnt_idle_timeout;
			mir_clnt_idle_stop(WR(q), mir);

			/*
			 * Even though we are unconnected, we still
			 * leave the idle timer going on the client. The
			 * reason for is that if we've disconnected due
			 * to a server-side disconnect, reset, or connection
			 * timeout, there is a possibility the client may
			 * retry the RPC request. This retry needs to done on
			 * the same bound address for the server to interpret
			 * it as such. However, we don't want
			 * to wait forever for that possibility. If the
			 * end-point stays unconnected for mir_idle_timeout
			 * units of time, then that is a signal to the
			 * connection manager to give up waiting for the
			 * application (eg. NFS) to send a retry.
			 */
			mir_clnt_idle_start(WR(q), mir);
			mutex_exit(&mir->mir_mutex);
			/* Notification is done without mir_mutex held. */
			clnt_dispatch_notifyall(WR(q), type, reason);
			freemsg(mp);
			return;
		case T_ERROR_ACK:
		{
			struct T_error_ack	*terror;

			terror = (struct T_error_ack *)mp->b_rptr;
			RPCLOG(1, "mir_rput_proto T_ERROR_ACK for queue 0x%p",
			    (void *)q);
			RPCLOG(1, " ERROR_prim: %s,",
			    rpc_tpiprim2name(terror->ERROR_prim));
			RPCLOG(1, " TLI_error: %s,",
			    rpc_tpierr2name(terror->TLI_error));
			RPCLOG(1, " UNIX_error: %d\n", terror->UNIX_error);
			if (terror->ERROR_prim == T_DISCON_REQ)  {
				/* Failed disconnect request: reason is 0. */
				clnt_dispatch_notifyall(WR(q), type, reason);
				freemsg(mp);
				return;
			} else {
				/*
				 * NOTE(review): a non-zero return presumably
				 * means clnt_dispatch_notifyconn() consumed
				 * mp -- confirm against its definition.
				 */
				if (clnt_dispatch_notifyconn(WR(q), mp))
					return;
			}
			break;
		}
		case T_OK_ACK:
		{
			struct T_ok_ack	*tok = (struct T_ok_ack *)mp->b_rptr;

			if (tok->CORRECT_prim == T_DISCON_REQ) {
				/* Acknowledged disconnect request. */
				clnt_dispatch_notifyall(WR(q), type, reason);
				freemsg(mp);
				return;
			} else {
				if (clnt_dispatch_notifyconn(WR(q), mp))
					return;
			}
			break;
		}
		case T_CONN_CON:
		case T_INFO_ACK:
		case T_OPTMGMT_ACK:
			if (clnt_dispatch_notifyconn(WR(q), mp))
				return;
			break;
		case T_BIND_ACK:
			/* Nothing to do here; passed upstream below. */
			break;
		default:
			RPCLOG(1, "mir_rput: unexpected message %d "
			    "for kRPC client\n",
			    ((union T_primitives *)mp->b_rptr)->type);
			break;
		}
		break;

	case RPC_SERVER:
		switch (type) {
		case T_BIND_ACK:
		{
			struct T_bind_ack	*tbind;

			/*
			 * If this is a listening stream, then shut
			 * off the idle timer.
			 */
			tbind = (struct T_bind_ack *)mp->b_rptr;
			if (tbind->CONIND_number > 0) {
				mutex_enter(&mir->mir_mutex);
				mir_svc_idle_stop(WR(q), mir);

				/*
				 * mark this as a listen endpoint
				 * for special handling.
				 */

				mir->mir_listen_stream = 1;
				mutex_exit(&mir->mir_mutex);
			}
			break;
		}
		case T_DISCON_IND:
		case T_ORDREL_IND:
			RPCLOG(16, "mir_rput_proto: got %s indication\n",
			    type == T_DISCON_IND ? "disconnect"
			    : "orderly release");

			/*
			 * For listen endpoint just pass
			 * on the message.
			 */

			if (mir->mir_listen_stream)
				break;

			mutex_enter(&mir->mir_mutex);

			/*
			 * If client wants to break off connection, record
			 * that fact.
			 */
			mir_svc_start_close(WR(q), mir);

			/*
			 * If we are idle, then send the orderly release
			 * or disconnect indication to nfsd.
			 */
			if (MIR_SVC_QUIESCED(mir)) {
				mutex_exit(&mir->mir_mutex);
				break;
			}

			RPCLOG(16, "mir_rput_proto: not idle, so "
			    "disconnect/ord rel indication not passed "
			    "upstream on 0x%p\n", (void *)q);

			/*
			 * Hold the indication until we get idle
			 * If there already is an indication stored,
			 * replace it if the new one is a disconnect. The
			 * reasoning is that disconnection takes less time
			 * to process, and once a client decides to
			 * disconnect, we should do that.
			 */
			if (mir->mir_svc_pend_mp) {
				if (type == T_DISCON_IND) {
					RPCLOG(16, "mir_rput_proto: replacing"
					    " held disconnect/ord rel"
					    " indication with disconnect on"
					    " 0x%p\n", (void *)q);

					freemsg(mir->mir_svc_pend_mp);
					mir->mir_svc_pend_mp = mp;
				} else {
					RPCLOG(16, "mir_rput_proto: already "
					    "held a disconnect/ord rel "
					    "indication. freeing ord rel "
					    "ind on 0x%p\n", (void *)q);
					freemsg(mp);
				}
			} else
				mir->mir_svc_pend_mp = mp;

			/* mp is now held or freed; do not pass it upstream. */
			mutex_exit(&mir->mir_mutex);
			return;

		default:
			/* nfsd handles server-side non-data messages. */
			break;
		}
		break;

	default:
		break;
	}

	/* Everything not consumed above goes upstream. */
	putnext(q, mp);
}
1850 
1851 /*
1852  * The server-side read queues are used to hold inbound messages while
1853  * outbound flow control is exerted.  When outbound flow control is
1854  * relieved, mir_wsrv qenables the read-side queue.  Read-side queues
1855  * are not enabled by STREAMS and are explicitly noenable'ed in mir_open.
1856  */
1857 static void
mir_rsrv(queue_t * q)1858 mir_rsrv(queue_t *q)
1859 {
1860 	mir_t	*mir;
1861 	mblk_t	*mp;
1862 	boolean_t stop_timer = B_FALSE;
1863 
1864 	mir = (mir_t *)q->q_ptr;
1865 	mutex_enter(&mir->mir_mutex);
1866 
1867 	mp = NULL;
1868 	switch (mir->mir_type) {
1869 	case RPC_SERVER:
1870 		if (mir->mir_ref_cnt == 0)
1871 			mir->mir_hold_inbound = 0;
1872 		if (mir->mir_hold_inbound)
1873 			break;
1874 
1875 		while (mp = getq(q)) {
1876 			if (mir->mir_krpc_cell &&
1877 			    (mir->mir_svc_no_more_msgs == 0)) {
1878 
1879 				if (mir_check_len(q, mp))
1880 					return;
1881 
1882 				if (svc_queuereq(q, mp, TRUE)) {
1883 					/*
1884 					 * If we were idle, turn off idle timer
1885 					 * since we aren't idle any more.
1886 					 */
1887 					if (mir->mir_ref_cnt++ == 0)
1888 						stop_timer = B_TRUE;
1889 				} else {
1890 					(void) putbq(q, mp);
1891 					break;
1892 				}
1893 			} else {
1894 				/*
1895 				 * Count # of times this happens. Should be
1896 				 * never, but experience shows otherwise.
1897 				 */
1898 				if (mir->mir_krpc_cell == NULL)
1899 					mir_krpc_cell_null++;
1900 				freemsg(mp);
1901 			}
1902 		}
1903 		break;
1904 	case RPC_CLIENT:
1905 		break;
1906 	default:
1907 		RPCLOG(1, "mir_rsrv: unexpected mir_type %d\n", mir->mir_type);
1908 
1909 		if (q->q_first == NULL)
1910 			MIR_CLEAR_INRSRV(mir);
1911 
1912 		mutex_exit(&mir->mir_mutex);
1913 
1914 		return;
1915 	}
1916 
1917 	/*
1918 	 * The timer is stopped after all the messages are processed.
1919 	 * The reason is that stopping the timer releases the mir_mutex
1920 	 * lock temporarily.  This means that the request can be serviced
1921 	 * while we are still processing the message queue.  This is not
1922 	 * good.  So we stop the timer here instead.
1923 	 */
1924 	if (stop_timer)  {
1925 		RPCLOG(16, "mir_rsrv stopping idle timer on 0x%p because ref "
1926 		    "cnt going to non zero\n", (void *)WR(q));
1927 		mir_svc_idle_stop(WR(q), mir);
1928 	}
1929 
1930 	if (q->q_first == NULL) {
1931 		mblk_t	*cmp = NULL;
1932 
1933 		MIR_CLEAR_INRSRV(mir);
1934 
1935 		if (mir->mir_type == RPC_SERVER && MIR_SVC_QUIESCED(mir)) {
1936 			cmp = mir->mir_svc_pend_mp;
1937 			mir->mir_svc_pend_mp = NULL;
1938 		}
1939 
1940 		mutex_exit(&mir->mir_mutex);
1941 
1942 		if (cmp != NULL) {
1943 			RPCLOG(16, "mir_rsrv: line %d: sending a held "
1944 			    "disconnect/ord rel indication upstream\n",
1945 			    __LINE__);
1946 			putnext(q, cmp);
1947 		}
1948 
1949 		return;
1950 	}
1951 	mutex_exit(&mir->mir_mutex);
1952 }
1953 
1954 static int mir_svc_policy_fails;
1955 
1956 /*
1957  * Called to send an event code to nfsd/lockd so that it initiates
1958  * connection close.
1959  */
1960 static int
mir_svc_policy_notify(queue_t * q,int event)1961 mir_svc_policy_notify(queue_t *q, int event)
1962 {
1963 	mblk_t	*mp;
1964 #ifdef DEBUG
1965 	mir_t *mir = (mir_t *)q->q_ptr;
1966 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
1967 #endif
1968 	ASSERT(q->q_flag & QREADR);
1969 
1970 	/*
1971 	 * Create an M_DATA message with the event code and pass it to the
1972 	 * Stream head (nfsd or whoever created the stream will consume it).
1973 	 */
1974 	mp = allocb(sizeof (int), BPRI_HI);
1975 
1976 	if (!mp) {
1977 
1978 		mir_svc_policy_fails++;
1979 		RPCLOG(16, "mir_svc_policy_notify: could not allocate event "
1980 		    "%d\n", event);
1981 		return (ENOMEM);
1982 	}
1983 
1984 	U32_TO_BE32(event, mp->b_rptr);
1985 	mp->b_wptr = mp->b_rptr + sizeof (int);
1986 	putnext(q, mp);
1987 	return (0);
1988 }
1989 
/*
 * Server side: start the close phase. We want to get this rpcmod slot in an
 * idle state before mir_close() is called.
 *
 * wq must be the write-side queue and the caller must hold mir_mutex.
 * Called from mir_close() and, on a disconnect or orderly release
 * indication, from mir_rput_proto().
 */
static void
mir_svc_start_close(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_SERVER);

	/*
	 * Do not accept any more messages.
	 */
	mir->mir_svc_no_more_msgs = 1;

	/*
	 * Next two statements will make the read service procedure
	 * free everything stuck in the streams read queue.
	 * It's not necessary because enabling the write queue will
	 * have the same effect, but why not speed the process along?
	 */
	mir->mir_hold_inbound = 0;
	qenable(RD(wq));

	/*
	 * Meanwhile force the write service procedure to send the
	 * responses downstream, regardless of flow control.
	 */
	qenable(wq);
}
2021 
/*
 * Take an additional reference on the stream behind the write-side queue
 * wq by incrementing mir_ref_cnt under mir_mutex.  The count is
 * decremented again in mir_svc_release().
 */
void
mir_svc_hold(queue_t *wq)
{
	mir_t *mir = (mir_t *)wq->q_ptr;

	mutex_enter(&mir->mir_mutex);
	mir->mir_ref_cnt++;
	mutex_exit(&mir->mir_mutex);
}
2031 
2032 /*
2033  * This routine is called directly by kRPC after a request is completed,
2034  * whether a reply was sent or the request was dropped.
2035  */
2036 void
mir_svc_release(queue_t * wq,mblk_t * mp,bool_t enable)2037 mir_svc_release(queue_t *wq, mblk_t *mp, bool_t enable)
2038 {
2039 	mir_t   *mir = (mir_t *)wq->q_ptr;
2040 	mblk_t	*cmp = NULL;
2041 
2042 	ASSERT((wq->q_flag & QREADR) == 0);
2043 	if (mp)
2044 		freemsg(mp);
2045 
2046 	if (enable)
2047 		qenable(RD(wq));
2048 
2049 	mutex_enter(&mir->mir_mutex);
2050 
2051 	/*
2052 	 * Start idle processing if this is the last reference.
2053 	 */
2054 	if ((mir->mir_ref_cnt == 1) && (mir->mir_inrservice == 0)) {
2055 		cmp = mir->mir_svc_pend_mp;
2056 		mir->mir_svc_pend_mp = NULL;
2057 	}
2058 
2059 	if (cmp) {
2060 		RPCLOG(16, "mir_svc_release: sending a held "
2061 		    "disconnect/ord rel indication upstream on queue 0x%p\n",
2062 		    (void *)RD(wq));
2063 
2064 		mutex_exit(&mir->mir_mutex);
2065 
2066 		putnext(RD(wq), cmp);
2067 
2068 		mutex_enter(&mir->mir_mutex);
2069 	}
2070 
2071 	/*
2072 	 * Start idle processing if this is the last reference.
2073 	 */
2074 	if (mir->mir_ref_cnt == 1 && mir->mir_inrservice == 0) {
2075 
2076 		RPCLOG(16, "mir_svc_release starting idle timer on 0x%p "
2077 		    "because ref cnt is zero\n", (void *) wq);
2078 
2079 		mir_svc_idle_start(wq, mir);
2080 	}
2081 
2082 	mir->mir_ref_cnt--;
2083 	ASSERT(mir->mir_ref_cnt >= 0);
2084 
2085 	/*
2086 	 * Wake up the thread waiting to close.
2087 	 */
2088 
2089 	if ((mir->mir_ref_cnt == 0) && mir->mir_closing)
2090 		cv_signal(&mir->mir_condvar);
2091 
2092 	mutex_exit(&mir->mir_mutex);
2093 }
2094 
/*
 * This routine is called by server-side kRPC when it is ready to
 * handle inbound messages on the stream.
 *
 * It is installed as the kRPC hook mir_start in mir_open().  wq is the
 * write-side queue; setup is marked complete and the read-side queue is
 * enabled so that its service procedure gets scheduled.
 */
static void
mir_svc_start(queue_t *wq)
{
	mir_t   *mir = (mir_t *)wq->q_ptr;

	/*
	 * no longer need to take the mir_mutex because the
	 * mir_setup_complete field has been moved out of
	 * the binary field protected by the mir_mutex.
	 */

	mir->mir_setup_complete = 1;
	qenable(RD(wq));
}
2113 
/*
 * client side wrapper for stopping timer with normal idle timeout.
 *
 * wq must be the write-side queue of an RPC_CLIENT stream and the caller
 * must hold mir_mutex.  The work is done by mir_timer_stop().
 */
static void
mir_clnt_idle_stop(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_CLIENT);

	mir_timer_stop(mir);
}
2126 
/*
 * client side wrapper for starting timer with normal idle timeout.
 *
 * wq must be the write-side queue of an RPC_CLIENT stream and the caller
 * must hold mir_mutex.  The timer is armed through mir_timer_start() with
 * the stream's mir_idle_timeout as the interval.
 */
static void
mir_clnt_idle_start(queue_t *wq, mir_t *mir)
{
	ASSERT(MUTEX_HELD(&mir->mir_mutex));
	ASSERT((wq->q_flag & QREADR) == 0);
	ASSERT(mir->mir_type == RPC_CLIENT);

	mir_timer_start(wq, mir, mir->mir_idle_timeout);
}
2139 
/*
 * client side only. Forces rpcmod to stop sending T_ORDREL_REQs on
 * end-points that aren't connected.
 *
 * This is the lock-taking variant of mir_clnt_idle_stop(): it must be
 * called without mir_mutex held, takes the mutex, stops the idle timer
 * and releases the mutex again.  It is installed as the kRPC hook
 * clnt_stop_idle in mir_open().
 */
static void
mir_clnt_idle_do_stop(queue_t *wq)
{
	mir_t   *mir = (mir_t *)wq->q_ptr;

	RPCLOG(1, "mir_clnt_idle_do_stop: wq 0x%p\n", (void *)wq);
	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
	mutex_enter(&mir->mir_mutex);
	mir_clnt_idle_stop(wq, mir);
	mutex_exit(&mir->mir_mutex);
}
2155 
2156 /*
2157  * Timer handler.  It handles idle timeout and memory shortage problem.
2158  */
2159 static void
mir_timer(void * arg)2160 mir_timer(void *arg)
2161 {
2162 	queue_t *wq = (queue_t *)arg;
2163 	mir_t *mir = (mir_t *)wq->q_ptr;
2164 	boolean_t notify;
2165 	clock_t now;
2166 
2167 	mutex_enter(&mir->mir_mutex);
2168 
2169 	/*
2170 	 * mir_timer_call is set only when either mir_timer_[start|stop]
2171 	 * is progressing.  And mir_timer() can only be run while they
2172 	 * are progressing if the timer is being stopped.  So just
2173 	 * return.
2174 	 */
2175 	if (mir->mir_timer_call) {
2176 		mutex_exit(&mir->mir_mutex);
2177 		return;
2178 	}
2179 	mir->mir_timer_id = 0;
2180 
2181 	switch (mir->mir_type) {
2182 	case RPC_CLIENT:
2183 
2184 		/*
2185 		 * For clients, the timer fires at clnt_idle_timeout
2186 		 * intervals.  If the activity marker (mir_clntreq) is
2187 		 * zero, then the stream has been idle since the last
2188 		 * timer event and we notify kRPC.  If mir_clntreq is
2189 		 * non-zero, then the stream is active and we just
2190 		 * restart the timer for another interval.  mir_clntreq
2191 		 * is set to 1 in mir_wput for every request passed
2192 		 * downstream.
2193 		 *
2194 		 * If this was a memory shortage timer reset the idle
2195 		 * timeout regardless; the mir_clntreq will not be a
2196 		 * valid indicator.
2197 		 *
2198 		 * The timer is initially started in mir_wput during
2199 		 * RPC_CLIENT ioctl processing.
2200 		 *
2201 		 * The timer interval can be changed for individual
2202 		 * streams with the ND variable "mir_idle_timeout".
2203 		 */
2204 		now = ddi_get_lbolt();
2205 		if (mir->mir_clntreq > 0 && mir->mir_use_timestamp +
2206 		    MSEC_TO_TICK(mir->mir_idle_timeout) - now >= 0) {
2207 			clock_t tout;
2208 
2209 			tout = mir->mir_idle_timeout -
2210 			    TICK_TO_MSEC(now - mir->mir_use_timestamp);
2211 			if (tout < 0)
2212 				tout = 1000;
2213 #if 0
2214 			printf("mir_timer[%d < %d + %d]: reset client timer "
2215 			    "to %d (ms)\n", TICK_TO_MSEC(now),
2216 			    TICK_TO_MSEC(mir->mir_use_timestamp),
2217 			    mir->mir_idle_timeout, tout);
2218 #endif
2219 			mir->mir_clntreq = 0;
2220 			mir_timer_start(wq, mir, tout);
2221 			mutex_exit(&mir->mir_mutex);
2222 			return;
2223 		}
2224 #if 0
2225 printf("mir_timer[%d]: doing client timeout\n", now / hz);
2226 #endif
2227 		/*
2228 		 * We are disconnecting, but not necessarily
2229 		 * closing. By not closing, we will fail to
2230 		 * pick up a possibly changed global timeout value,
2231 		 * unless we store it now.
2232 		 */
2233 		mir->mir_idle_timeout = clnt_idle_timeout;
2234 		mir_clnt_idle_start(wq, mir);
2235 
2236 		mutex_exit(&mir->mir_mutex);
2237 		/*
2238 		 * We pass T_ORDREL_REQ as an integer value
2239 		 * to kRPC as the indication that the stream
2240 		 * is idle.  This is not a T_ORDREL_REQ message,
2241 		 * it is just a convenient value since we call
2242 		 * the same kRPC routine for T_ORDREL_INDs and
2243 		 * T_DISCON_INDs.
2244 		 */
2245 		clnt_dispatch_notifyall(wq, T_ORDREL_REQ, 0);
2246 		return;
2247 
2248 	case RPC_SERVER:
2249 
2250 		/*
2251 		 * For servers, the timer is only running when the stream
2252 		 * is really idle or memory is short.  The timer is started
2253 		 * by mir_wput when mir_type is set to RPC_SERVER and
2254 		 * by mir_svc_idle_start whenever the stream goes idle
2255 		 * (mir_ref_cnt == 0).  The timer is cancelled in
2256 		 * mir_rput whenever a new inbound request is passed to kRPC
2257 		 * and the stream was previously idle.
2258 		 *
2259 		 * The timer interval can be changed for individual
2260 		 * streams with the ND variable "mir_idle_timeout".
2261 		 *
2262 		 * If the stream is not idle do nothing.
2263 		 */
2264 		if (!MIR_SVC_QUIESCED(mir)) {
2265 			mutex_exit(&mir->mir_mutex);
2266 			return;
2267 		}
2268 
2269 		notify = !mir->mir_inrservice;
2270 		mutex_exit(&mir->mir_mutex);
2271 
2272 		/*
2273 		 * If there is no packet queued up in read queue, the stream
2274 		 * is really idle so notify nfsd to close it.
2275 		 */
2276 		if (notify) {
2277 			RPCLOG(16, "mir_timer: telling stream head listener "
2278 			    "to close stream (0x%p)\n", (void *) RD(wq));
2279 			(void) mir_svc_policy_notify(RD(wq), 1);
2280 		}
2281 		return;
2282 	default:
2283 		RPCLOG(1, "mir_timer: unexpected mir_type %d\n",
2284 		    mir->mir_type);
2285 		mutex_exit(&mir->mir_mutex);
2286 		return;
2287 	}
2288 }
2289 
2290 /*
2291  * Called by the RPC package to send either a call or a return, or a
2292  * transport connection request.  Adds the record marking header.
2293  */
2294 static void
mir_wput(queue_t * q,mblk_t * mp)2295 mir_wput(queue_t *q, mblk_t *mp)
2296 {
2297 	uint_t	frag_header;
2298 	mir_t	*mir = (mir_t *)q->q_ptr;
2299 	uchar_t	*rptr = mp->b_rptr;
2300 
2301 	if (!mir) {
2302 		freemsg(mp);
2303 		return;
2304 	}
2305 
2306 	if (mp->b_datap->db_type != M_DATA) {
2307 		mir_wput_other(q, mp);
2308 		return;
2309 	}
2310 
2311 	if (mir->mir_ordrel_pending == 1) {
2312 		freemsg(mp);
2313 		RPCLOG(16, "mir_wput wq 0x%p: got data after T_ORDREL_REQ\n",
2314 		    (void *)q);
2315 		return;
2316 	}
2317 
2318 	frag_header = (uint_t)DLEN(mp);
2319 	frag_header |= MIR_LASTFRAG;
2320 
2321 	/* Stick in the 4 byte record marking header. */
2322 	if ((rptr - mp->b_datap->db_base) < sizeof (uint32_t) ||
2323 	    !IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
2324 		/*
2325 		 * Since we know that M_DATA messages are created exclusively
2326 		 * by kRPC, we expect that kRPC will leave room for our header
2327 		 * and 4 byte align which is normal for XDR.
2328 		 * If kRPC (or someone else) does not cooperate, then we
2329 		 * just throw away the message.
2330 		 */
2331 		RPCLOG(1, "mir_wput: kRPC did not leave space for record "
2332 		    "fragment header (%d bytes left)\n",
2333 		    (int)(rptr - mp->b_datap->db_base));
2334 		freemsg(mp);
2335 		return;
2336 	}
2337 	rptr -= sizeof (uint32_t);
2338 	*(uint32_t *)rptr = htonl(frag_header);
2339 	mp->b_rptr = rptr;
2340 
2341 	mutex_enter(&mir->mir_mutex);
2342 	if (mir->mir_type == RPC_CLIENT) {
2343 		/*
2344 		 * For the client, set mir_clntreq to indicate that the
2345 		 * connection is active.
2346 		 */
2347 		mir->mir_clntreq = 1;
2348 		mir->mir_use_timestamp = ddi_get_lbolt();
2349 	}
2350 
2351 	/*
2352 	 * If we haven't already queued some data and the downstream module
2353 	 * can accept more data, send it on, otherwise we queue the message
2354 	 * and take other actions depending on mir_type.
2355 	 */
2356 	if (!mir->mir_inwservice && MIR_WCANPUTNEXT(mir, q)) {
2357 		mutex_exit(&mir->mir_mutex);
2358 
2359 		/*
2360 		 * Now we pass the RPC message downstream.
2361 		 */
2362 		putnext(q, mp);
2363 		return;
2364 	}
2365 
2366 	switch (mir->mir_type) {
2367 	case RPC_CLIENT:
2368 		/*
2369 		 * Check for a previous duplicate request on the
2370 		 * queue.  If there is one, then we throw away
2371 		 * the current message and let the previous one
2372 		 * go through.  If we can't find a duplicate, then
2373 		 * send this one.  This tap dance is an effort
2374 		 * to reduce traffic and processing requirements
2375 		 * under load conditions.
2376 		 */
2377 		if (mir_clnt_dup_request(q, mp)) {
2378 			mutex_exit(&mir->mir_mutex);
2379 			freemsg(mp);
2380 			return;
2381 		}
2382 		break;
2383 	case RPC_SERVER:
2384 		/*
2385 		 * Set mir_hold_inbound so that new inbound RPC
2386 		 * messages will be held until the client catches
2387 		 * up on the earlier replies.  This flag is cleared
2388 		 * in mir_wsrv after flow control is relieved;
2389 		 * the read-side queue is also enabled at that time.
2390 		 */
2391 		mir->mir_hold_inbound = 1;
2392 		break;
2393 	default:
2394 		RPCLOG(1, "mir_wput: unexpected mir_type %d\n", mir->mir_type);
2395 		break;
2396 	}
2397 	mir->mir_inwservice = 1;
2398 	(void) putq(q, mp);
2399 	mutex_exit(&mir->mir_mutex);
2400 }
2401 
2402 static void
mir_wput_other(queue_t * q,mblk_t * mp)2403 mir_wput_other(queue_t *q, mblk_t *mp)
2404 {
2405 	mir_t	*mir = (mir_t *)q->q_ptr;
2406 	struct iocblk	*iocp;
2407 	uchar_t	*rptr = mp->b_rptr;
2408 	bool_t	flush_in_svc = FALSE;
2409 
2410 	ASSERT(MUTEX_NOT_HELD(&mir->mir_mutex));
2411 	switch (mp->b_datap->db_type) {
2412 	case M_IOCTL:
2413 		iocp = (struct iocblk *)rptr;
2414 		switch (iocp->ioc_cmd) {
2415 		case RPC_CLIENT:
2416 			mutex_enter(&mir->mir_mutex);
2417 			if (mir->mir_type != 0 &&
2418 			    mir->mir_type != iocp->ioc_cmd) {
2419 ioc_eperm:
2420 				mutex_exit(&mir->mir_mutex);
2421 				iocp->ioc_error = EPERM;
2422 				iocp->ioc_count = 0;
2423 				mp->b_datap->db_type = M_IOCACK;
2424 				qreply(q, mp);
2425 				return;
2426 			}
2427 
2428 			mir->mir_type = iocp->ioc_cmd;
2429 
2430 			/*
2431 			 * Clear mir_hold_inbound which was set to 1 by
2432 			 * mir_open.  This flag is not used on client
2433 			 * streams.
2434 			 */
2435 			mir->mir_hold_inbound = 0;
2436 			mir->mir_max_msg_sizep = &clnt_max_msg_size;
2437 
2438 			/*
2439 			 * Start the idle timer.  See mir_timer() for more
2440 			 * information on how client timers work.
2441 			 */
2442 			mir->mir_idle_timeout = clnt_idle_timeout;
2443 			mir_clnt_idle_start(q, mir);
2444 			mutex_exit(&mir->mir_mutex);
2445 
2446 			mp->b_datap->db_type = M_IOCACK;
2447 			qreply(q, mp);
2448 			return;
2449 		case RPC_SERVER:
2450 			mutex_enter(&mir->mir_mutex);
2451 			if (mir->mir_type != 0 &&
2452 			    mir->mir_type != iocp->ioc_cmd)
2453 				goto ioc_eperm;
2454 
2455 			/*
2456 			 * We don't clear mir_hold_inbound here because
2457 			 * mir_hold_inbound is used in the flow control
2458 			 * model. If we cleared it here, then we'd commit
2459 			 * a small violation to the model where the transport
2460 			 * might immediately block downstream flow.
2461 			 */
2462 
2463 			mir->mir_type = iocp->ioc_cmd;
2464 			mir->mir_max_msg_sizep = &svc_max_msg_size;
2465 
2466 			/*
2467 			 * Start the idle timer.  See mir_timer() for more
2468 			 * information on how server timers work.
2469 			 *
2470 			 * Note that it is important to start the idle timer
2471 			 * here so that connections time out even if we
2472 			 * never receive any data on them.
2473 			 */
2474 			mir->mir_idle_timeout = svc_idle_timeout;
2475 			RPCLOG(16, "mir_wput_other starting idle timer on 0x%p "
2476 			    "because we got RPC_SERVER ioctl\n", (void *)q);
2477 			mir_svc_idle_start(q, mir);
2478 			mutex_exit(&mir->mir_mutex);
2479 
2480 			mp->b_datap->db_type = M_IOCACK;
2481 			qreply(q, mp);
2482 			return;
2483 		default:
2484 			break;
2485 		}
2486 		break;
2487 
2488 	case M_PROTO:
2489 		if (mir->mir_type == RPC_CLIENT) {
2490 			/*
2491 			 * We are likely being called from the context of a
2492 			 * service procedure. So we need to enqueue. However
2493 			 * enqueing may put our message behind data messages.
2494 			 * So flush the data first.
2495 			 */
2496 			flush_in_svc = TRUE;
2497 		}
2498 		if ((mp->b_wptr - rptr) < sizeof (uint32_t) ||
2499 		    !IS_P2ALIGNED(rptr, sizeof (uint32_t)))
2500 			break;
2501 
2502 		switch (((union T_primitives *)rptr)->type) {
2503 		case T_DATA_REQ:
2504 			/* Don't pass T_DATA_REQ messages downstream. */
2505 			freemsg(mp);
2506 			return;
2507 		case T_ORDREL_REQ:
2508 			RPCLOG(8, "mir_wput_other wq 0x%p: got T_ORDREL_REQ\n",
2509 			    (void *)q);
2510 			mutex_enter(&mir->mir_mutex);
2511 			if (mir->mir_type != RPC_SERVER) {
2512 				/*
2513 				 * We are likely being called from
2514 				 * clnt_dispatch_notifyall(). Sending
2515 				 * a T_ORDREL_REQ will result in
2516 				 * a some kind of _IND message being sent,
2517 				 * will be another call to
2518 				 * clnt_dispatch_notifyall(). To keep the stack
2519 				 * lean, queue this message.
2520 				 */
2521 				mir->mir_inwservice = 1;
2522 				(void) putq(q, mp);
2523 				mutex_exit(&mir->mir_mutex);
2524 				return;
2525 			}
2526 
2527 			/*
2528 			 * Mark the structure such that we don't accept any
2529 			 * more requests from client. We could defer this
2530 			 * until we actually send the orderly release
2531 			 * request downstream, but all that does is delay
2532 			 * the closing of this stream.
2533 			 */
2534 			RPCLOG(16, "mir_wput_other wq 0x%p: got T_ORDREL_REQ "
2535 			    " so calling mir_svc_start_close\n", (void *)q);
2536 
2537 			mir_svc_start_close(q, mir);
2538 
2539 			/*
2540 			 * If we have sent down a T_ORDREL_REQ, don't send
2541 			 * any more.
2542 			 */
2543 			if (mir->mir_ordrel_pending) {
2544 				freemsg(mp);
2545 				mutex_exit(&mir->mir_mutex);
2546 				return;
2547 			}
2548 
2549 			/*
2550 			 * If the stream is not idle, then we hold the
2551 			 * orderly release until it becomes idle.  This
2552 			 * ensures that kRPC will be able to reply to
2553 			 * all requests that we have passed to it.
2554 			 *
2555 			 * We also queue the request if there is data already
2556 			 * queued, because we cannot allow the T_ORDREL_REQ
2557 			 * to go before data. When we had a separate reply
2558 			 * count, this was not a problem, because the
2559 			 * reply count was reconciled when mir_wsrv()
2560 			 * completed.
2561 			 */
2562 			if (!MIR_SVC_QUIESCED(mir) ||
2563 			    mir->mir_inwservice == 1) {
2564 				mir->mir_inwservice = 1;
2565 				(void) putq(q, mp);
2566 
2567 				RPCLOG(16, "mir_wput_other: queuing "
2568 				    "T_ORDREL_REQ on 0x%p\n", (void *)q);
2569 
2570 				mutex_exit(&mir->mir_mutex);
2571 				return;
2572 			}
2573 
2574 			/*
2575 			 * Mark the structure so that we know we sent
2576 			 * an orderly release request, and reset the idle timer.
2577 			 */
2578 			mir->mir_ordrel_pending = 1;
2579 
2580 			RPCLOG(16, "mir_wput_other: calling mir_svc_idle_start"
2581 			    " on 0x%p because we got T_ORDREL_REQ\n",
2582 			    (void *)q);
2583 
2584 			mir_svc_idle_start(q, mir);
2585 			mutex_exit(&mir->mir_mutex);
2586 
2587 			/*
2588 			 * When we break, we will putnext the T_ORDREL_REQ.
2589 			 */
2590 			break;
2591 
2592 		case T_CONN_REQ:
2593 			mutex_enter(&mir->mir_mutex);
2594 			if (mir->mir_head_mp != NULL) {
2595 				freemsg(mir->mir_head_mp);
2596 				mir->mir_head_mp = NULL;
2597 				mir->mir_tail_mp = NULL;
2598 			}
2599 			mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
2600 			/*
2601 			 * Restart timer in case mir_clnt_idle_do_stop() was
2602 			 * called.
2603 			 */
2604 			mir->mir_idle_timeout = clnt_idle_timeout;
2605 			mir_clnt_idle_stop(q, mir);
2606 			mir_clnt_idle_start(q, mir);
2607 			mutex_exit(&mir->mir_mutex);
2608 			break;
2609 
2610 		default:
2611 			/*
2612 			 * T_DISCON_REQ is one of the interesting default
2613 			 * cases here. Ideally, an M_FLUSH is done before
2614 			 * T_DISCON_REQ is done. However, that is somewhat
2615 			 * cumbersome for clnt_cots.c to do. So we queue
2616 			 * T_DISCON_REQ, and let the service procedure
2617 			 * flush all M_DATA.
2618 			 */
2619 			break;
2620 		}
2621 		/* FALLTHROUGH */
2622 	default:
2623 		if (mp->b_datap->db_type >= QPCTL) {
2624 			if (mp->b_datap->db_type == M_FLUSH) {
2625 				if (mir->mir_type == RPC_CLIENT &&
2626 				    *mp->b_rptr & FLUSHW) {
2627 					RPCLOG(32, "mir_wput_other: flushing "
2628 					    "wq 0x%p\n", (void *)q);
2629 					if (*mp->b_rptr & FLUSHBAND) {
2630 						flushband(q, *(mp->b_rptr + 1),
2631 						    FLUSHDATA);
2632 					} else {
2633 						flushq(q, FLUSHDATA);
2634 					}
2635 				} else {
2636 					RPCLOG(32, "mir_wput_other: ignoring "
2637 					    "M_FLUSH on wq 0x%p\n", (void *)q);
2638 				}
2639 			}
2640 			break;
2641 		}
2642 
2643 		mutex_enter(&mir->mir_mutex);
2644 		if (mir->mir_inwservice == 0 && MIR_WCANPUTNEXT(mir, q)) {
2645 			mutex_exit(&mir->mir_mutex);
2646 			break;
2647 		}
2648 		mir->mir_inwservice = 1;
2649 		mir->mir_inwflushdata = flush_in_svc;
2650 		(void) putq(q, mp);
2651 		mutex_exit(&mir->mir_mutex);
2652 		qenable(q);
2653 
2654 		return;
2655 	}
2656 	putnext(q, mp);
2657 }
2658 
2659 static void
mir_wsrv(queue_t * q)2660 mir_wsrv(queue_t *q)
2661 {
2662 	mblk_t	*mp;
2663 	mir_t	*mir;
2664 	bool_t flushdata;
2665 
2666 	mir = (mir_t *)q->q_ptr;
2667 	mutex_enter(&mir->mir_mutex);
2668 
2669 	flushdata = mir->mir_inwflushdata;
2670 	mir->mir_inwflushdata = 0;
2671 
2672 	while (mp = getq(q)) {
2673 		if (mp->b_datap->db_type == M_DATA) {
2674 			/*
2675 			 * Do not send any more data if we have sent
2676 			 * a T_ORDREL_REQ.
2677 			 */
2678 			if (flushdata || mir->mir_ordrel_pending == 1) {
2679 				freemsg(mp);
2680 				continue;
2681 			}
2682 
2683 			/*
2684 			 * Make sure that the stream can really handle more
2685 			 * data.
2686 			 */
2687 			if (!MIR_WCANPUTNEXT(mir, q)) {
2688 				(void) putbq(q, mp);
2689 				mutex_exit(&mir->mir_mutex);
2690 				return;
2691 			}
2692 
2693 			/*
2694 			 * Now we pass the RPC message downstream.
2695 			 */
2696 			mutex_exit(&mir->mir_mutex);
2697 			putnext(q, mp);
2698 			mutex_enter(&mir->mir_mutex);
2699 			continue;
2700 		}
2701 
2702 		/*
2703 		 * This is not an RPC message, pass it downstream
2704 		 * (ignoring flow control) if the server side is not sending a
2705 		 * T_ORDREL_REQ downstream.
2706 		 */
2707 		if (mir->mir_type != RPC_SERVER ||
2708 		    ((union T_primitives *)mp->b_rptr)->type !=
2709 		    T_ORDREL_REQ) {
2710 			mutex_exit(&mir->mir_mutex);
2711 			putnext(q, mp);
2712 			mutex_enter(&mir->mir_mutex);
2713 			continue;
2714 		}
2715 
2716 		if (mir->mir_ordrel_pending == 1) {
2717 			/*
2718 			 * Don't send two T_ORDRELs
2719 			 */
2720 			freemsg(mp);
2721 			continue;
2722 		}
2723 
2724 		/*
2725 		 * Mark the structure so that we know we sent an orderly
2726 		 * release request.  We will check to see slot is idle at the
2727 		 * end of this routine, and if so, reset the idle timer to
2728 		 * handle orderly release timeouts.
2729 		 */
2730 		mir->mir_ordrel_pending = 1;
2731 		RPCLOG(16, "mir_wsrv: sending ordrel req on q 0x%p\n",
2732 		    (void *)q);
2733 		/*
2734 		 * Send the orderly release downstream. If there are other
2735 		 * pending replies we won't be able to send them.  However,
2736 		 * the only reason we should send the orderly release is if
2737 		 * we were idle, or if an unusual event occurred.
2738 		 */
2739 		mutex_exit(&mir->mir_mutex);
2740 		putnext(q, mp);
2741 		mutex_enter(&mir->mir_mutex);
2742 	}
2743 
2744 	if (q->q_first == NULL)
2745 		/*
2746 		 * If we call mir_svc_idle_start() below, then
2747 		 * clearing mir_inwservice here will also result in
2748 		 * any thread waiting in mir_close() to be signaled.
2749 		 */
2750 		mir->mir_inwservice = 0;
2751 
2752 	if (mir->mir_type != RPC_SERVER) {
2753 		mutex_exit(&mir->mir_mutex);
2754 		return;
2755 	}
2756 
2757 	/*
2758 	 * If idle we call mir_svc_idle_start to start the timer (or wakeup
2759 	 * a close). Also make sure not to start the idle timer on the
2760 	 * listener stream. This can cause nfsd to send an orderly release
2761 	 * command on the listener stream.
2762 	 */
2763 	if (MIR_SVC_QUIESCED(mir) && !(mir->mir_listen_stream)) {
2764 		RPCLOG(16, "mir_wsrv: calling mir_svc_idle_start on 0x%p "
2765 		    "because mir slot is idle\n", (void *)q);
2766 		mir_svc_idle_start(q, mir);
2767 	}
2768 
2769 	/*
2770 	 * If outbound flow control has been relieved, then allow new
2771 	 * inbound requests to be processed.
2772 	 */
2773 	if (mir->mir_hold_inbound) {
2774 		mir->mir_hold_inbound = 0;
2775 		qenable(RD(q));
2776 	}
2777 	mutex_exit(&mir->mir_mutex);
2778 }
2779 
2780 static void
mir_disconnect(queue_t * q,mir_t * mir)2781 mir_disconnect(queue_t *q, mir_t *mir)
2782 {
2783 	ASSERT(MUTEX_HELD(&mir->mir_mutex));
2784 
2785 	switch (mir->mir_type) {
2786 	case RPC_CLIENT:
2787 		/*
2788 		 * We are disconnecting, but not necessarily
2789 		 * closing. By not closing, we will fail to
2790 		 * pick up a possibly changed global timeout value,
2791 		 * unless we store it now.
2792 		 */
2793 		mir->mir_idle_timeout = clnt_idle_timeout;
2794 		mir_clnt_idle_start(WR(q), mir);
2795 		mutex_exit(&mir->mir_mutex);
2796 
2797 		/*
2798 		 * T_DISCON_REQ is passed to kRPC as an integer value
2799 		 * (this is not a TPI message).  It is used as a
2800 		 * convenient value to indicate a sanity check
2801 		 * failure -- the same kRPC routine is also called
2802 		 * for T_DISCON_INDs and T_ORDREL_INDs.
2803 		 */
2804 		clnt_dispatch_notifyall(WR(q), T_DISCON_REQ, 0);
2805 		break;
2806 
2807 	case RPC_SERVER:
2808 		mir->mir_svc_no_more_msgs = 1;
2809 		mir_svc_idle_stop(WR(q), mir);
2810 		mutex_exit(&mir->mir_mutex);
2811 		RPCLOG(16, "mir_disconnect: telling "
2812 		    "stream head listener to disconnect stream "
2813 		    "(0x%p)\n", (void *) q);
2814 		(void) mir_svc_policy_notify(q, 2);
2815 		break;
2816 
2817 	default:
2818 		mutex_exit(&mir->mir_mutex);
2819 		break;
2820 	}
2821 }
2822 
2823 /*
2824  * Sanity check the message length, and if it's too large, shutdown the
2825  * connection.  Returns 1 if the connection is shutdown; 0 otherwise.
2826  */
2827 static int
mir_check_len(queue_t * q,mblk_t * head_mp)2828 mir_check_len(queue_t *q, mblk_t *head_mp)
2829 {
2830 	mir_t *mir = q->q_ptr;
2831 	uint_t maxsize = 0;
2832 	size_t msg_len = msgdsize(head_mp);
2833 
2834 	if (mir->mir_max_msg_sizep != NULL)
2835 		maxsize = *mir->mir_max_msg_sizep;
2836 
2837 	if (maxsize == 0 || msg_len <= maxsize)
2838 		return (0);
2839 
2840 	freemsg(head_mp);
2841 	mir->mir_head_mp = NULL;
2842 	mir->mir_tail_mp = NULL;
2843 	mir->mir_frag_header = 0;
2844 	mir->mir_frag_len = -(int32_t)sizeof (uint32_t);
2845 	if (mir->mir_type != RPC_SERVER || mir->mir_setup_complete) {
2846 		cmn_err(CE_NOTE,
2847 		    "kRPC: record fragment from %s of size(%lu) exceeds "
2848 		    "maximum (%u). Disconnecting",
2849 		    (mir->mir_type == RPC_CLIENT) ? "server" :
2850 		    (mir->mir_type == RPC_SERVER) ? "client" :
2851 		    "test tool", msg_len, maxsize);
2852 	}
2853 
2854 	mir_disconnect(q, mir);
2855 	return (1);
2856 }
2857