xref: /titanic_52/usr/src/uts/common/os/streamio.c (revision e11f6fbcfd838459080e675d24788eda4783c1d7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
35 #include <sys/param.h>
36 #include <sys/errno.h>
37 #include <sys/signal.h>
38 #include <sys/stat.h>
39 #include <sys/proc.h>
40 #include <sys/cred.h>
41 #include <sys/user.h>
42 #include <sys/vnode.h>
43 #include <sys/file.h>
44 #include <sys/stream.h>
45 #include <sys/strsubr.h>
46 #include <sys/stropts.h>
47 #include <sys/tihdr.h>
48 #include <sys/var.h>
49 #include <sys/poll.h>
50 #include <sys/termio.h>
51 #include <sys/ttold.h>
52 #include <sys/systm.h>
53 #include <sys/uio.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sad.h>
56 #include <sys/priocntl.h>
57 #include <sys/jioctl.h>
58 #include <sys/procset.h>
59 #include <sys/session.h>
60 #include <sys/kmem.h>
61 #include <sys/filio.h>
62 #include <sys/vtrace.h>
63 #include <sys/debug.h>
64 #include <sys/strredir.h>
65 #include <sys/fs/fifonode.h>
66 #include <sys/fs/snode.h>
67 #include <sys/strlog.h>
68 #include <sys/strsun.h>
69 #include <sys/project.h>
70 #include <sys/kbio.h>
71 #include <sys/msio.h>
72 #include <sys/tty.h>
73 #include <sys/ptyvar.h>
74 #include <sys/vuid_event.h>
75 #include <sys/modctl.h>
76 #include <sys/sunddi.h>
77 #include <sys/sunldi_impl.h>
78 #include <sys/autoconf.h>
79 #include <sys/policy.h>
80 
81 /*
82  * what is mblk_pull_len?
83  *
84  * If a streams message consists of many short messages,
85  * a performance degradation occurs from copyout overhead.
86  * To decrease the per mblk overhead, messages that are
87  * likely to consist of many small mblks are pulled up into
88  * one continuous chunk of memory.
89  *
90  * To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used. If the first mblk in
 * the message is shorter than mblk_pull_len, it is likely
 * that the rest of the mblks in the message will be short too.
94  *
95  * This heuristic was decided upon after performance tests
96  * indicated that anything more complex slowed down the main
97  * code path.
98  */
#define	MBLK_PULL_LEN 64
/* Run-time copy of the pull-up threshold; starts out as MBLK_PULL_LEN. */
uint32_t mblk_pull_len = MBLK_PULL_LEN;

/*
 * The sgttyb_handling flag controls the handling of the old BSD
 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
 *
 * 0 - Emit no warnings at all and retain old, broken behavior.
 * 1 - Emit no warnings and silently handle new semantics.
 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
 *     (once per system invocation).  Handle with new semantics.
 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
 *     made (so that offenders drop core and are easy to debug).
 *
 * The "new semantics" are that TIOCGETP returns B38400 for
 * sg_[io]speed if the corresponding value is over B38400, and that
 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
 * bit rate."
 */
int sgttyb_handling = 1;
/*
 * NOTE(review): presumably records that the once-only CE_NOTE of mode 2
 * has already been issued; the users are outside this part of the file.
 */
static boolean_t sgttyb_complaint;

/* don't push drcompat module by default on Style-2 streams */
static int push_drcompat = 0;

/*
 * id value used to distinguish between different ioctl messages
 */
static uint32_t ioc_id;

/* Forward declarations of file-local helpers. */
static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);

/*
 * qinit and module_info structures for stream head read and write queues
 *
 * The read side uses strrput() as its put procedure and has no service
 * procedure; the write side has only the strwsrv() service procedure.
 * The fifo_* variants differ only in the module_info they point at
 * (PIPE_BUF / FIFOHIWAT / FIFOLOWAT instead of INFPSZ / STRHIGH / STRLOW);
 * stropen() selects them for VFIFO vnodes.
 */
struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
    FIFOLOWAT };
struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };

extern kmutex_t	strresources;	/* protects global resources */
extern kmutex_t muxifier;	/* single-threads multiplexor creation */
kmutex_t sad_lock;		/* protects sad drivers autopush */

static boolean_t msghasdata(mblk_t *bp);
/* Convenience negation of msghasdata(). */
#define	msgnodata(bp) (!msghasdata(bp))
152 
153 /*
154  * Stream head locking notes:
155  *	There are four monitors associated with the stream head:
156  *	1. v_stream monitor: in stropen() and strclose() v_lock
157  *		is held while the association of vnode and stream
158  *		head is established or tested for.
159  *	2. open/close/push/pop monitor: sd_lock is held while each
160  *		thread bids for exclusive access to this monitor
161  *		for opening or closing a stream.  In addition, this
162  *		monitor is entered during pushes and pops.  This
163  *		guarantees that during plumbing operations there
164  *		is only one thread trying to change the plumbing.
165  *		Any other threads present in the stream are only
166  *		using the plumbing.
167  *	3. read/write monitor: in the case of read, a thread holds
168  *		sd_lock while trying to get data from the stream
169  *		head queue.  if there is none to fulfill a read
170  *		request, it sets RSLEEP and calls cv_wait_sig() down
171  *		in strwaitq() to await the arrival of new data.
172  *		when new data arrives in strrput(), sd_lock is acquired
173  *		before testing for RSLEEP and calling cv_broadcast().
174  *		the behavior of strwrite(), strwsrv(), and WSLEEP
175  *		mirror this.
176  *	4. ioctl monitor: sd_lock is gotten to ensure that only one
177  *		thread is doing an ioctl at a time.
178  */
179 
180 static int
181 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
182     int anchor, cred_t *crp)
183 {
184 	int error;
185 	fmodsw_impl_t *fp;
186 
187 	if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
188 		error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
189 		return (error);
190 	}
191 	if (stp->sd_pushcnt >= nstrpush) {
192 		return (EINVAL);
193 	}
194 
195 	if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
196 		stp->sd_flag |= STREOPENFAIL;
197 		return (EINVAL);
198 	}
199 
200 	/*
201 	 * push new module and call its open routine via qattach
202 	 */
203 	if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
204 		return (error);
205 
206 	/*
207 	 * Check to see if caller wants a STREAMS anchor
208 	 * put at this place in the stream, and add if so.
209 	 */
210 	mutex_enter(&stp->sd_lock);
211 	if (anchor == stp->sd_pushcnt)
212 		stp->sd_anchor = stp->sd_pushcnt;
213 	mutex_exit(&stp->sd_lock);
214 
215 	return (0);
216 }
217 
/*
 * Open a stream device.
 *
 * If `vp' already has a stream head, serialize against any open, close or
 * plumbing operation in progress (STWOPEN/STRCLOSE/STRPLUMB in sd_flag),
 * then re-open every queue below the head with qreopen().  Otherwise
 * allocate a new stream head, attach the driver with qattach() and push
 * any modules configured for autopush on this device.
 *
 *	vp	vnode being opened
 *	devp	device number; handed by reference to qattach()/qreopen()
 *		and re-read after the driver open
 *	flag	open flags; with FNDELAY or FNONBLOCK a busy stream yields
 *		EAGAIN instead of sleeping
 *	crp	caller's credentials
 *
 * Returns 0 on success, otherwise an errno value: EAGAIN, EINTR or EIO
 * from the checks made here, or the error returned by qreopen(),
 * qattach() or an autopush push_mod().
 */
int
stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int s;
	dev_t dummydev;
	struct autopush *ap;
	int error = 0;
	ssize_t	rmin, rmax;
	int cloneopen;
	queue_t *brq;
	major_t major;

#ifdef C2_AUDIT
	if (audit_active)
		audit_stropen(vp, devp, flag, crp);
#endif

	/*
	 * If the stream already exists, wait for any open in progress
	 * to complete, then call the open function of each module and
	 * driver in the stream.  Otherwise create the stream.
	 */
	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
retry:
	mutex_enter(&vp->v_lock);
	if ((stp = vp->v_stream) != NULL) {

		/*
		 * Waiting for stream to be created to device
		 * due to another open.
		 */
	    mutex_exit(&vp->v_lock);

	    if (STRMATED(stp)) {
		struct stdata *strmatep = stp->sd_mate;

		/*
		 * Mated stream: both stream heads are examined with both
		 * sd_locks held.  Every exit to ckreturn below first
		 * arranges to hold stp->sd_lock only, which is what
		 * ckreturn releases.
		 */
		STRLOCKMATES(stp);
		if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
			if (flag & (FNDELAY|FNONBLOCK)) {
				error = EAGAIN;
				mutex_exit(&strmatep->sd_lock);
				goto ckreturn;
			}
			/* Sleep on the mate's monitor holding its lock only. */
			mutex_exit(&stp->sd_lock);
			if (!cv_wait_sig(&strmatep->sd_monitor,
			    &strmatep->sd_lock)) {
				error = EINTR;
				mutex_exit(&strmatep->sd_lock);
				mutex_enter(&stp->sd_lock);
				goto ckreturn;
			}
			mutex_exit(&strmatep->sd_lock);
			goto retry;
		}
		if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
			if (flag & (FNDELAY|FNONBLOCK)) {
				error = EAGAIN;
				mutex_exit(&strmatep->sd_lock);
				goto ckreturn;
			}
			mutex_exit(&strmatep->sd_lock);
			if (!cv_wait_sig(&stp->sd_monitor, &stp->sd_lock)) {
				error = EINTR;
				goto ckreturn;
			}
			mutex_exit(&stp->sd_lock);
			goto retry;
		}

		if (stp->sd_flag & (STRDERR|STWRERR)) {
			error = EIO;
			mutex_exit(&strmatep->sd_lock);
			goto ckreturn;
		}

		/* Claim the open/close/push/pop monitor for this thread. */
		stp->sd_flag |= STWOPEN;
		STRUNLOCKMATES(stp);
	    } else {
		mutex_enter(&stp->sd_lock);
		if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
			if (flag & (FNDELAY|FNONBLOCK)) {
				error = EAGAIN;
				goto ckreturn;
			}
			if (!cv_wait_sig(&stp->sd_monitor, &stp->sd_lock)) {
				error = EINTR;
				goto ckreturn;
			}
			mutex_exit(&stp->sd_lock);
			goto retry;  /* could be clone! */
		}

		if (stp->sd_flag & (STRDERR|STWRERR)) {
			error = EIO;
			goto ckreturn;
		}

		/* Claim the open/close/push/pop monitor for this thread. */
		stp->sd_flag |= STWOPEN;
		mutex_exit(&stp->sd_lock);
	    }

		/*
		 * Open all modules and devices down stream to notify
		 * that another user is streaming.  For modules, set the
		 * last argument to MODOPEN and do not pass any open flags.
		 * Ignore dummydev since this is not the first open.
		 */
	    claimstr(stp->sd_wrq);
	    qp = stp->sd_wrq;
	    while (_SAMESTR(qp)) {
		qp = qp->q_next;
		if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
			break;
	    }
	    releasestr(stp->sd_wrq);
		/*
		 * The hangup and error state is cleared here whether or
		 * not one of the qreopen() calls failed.
		 */
	    mutex_enter(&stp->sd_lock);
	    stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
	    stp->sd_rerror = 0;
	    stp->sd_werror = 0;
ckreturn:
		/* Reached with stp->sd_lock held; wake other waiters. */
	    cv_broadcast(&stp->sd_monitor);
	    mutex_exit(&stp->sd_lock);
	    return (error);
	}

	/*
	 * This vnode isn't streaming.  SPECFS already
	 * checked for multiple vnodes pointing to the
	 * same stream, so create a stream to the driver.
	 */
	qp = allocq();
	stp = shalloc(qp);

	/*
	 * Initialize stream head.  shalloc() has given us
	 * exclusive access, and we have the vnode locked;
	 * we can do whatever we want with stp.
	 */
	stp->sd_flag = STWOPEN;
	stp->sd_siglist = NULL;
	stp->sd_pollist.ph_list = NULL;
	stp->sd_sigflags = 0;
	stp->sd_mark = NULL;
	stp->sd_closetime = STRTIMOUT;
	stp->sd_sidp = NULL;
	stp->sd_pgidp = NULL;
	stp->sd_vnode = vp;
	stp->sd_rerror = 0;
	stp->sd_werror = 0;
	stp->sd_wroff = 0;
	stp->sd_iocblk = NULL;
	stp->sd_pushcnt = 0;
	stp->sd_qn_minpsz = 0;
	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
	stp->sd_maxblk = INFPSZ;
	qp->q_ptr = _WR(qp)->q_ptr = stp;
	STREAM(qp) = STREAM(_WR(qp)) = stp;
	/*
	 * Publish the stream head.  From here on other opens find it via
	 * v_stream and wait on sd_monitor until STWOPEN is cleared.
	 */
	vp->v_stream = stp;
	mutex_exit(&vp->v_lock);
	if (vp->v_type == VFIFO) {
		stp->sd_flag |= OLDNDELAY;
		/*
		 * This means, both for pipes and fifos
		 * strwrite will send SIGPIPE if the other
		 * end is closed. For putmsg it depends
		 * on whether it is a XPG4_2 application
		 * or not
		 */
		stp->sd_wput_opt = SW_SIGPIPE;

		/* setq might sleep in kmem_alloc - avoid holding locks. */
		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
		    SQ_CI|SQ_CO, B_FALSE);

		set_qend(qp);
		stp->sd_strtab = fifo_getinfo();
		_WR(qp)->q_nfsrv = _WR(qp);
		qp->q_nfsrv = qp;
		/*
		 * Wake up others that are waiting for stream to be created.
		 */
		mutex_enter(&stp->sd_lock);
		/*
		 * nothing has been pushed on the stream yet, so the
		 * optimized stream head packet sizes are just those
		 * of the read queue
		 */
		stp->sd_qn_minpsz = qp->q_minpsz;
		stp->sd_qn_maxpsz = qp->q_maxpsz;
		stp->sd_flag &= ~STWOPEN;
		/* No driver to attach and no autopush for a fifo. */
		goto fifo_opendone;
	}
	/* setq might sleep in kmem_alloc - avoid holding locks. */
	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);

	set_qend(qp);

	/*
	 * Open driver and create stream to it (via qattach).
	 */
	cloneopen = (getmajor(*devp) == clone_major);
	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
		/*
		 * Driver open failed: unpublish the stream head, wake
		 * anyone who found it in the meantime, and free it.
		 */
		mutex_enter(&vp->v_lock);
		vp->v_stream = NULL;
		mutex_exit(&vp->v_lock);
		mutex_enter(&stp->sd_lock);
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		freeq(_RD(qp));
		shfree(stp);
		return (error);
	}
	/*
	 * Set sd_strtab after open in order to handle clonable drivers
	 */
	stp->sd_strtab = STREAMSTAB(getmajor(*devp));

	/*
	 * Historical note: dummydev used to be set prior to the initial
	 * open (via qattach above), which made the value seen
	 * inconsistent between an I_PUSH and an autopush of a module.
	 */
	dummydev = *devp;

	/*
	 * For clone open of old style (Q not associated) network driver,
	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
	 */
	brq = _RD(_WR(qp)->q_next);
	major = getmajor(*devp);
	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
	    ((brq->q_flag & _QASSOCIATED) == 0)) {
		/* A failure here is only warned about; the open goes on. */
		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp) != 0)
			cmn_err(CE_WARN, "cannot push " DRMODNAME
			    " streams module");
	}

	/*
	 * check for autopush
	 */
	mutex_enter(&sad_lock);
	ap = strphash(getemajor(*devp));
#define	DEVT(ap)	makedevice(ap->ap_major, ap->ap_minor)
#define	DEVLT(ap)	makedevice(ap->ap_major, ap->ap_lastminor)

	/*
	 * Walk the hash chain for an entry that covers this device: all
	 * minors (SAP_ALL), exactly this minor (SAP_ONE), or a range of
	 * minors that includes it (SAP_RANGE).
	 */
	while (ap) {
		if (ap->ap_major == (getemajor(*devp))) {
			if (ap->ap_type == SAP_ALL)
				break;
			else if ((ap->ap_type == SAP_ONE) &&
			    (getminor(DEVT(ap)) == getminor(*devp)))
				break;
			else if (ap->ap_type == SAP_RANGE &&
			    getminor(*devp) >= getminor(DEVT(ap)) &&
			    getminor(*devp) <= getminor(DEVLT(ap)))
				break;
		}
		ap = ap->ap_nextp;
	}
	if (ap == NULL) {
		mutex_exit(&sad_lock);
		goto opendone;
	}
	/*
	 * Take a reference on the entry so that it is not freed while
	 * sad_lock is dropped around the (possibly sleeping) pushes.
	 */
	ap->ap_cnt++;
	mutex_exit(&sad_lock);
	for (s = 0; s < ap->ap_npush; s++) {
		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
		    ap->ap_anchor, crp);
		if (error != 0)
			break;
	}
	mutex_enter(&sad_lock);
	if (--(ap->ap_cnt) <= 0)
		ap_free(ap);
	mutex_exit(&sad_lock);

	/*
	 * let specfs know that open failed part way through
	 */

	if (error) {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STREOPENFAIL;
		mutex_exit(&stp->sd_lock);
	}

opendone:

	/*
	 * Wake up others that are waiting for stream to be created.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STWOPEN;

	/*
	 * As a performance concern we are caching the values of
	 * q_minpsz and q_maxpsz of the module below the stream
	 * head in the stream head.
	 */
	mutex_enter(QLOCK(stp->sd_wrq->q_next));
	rmin = stp->sd_wrq->q_next->q_minpsz;
	rmax = stp->sd_wrq->q_next->q_maxpsz;
	mutex_exit(QLOCK(stp->sd_wrq->q_next));

	/* do this processing here as a performance concern */
	/* A non-zero strmsgsz caps the cached maximum packet size. */
	if (strmsgsz != 0) {
		if (rmax == INFPSZ)
			rmax = strmsgsz;
		else
			rmax = MIN(strmsgsz, rmax);
	}

	mutex_enter(QLOCK(stp->sd_wrq));
	stp->sd_qn_minpsz = rmin;
	stp->sd_qn_maxpsz = rmax;
	mutex_exit(QLOCK(stp->sd_wrq));

fifo_opendone:
	/* Reached with sd_lock held and STWOPEN already cleared. */
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);
	return (error);
}
545 
/*
 * qinit structures installed by strclose() on the stream head queues of
 * a closing pipe end whose other end is still alive: the read side gets
 * strsink() as its put procedure, the write side has no procedures.
 */
static int strsink(queue_t *, mblk_t *);
static struct qinit deadrend = {
	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
};
static struct qinit deadwend = {
	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
};
553 
/*
 * Close a stream.
 * This is called from closef() on the last close of an open stream.
 * Strclean() will already have removed the siglist and pollist
 * information, so all that remains is to remove all multiplexor links
 * for the stream, pop all the modules (and the driver), and free the
 * stream structure.
 *
 *	vp	vnode whose stream (vp->v_stream, must be non-NULL) is closed
 *	flag	file flags; with FNDELAY or FNONBLOCK the wait for queued
 *		write data to drain is skipped
 *	crp	caller's credentials, passed to munlinkall() and qdetach()
 *
 * Always returns 0.
 */

int
strclose(struct vnode *vp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int rval;
	int freestp = 1;	/* cleared when the head must outlive the close */
	queue_t *rmq;

#ifdef C2_AUDIT
	if (audit_active)
		audit_strclose(vp, flag, crp);
#endif

	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRCLOSE, "strclose:%p", vp);
	ASSERT(vp->v_stream);

	stp = vp->v_stream;
	ASSERT(!(stp->sd_flag & STPLEX));
	qp = stp->sd_wrq;

	/*
	 * Needed so that strpoll will return non-zero for this fd.
	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag |= STRHUP;
	mutex_exit(&stp->sd_lock);

	/*
	 * Since we call pollwakeup in close() now, the poll list should
	 * be empty in most cases. The only exception is the layered devices
	 * (e.g. the console drivers with redirection modules pushed on top
	 * of it).
	 */
	if (stp->sd_pollist.ph_list != NULL) {
		pollwakeup(&stp->sd_pollist, POLLERR);
		pollhead_clean(&stp->sd_pollist);
	}
	ASSERT(stp->sd_pollist.ph_list == NULL);
	ASSERT(stp->sd_sidp == NULL);
	ASSERT(stp->sd_pgidp == NULL);

	/*
	 * If the registered process or process group did not have an
	 * open instance of this stream then strclean would not be
	 * called. Thus at the time of closing all remaining siglist entries
	 * are removed.
	 */
	if (stp->sd_siglist != NULL)
		strcleanall(vp);

	ASSERT(stp->sd_siglist == NULL);
	ASSERT(stp->sd_sigflags == 0);

	if (STRMATED(stp)) {
		struct stdata *strmatep = stp->sd_mate;
		int waited = 1;

		/*
		 * Wait until neither this stream head nor its mate is being
		 * opened, closed or plumbed.  Each wait drops one of the two
		 * locks, so after any wait both conditions are re-checked
		 * from scratch with both locks held again.
		 */
		STRLOCKMATES(stp);
		while (waited) {
			waited = 0;
			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&strmatep->sd_lock);
				cv_wait(&stp->sd_monitor, &stp->sd_lock);
				mutex_exit(&stp->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
			while (strmatep->sd_flag &
			    (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&stp->sd_lock);
				cv_wait(&strmatep->sd_monitor,
				    &strmatep->sd_lock);
				mutex_exit(&strmatep->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
		}
		stp->sd_flag |= STRCLOSE;
		STRUNLOCKMATES(stp);
	} else {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STRCLOSE;
		mutex_exit(&stp->sd_lock);
	}

	ASSERT(qp->q_first == NULL);	/* No more delayed write */

	/* Check if an I_LINK was ever done on this stream */
	if (stp->sd_flag & STRHASLINKS) {
		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval);
	}

	/*
	 * Pop everything below the stream head, one queue pair per
	 * iteration, until the write queue no longer has a next queue
	 * in the same stream.
	 */
	while (_SAMESTR(qp)) {
		/*
		 * Holding sd_lock prevents q_next from changing in
		 * this stream.
		 */
		mutex_enter(&stp->sd_lock);
		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {

			/*
			 * sleep until awakened by strwsrv() or timeout
			 */
			for (;;) {
				mutex_enter(QLOCK(qp->q_next));
				if (!(qp->q_next->q_mblkcnt)) {
					/* Next queue is empty; nothing to drain. */
					mutex_exit(QLOCK(qp->q_next));
					break;
				}
				stp->sd_flag |= WSLEEP;

				/* ensure strwsrv gets enabled */
				qp->q_next->q_flag |= QWANTW;
				mutex_exit(QLOCK(qp->q_next));
				/* get out if we timed out or recv'd a signal */
				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
				    stp->sd_closetime, 0) <= 0) {
					break;
				}
			}
			stp->sd_flag &= ~WSLEEP;
		}
		mutex_exit(&stp->sd_lock);

		rmq = qp->q_next;
		if (rmq->q_flag & QISDRV) {
			ASSERT(!_SAMESTR(rmq));
			wait_sq_svc(_RD(qp)->q_syncq);
		}

		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
	}

	/* Prevent qenable from re-enabling the stream head queue */
	disable_svc(_RD(qp));

	/*
	 * Wait until service procedure of each queue is
	 * run, if QINSERVICE is set.
	 */
	wait_svc(_RD(qp));

	/*
	 * Now, flush both queues.
	 */
	flushq(_RD(qp), FLUSHALL);
	flushq(qp, FLUSHALL);

	/*
	 * If the write queue of the stream head is pointing to a
	 * read queue, we have a twisted stream.  If the read queue
	 * is alive, convert the stream head queues into a dead end.
	 * If the read queue is dead, free the dead pair.
	 */
	if (qp->q_next && !_SAMESTR(qp)) {
		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
			/*
			 * The other end was closed earlier and left as a
			 * dead end; release its stream head and queue pair
			 * together with ours.
			 */
			flushq(qp->q_next, FLUSHALL); /* ensure no message */
			shfree(qp->q_next->q_stream);
			freeq(qp->q_next);
			freeq(_RD(qp));
		} else if (qp->q_next == _RD(qp)) {	/* fifo */
			freeq(_RD(qp));
		} else {				/* pipe */
			/*
			 * The other end is still alive: keep this stream
			 * head and its queues, but turn them into a dead
			 * end.
			 */
			freestp = 0;
			/*
			 * The q_info pointers are never accessed when
			 * SQLOCK is held.
			 */
			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
			mutex_enter(SQLOCK(qp->q_syncq));
			qp->q_qinfo = &deadwend;
			_RD(qp)->q_qinfo = &deadrend;
			mutex_exit(SQLOCK(qp->q_syncq));
		}
	} else {
		freeq(_RD(qp)); /* free stream head queue pair */
	}

	/* Break the vnode <-> stream head association under v_lock. */
	mutex_enter(&vp->v_lock);
	if (stp->sd_iocblk) {
		/* (mblk_t *)-1 is a marker value, not a message to free. */
		if (stp->sd_iocblk != (mblk_t *)-1) {
			freemsg(stp->sd_iocblk);
		}
		stp->sd_iocblk = NULL;
	}
	stp->sd_vnode = NULL;
	vp->v_stream = NULL;
	mutex_exit(&vp->v_lock);
	/* Leave the close monitor and wake threads sleeping on it. */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STRCLOSE;
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);

	if (freestp)
		shfree(stp);
	return (0);
}
763 
764 static int
765 strsink(queue_t *q, mblk_t *bp)
766 {
767 	struct copyresp *resp;
768 
769 	switch (bp->b_datap->db_type) {
770 	case M_FLUSH:
771 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
772 			*bp->b_rptr &= ~FLUSHR;
773 			bp->b_flag |= MSGNOLOOP;
774 			/*
775 			 * Protect against the driver passing up
776 			 * messages after it has done a qprocsoff.
777 			 */
778 			if (_OTHERQ(q)->q_next == NULL)
779 				freemsg(bp);
780 			else
781 				qreply(q, bp);
782 		} else {
783 			freemsg(bp);
784 		}
785 		break;
786 
787 	case M_COPYIN:
788 	case M_COPYOUT:
789 		if (bp->b_cont) {
790 			freemsg(bp->b_cont);
791 			bp->b_cont = NULL;
792 		}
793 		bp->b_datap->db_type = M_IOCDATA;
794 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
795 		resp = (struct copyresp *)bp->b_rptr;
796 		resp->cp_rval = (caddr_t)1;	/* failure */
797 		/*
798 		 * Protect against the driver passing up
799 		 * messages after it has done a qprocsoff.
800 		 */
801 		if (_OTHERQ(q)->q_next == NULL)
802 			freemsg(bp);
803 		else
804 			qreply(q, bp);
805 		break;
806 
807 	case M_IOCTL:
808 		if (bp->b_cont) {
809 			freemsg(bp->b_cont);
810 			bp->b_cont = NULL;
811 		}
812 		bp->b_datap->db_type = M_IOCNAK;
813 		/*
814 		 * Protect against the driver passing up
815 		 * messages after it has done a qprocsoff.
816 		 */
817 		if (_OTHERQ(q)->q_next == NULL)
818 			freemsg(bp);
819 		else
820 			qreply(q, bp);
821 		break;
822 
823 	default:
824 		freemsg(bp);
825 		break;
826 	}
827 
828 	return (0);
829 }
830 
831 /*
832  * Clean up after a process when it closes a stream.  This is called
833  * from closef for all closes, whereas strclose is called only for the
834  * last close on a stream.  The siglist is scanned for entries for the
835  * current process, and these are removed.
836  */
837 void
838 strclean(struct vnode *vp)
839 {
840 	strsig_t *ssp, *pssp, *tssp;
841 	stdata_t *stp;
842 	int update = 0;
843 
844 	TRACE_1(TR_FAC_STREAMS_FR,
845 		TR_STRCLEAN, "strclean:%p", vp);
846 	stp = vp->v_stream;
847 	pssp = NULL;
848 	mutex_enter(&stp->sd_lock);
849 	ssp = stp->sd_siglist;
850 	while (ssp) {
851 		if (ssp->ss_pidp == curproc->p_pidp) {
852 			tssp = ssp->ss_next;
853 			if (pssp)
854 				pssp->ss_next = tssp;
855 			else
856 				stp->sd_siglist = tssp;
857 			mutex_enter(&pidlock);
858 			PID_RELE(ssp->ss_pidp);
859 			mutex_exit(&pidlock);
860 			kmem_free(ssp, sizeof (strsig_t));
861 			update = 1;
862 			ssp = tssp;
863 		} else {
864 			pssp = ssp;
865 			ssp = ssp->ss_next;
866 		}
867 	}
868 	if (update) {
869 		stp->sd_sigflags = 0;
870 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
871 			stp->sd_sigflags |= ssp->ss_events;
872 	}
873 	mutex_exit(&stp->sd_lock);
874 }
875 
876 /*
877  * Used on the last close to remove any remaining items on the siglist.
878  * These could be present on the siglist due to I_ESETSIG calls that
879  * use process groups or processed that do not have an open file descriptor
880  * for this stream (Such entries would not be removed by strclean).
881  */
882 static void
883 strcleanall(struct vnode *vp)
884 {
885 	strsig_t *ssp, *nssp;
886 	stdata_t *stp;
887 
888 	stp = vp->v_stream;
889 	mutex_enter(&stp->sd_lock);
890 	ssp = stp->sd_siglist;
891 	stp->sd_siglist = NULL;
892 	while (ssp) {
893 		nssp = ssp->ss_next;
894 		mutex_enter(&pidlock);
895 		PID_RELE(ssp->ss_pidp);
896 		mutex_exit(&pidlock);
897 		kmem_free(ssp, sizeof (strsig_t));
898 		ssp = nssp;
899 	}
900 	stp->sd_sigflags = 0;
901 	mutex_exit(&stp->sd_lock);
902 }
903 
904 /*
905  * Retrieve the next message from the logical stream head read queue
906  * using either rwnext (if sync stream) or getq_noenab.
907  * It is the callers responsibility to call qbackenable after
908  * it is finished with the message. The caller should not call
909  * qbackenable until after any putback calls to avoid spurious backenabling.
910  */
911 mblk_t *
912 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
913     int *errorp)
914 {
915 	mblk_t *bp;
916 	int error;
917 
918 	ASSERT(MUTEX_HELD(&stp->sd_lock));
919 	/* Holding sd_lock prevents the read queue from changing  */
920 
921 	if (uiop != NULL && stp->sd_struiordq != NULL &&
922 	    q->q_first == NULL &&
923 	    (!first || (stp->sd_wakeq & RSLEEP))) {
924 		/*
925 		 * Stream supports rwnext() for the read side.
926 		 * If this is the first time we're called by e.g. strread
927 		 * only do the downcall if there is a deferred wakeup
928 		 * (registered in sd_wakeq).
929 		 */
930 		struiod_t uiod;
931 
932 		if (first)
933 			stp->sd_wakeq &= ~RSLEEP;
934 
935 		(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
936 			sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
937 		uiod.d_mp = 0;
938 		/*
939 		 * Mark that a thread is in rwnext on the read side
940 		 * to prevent strrput from nacking ioctls immediately.
941 		 * When the last concurrent rwnext returns
942 		 * the ioctls are nack'ed.
943 		 */
944 		ASSERT(MUTEX_HELD(&stp->sd_lock));
945 		stp->sd_struiodnak++;
946 		/*
947 		 * Note: rwnext will drop sd_lock.
948 		 */
949 		error = rwnext(q, &uiod);
950 		ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
951 		mutex_enter(&stp->sd_lock);
952 		stp->sd_struiodnak--;
953 		while (stp->sd_struiodnak == 0 &&
954 		    ((bp = stp->sd_struionak) != NULL)) {
955 			stp->sd_struionak = bp->b_next;
956 			bp->b_next = NULL;
957 			bp->b_datap->db_type = M_IOCNAK;
958 			/*
959 			 * Protect against the driver passing up
960 			 * messages after it has done a qprocsoff.
961 			 */
962 			if (_OTHERQ(q)->q_next == NULL)
963 				freemsg(bp);
964 			else {
965 				mutex_exit(&stp->sd_lock);
966 				qreply(q, bp);
967 				mutex_enter(&stp->sd_lock);
968 			}
969 		}
970 		ASSERT(MUTEX_HELD(&stp->sd_lock));
971 		if (error == 0 || error == EWOULDBLOCK) {
972 			if ((bp = uiod.d_mp) != NULL) {
973 				*errorp = 0;
974 				ASSERT(MUTEX_HELD(&stp->sd_lock));
975 				return (bp);
976 			}
977 			error = 0;
978 		} else if (error == EINVAL) {
979 			/*
980 			 * The stream plumbing must have
981 			 * changed while we were away, so
982 			 * just turn off rwnext()s.
983 			 */
984 			error = 0;
985 		} else if (error == EBUSY) {
986 			/*
987 			 * The module might have data in transit using putnext
988 			 * Fall back on waiting + getq.
989 			 */
990 			error = 0;
991 		} else {
992 			*errorp = error;
993 			ASSERT(MUTEX_HELD(&stp->sd_lock));
994 			return (NULL);
995 		}
996 		/*
997 		 * Try a getq in case a rwnext() generated mblk
998 		 * has bubbled up via strrput().
999 		 */
1000 	}
1001 	*errorp = 0;
1002 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1003 	return (getq_noenab(q));
1004 }
1005 
1006 /*
1007  * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1008  * If the message does not fit in the uio the remainder of it is returned;
1009  * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
1010  * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
1011  * the error code, the message is consumed, and NULL is returned.
1012  */
1013 static mblk_t *
1014 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1015 {
1016 	int error;
1017 	ptrdiff_t n;
1018 	mblk_t *nbp;
1019 
1020 	ASSERT(bp->b_wptr >= bp->b_rptr);
1021 
1022 	do {
1023 		if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1024 			ASSERT(n > 0);
1025 
1026 			error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1027 			if (error != 0) {
1028 				freemsg(bp);
1029 				*errorp = error;
1030 				return (NULL);
1031 			}
1032 		}
1033 
1034 		bp->b_rptr += n;
1035 		while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1036 			nbp = bp;
1037 			bp = bp->b_cont;
1038 			freeb(nbp);
1039 		}
1040 	} while (bp != NULL && uiop->uio_resid > 0);
1041 
1042 	*errorp = 0;
1043 	return (bp);
1044 }
1045 
/*
 * Read a stream according to the mode flags in sd_flag:
 *
 * (default mode)		- Byte stream, msg boundaries are ignored
 * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
 *				any data remaining in msg
 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
 *				any remaining data on head of read queue
 *
 * Consume readable messages on the front of the queue until
 * ttolwp(curthread)->lwp_count
 * is satisfied, the readable messages are exhausted, or a message
 * boundary is reached in a message mode.  If no data was read and
 * the stream was not opened with the NDELAY flag, block until data arrives.
 * Otherwise return the data read and update the count.
 *
 * In default mode a 0 length message signifies end-of-file and terminates
 * a read in progress.  The 0 length message is removed from the queue
 * only if it is the only message read (no data is read).
 *
 * An attempt to read an M_PROTO or M_PCPROTO message results in an
 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
 * are unlinked from any M_DATA blocks in the message, the protos are
 * thrown away, and the data is read.
 *
 * Arguments:
 *	vp	- vnode of the stream; vp->v_stream must be set.
 *	uiop	- describes the destination buffer.  uio_resid is the number
 *		  of bytes still wanted and uio_fmode is passed to strwaitq().
 *	crp	- caller's credentials; only used for tracing here.
 *
 * Returns 0 on success (including a read of zero bytes) or an error number
 * obtained from straccess(), strgeterr(), strget(), strwaitq() or
 * struiocopyout(), or EBADMSG as described above.
 *
 * Locking: sd_lock is held at the top of every iteration of the main loop
 * and is dropped while message contents are examined and copied out.  The
 * `oops' exit label is entered with sd_lock held and `oops1' without it.
 */
/* ARGSUSED */
int
strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
	struct stdata *stp;
	mblk_t *bp, *nbp;
	queue_t *q;
	int error = 0;
	uint_t old_sd_flag;	/* sd_flag sampled at the top of the loop */
	int first;		/* passed to strget(); cleared after a wait */
	char rflg;		/* set once any data has been copied out */
	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
#define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
	short delim;		/* message carried MSGDELIM in STRDELIM mode */
	unsigned char pri = 0;	/* band of the last message dequeued */
	char waitflag;		/* flags handed to strwaitq() */
	unsigned char type;	/* db_type of the first mblk of the message */

	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRREAD_ENTER, "strread:%p", vp);
	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	/*
	 * Streams with a session id that are not FIFOs go through the
	 * straccess() read check before any data is touched.
	 */
	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
		if (error = straccess(stp, JCREAD))
			return (error);

	mutex_enter(&stp->sd_lock);
	if (stp->sd_flag & (STRDERR|STPLEX)) {
		error = strgeterr(stp, STRDERR|STPLEX, 0);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	/*
	 * Loop terminates when uiop->uio_resid == 0.
	 */
	rflg = 0;
	waitflag = READWAIT;
	q = _RD(stp->sd_wrq);
	for (;;) {
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		old_sd_flag = stp->sd_flag;
		mark = 0;
		delim = 0;
		first = 1;
		/*
		 * Fetch the next message, sleeping in strwaitq() while the
		 * queue is empty and nothing below says we should return.
		 */
		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
			int done = 0;

			ASSERT(MUTEX_HELD(&stp->sd_lock));

			if (error != 0)
				goto oops;

			/* Hangup or end-of-file: nothing more will arrive. */
			if (stp->sd_flag & (STRHUP|STREOF)) {
				goto oops;
			}
			/*
			 * Data has already been read and the stream is not
			 * in delimited mode: return what we have rather
			 * than wait for more.
			 */
			if (rflg && !(stp->sd_flag & STRDELIM)) {
				goto oops;
			}
			/*
			 * If a read(fd,buf,0) has been done, there is no
			 * need to sleep. We always have zero bytes to
			 * return.
			 */
			if (uiop->uio_resid == 0) {
				goto oops;
			}

			qbackenable(q, 0);

			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
				"strread calls strwaitq:%p, %p, %p",
				vp, uiop, crp);
			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
			    uiop->uio_fmode, -1, &done)) != 0 || done) {
				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
					"strread error or done:%p, %p, %p",
					vp, uiop, crp);
				/*
				 * With FNDELAY on the file and OLDNDELAY on
				 * the stream, EAGAIN is reported as a
				 * successful read instead of an error.
				 */
				if ((uiop->uio_fmode & FNDELAY) &&
				    (stp->sd_flag & OLDNDELAY) &&
				    (error == EAGAIN))
					error = 0;
				goto oops;
			}
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
				"strread awakes:%p, %p, %p", vp, uiop, crp);
			/*
			 * Repeat the straccess() check after sleeping.  It
			 * is called without sd_lock, so on failure we leave
			 * through oops1, which does not drop the lock.
			 */
			if (stp->sd_sidp != NULL &&
			    stp->sd_vnode->v_type != VFIFO) {
				mutex_exit(&stp->sd_lock);
				if (error = straccess(stp, JCREAD))
					goto oops1;
				mutex_enter(&stp->sd_lock);
			}
			first = 0;
		}
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		ASSERT(bp);
		pri = bp->b_band;
		/*
		 * Extract any mark information. If the message is not
		 * completely consumed this information will be put in the mblk
		 * that is putback.
		 * If MSGMARKNEXT is set and the message is completely consumed
		 * the STRATMARK flag will be set below. Likewise, if
		 * MSGNOTMARKNEXT is set and the message is
		 * completely consumed STRNOTATMARK will be set.
		 *
		 * For some unknown reason strread only breaks the read at the
		 * last mark.
		 */
		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
			(MSGMARKNEXT|MSGNOTMARKNEXT));
		if (mark != 0 && bp == stp->sd_mark) {
			/*
			 * This is the message recorded in sd_mark.  If data
			 * has already been read, stop in front of it.
			 */
			if (rflg) {
				putback(stp, q, bp, pri);
				goto oops;
			}
			mark |= _LASTMARK;
			stp->sd_mark = NULL;
		}
		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
			delim = 1;
		mutex_exit(&stp->sd_lock);

		if (STREAM_NEEDSERVICE(stp))
			stream_runservice(stp);

		type = bp->b_datap->db_type;

		/*
		 * sd_lock is not held inside the switch.  Cases that
		 * `continue' reacquire it first; cases that `break' rely
		 * on the mutex_enter() following the switch.
		 */
		switch (type) {

		case M_DATA:
ismdata:
			if (msgnodata(bp)) {
				if (mark || delim) {
					freemsg(bp);
				} else if (rflg) {

					/*
					 * If already read data put zero
					 * length message back on queue else
					 * free msg and return 0.
					 */
					bp->b_band = pri;
					mutex_enter(&stp->sd_lock);
					putback(stp, q, bp, pri);
					mutex_exit(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
				error =  0;
				goto oops1;
			}

			/*
			 * From here on data has been read; NOINTR is added
			 * to the flags used for any later strwaitq() call.
			 */
			rflg = 1;
			waitflag |= NOINTR;
			/* On error struiocopyout() has consumed the message. */
			bp = struiocopyout(bp, uiop, &error);
			if (error != 0)
				goto oops1;

			mutex_enter(&stp->sd_lock);
			if (bp) {
				/*
				 * Have remaining data in message.
				 * Free msg if in discard mode.
				 */
				if (stp->sd_read_opt & RD_MSGDIS) {
					freemsg(bp);
				} else {
					/*
					 * Restore the band, mark and
					 * delimiter state onto the remainder
					 * before it is put back.
					 */
					bp->b_band = pri;
					if ((mark & _LASTMARK) &&
					    (stp->sd_mark == NULL))
						stp->sd_mark = bp;
					bp->b_flag |= mark & ~_LASTMARK;
					if (delim)
						bp->b_flag |= MSGDELIM;
					if (msgnodata(bp))
						freemsg(bp);
					else
						putback(stp, q, bp, pri);
				}
			} else {
				/*
				 * Consumed the complete message.
				 * Move the MSG*MARKNEXT information
				 * to the stream head just in case
				 * the read queue becomes empty.
				 *
				 * If the stream head was at the mark
				 * (STRATMARK) before we dropped sd_lock above
				 * and some data was consumed then we have
				 * moved past the mark thus STRATMARK is
				 * cleared. However, if a message arrived in
				 * strrput during the copyout above causing
				 * STRATMARK to be set we can not clear that
				 * flag.
				 */
				if (mark &
				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
					if (mark & MSGMARKNEXT) {
						stp->sd_flag &= ~STRNOTATMARK;
						stp->sd_flag |= STRATMARK;
					} else if (mark & MSGNOTMARKNEXT) {
						stp->sd_flag &= ~STRATMARK;
						stp->sd_flag |= STRNOTATMARK;
					} else {
						stp->sd_flag &=
						    ~(STRATMARK|STRNOTATMARK);
					}
				} else if (rflg && (old_sd_flag & STRATMARK)) {
					stp->sd_flag &= ~STRATMARK;
				}
			}

			/*
			 * Check for signal messages at the front of the read
			 * queue and generate the signal(s) if appropriate.
			 * The only signal that can be on queue is M_SIG at
			 * this point.
			 */
			while ((((bp = q->q_first)) != NULL) &&
				(bp->b_datap->db_type == M_SIG)) {
				bp = getq_noenab(q);
				/*
				 * sd_lock is held so the content of the
				 * read queue can not change.
				 */
				ASSERT(bp != NULL &&
					bp->b_datap->db_type == M_SIG);
				strsignal_nolock(stp, *bp->b_rptr,
					(int32_t)bp->b_band);
				mutex_exit(&stp->sd_lock);
				freemsg(bp);
				if (STREAM_NEEDSERVICE(stp))
					stream_runservice(stp);
				mutex_enter(&stp->sd_lock);
			}

			/*
			 * Stop when the request is satisfied, the last mark
			 * or a delimiter was reached, or a message read mode
			 * is in effect; otherwise go around for more data.
			 */
			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
			    delim ||
			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
				goto oops;
			}
			continue;

		case M_SIG:
			/* Post the signal and look at the next message. */
			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
			freemsg(bp);
			mutex_enter(&stp->sd_lock);
			continue;

		case M_PROTO:
		case M_PCPROTO:
			/*
			 * Only data messages are readable.
			 * Any others generate an error, unless
			 * RD_PROTDIS or RD_PROTDAT is set.
			 */
			if (stp->sd_read_opt & RD_PROTDAT) {
				/*
				 * Retype leading proto blocks as M_DATA.
				 * NOTE(review): this walks b_next rather
				 * than b_cont; confirm that is intended.
				 */
				for (nbp = bp; nbp; nbp = nbp->b_next) {
				    if ((nbp->b_datap->db_type == M_PROTO) ||
					(nbp->b_datap->db_type == M_PCPROTO))
					nbp->b_datap->db_type = M_DATA;
				    else
					break;
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				goto ismdata;
			} else if (stp->sd_read_opt & RD_PROTDIS) {
				/*
				 * discard non-data messages
				 */
				while (bp &&
				    ((bp->b_datap->db_type == M_PROTO) ||
				    (bp->b_datap->db_type == M_PCPROTO))) {
					nbp = unlinkb(bp);
					freeb(bp);
					bp = nbp;
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				/*
				 * Read whatever followed the proto blocks as
				 * data; if nothing is left, move on to the
				 * next message.
				 */
				if (bp) {
					bp->b_band = pri;
					goto ismdata;
				} else {
					break;
				}
			}
			/* FALLTHRU */
		case M_PASSFP:
			if ((bp->b_datap->db_type == M_PASSFP) &&
			    (stp->sd_read_opt & RD_PROTDIS)) {
				freemsg(bp);
				break;
			}
			/*
			 * The message is not readable: leave it on the
			 * queue.  EBADMSG is only reported when no data
			 * has been read by this call.
			 */
			mutex_enter(&stp->sd_lock);
			putback(stp, q, bp, pri);
			mutex_exit(&stp->sd_lock);
			if (rflg == 0)
				error = EBADMSG;
			goto oops1;

		default:
			/*
			 * Garbage on stream head read queue.
			 * The message is dropped and `error' is left as is.
			 */
			cmn_err(CE_WARN, "bad %x found at stream head\n",
				bp->b_datap->db_type);
			freemsg(bp);
			goto oops1;
		}
		mutex_enter(&stp->sd_lock);
	}
oops:
	/* Entered with sd_lock held. */
	mutex_exit(&stp->sd_lock);
oops1:
	/* Entered without sd_lock. */
	qbackenable(q, pri);
	return (error);
#undef	_LASTMARK
}
1412 
1413 /*
1414  * Default processing of M_PROTO/M_PCPROTO messages.
1415  * Determine which wakeups and signals are needed.
1416  * This can be replaced by a user-specified procedure for kernel users
1417  * of STREAMS.
1418  */
1419 /* ARGSUSED */
1420 mblk_t *
1421 strrput_proto(vnode_t *vp, mblk_t *mp,
1422     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1423     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1424 {
1425 	*wakeups = RSLEEP;
1426 	*allmsgsigs = 0;
1427 
1428 	switch (mp->b_datap->db_type) {
1429 	case M_PROTO:
1430 		if (mp->b_band == 0) {
1431 			*firstmsgsigs = S_INPUT | S_RDNORM;
1432 			*pollwakeups = POLLIN | POLLRDNORM;
1433 		} else {
1434 			*firstmsgsigs = S_INPUT | S_RDBAND;
1435 			*pollwakeups = POLLIN | POLLRDBAND;
1436 		}
1437 		break;
1438 	case M_PCPROTO:
1439 		*firstmsgsigs = S_HIPRI;
1440 		*pollwakeups = POLLPRI;
1441 		break;
1442 	}
1443 	return (mp);
1444 }
1445 
1446 /*
1447  * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1448  * M_PASSFP messages.
1449  * Determine which wakeups and signals are needed.
1450  * This can be replaced by a user-specified procedure for kernel users
1451  * of STREAMS.
1452  */
1453 /* ARGSUSED */
1454 mblk_t *
1455 strrput_misc(vnode_t *vp, mblk_t *mp,
1456     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1457     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1458 {
1459 	*wakeups = 0;
1460 	*firstmsgsigs = 0;
1461 	*allmsgsigs = 0;
1462 	*pollwakeups = 0;
1463 	return (mp);
1464 }
1465 
/*
 * Stream read put procedure.  Called from downstream driver/module
 * with messages for the stream head.  Data, protocol, and in-stream
 * signal messages are placed on the queue, others are handled directly.
 *
 * Arguments:
 *	q	- the stream head read queue; q->q_ptr is the stdata.
 *	bp	- the arriving message; bp->b_next must be NULL.
 *
 * Processing happens in two stages.  The first switch classifies the
 * message and works out which wakeups, signals and poll events it should
 * generate; M_PROTO/M_PCPROTO and all "other" types are passed through the
 * sd_rprotofunc and sd_rmiscfunc callbacks, which may return a b_next chain
 * of messages or NULL.  The second stage, starting at `one_more', queues or
 * dispatches each returned message in turn and performs the wakeups.
 *
 * Every path visible in this function returns 0.
 *
 * Locking: sd_lock is not held on entry.  Each case of the first switch
 * acquires it, and it is dropped around pollwakeup() and strrput_nondata().
 */
int
strrput(queue_t *q, mblk_t *bp)
{
	struct stdata	*stp;
	ulong_t		rput_opt;
	strwakeup_t	wakeups;
	strsigset_t	firstmsgsigs;	/* Signals if first message on queue */
	strsigset_t	allmsgsigs;	/* Signals for all messages */
	strsigset_t	signals;	/* Signals events to generate */
	strpollset_t	pollwakeups;
	mblk_t		*nextbp;
	uchar_t		band = 0;
	int		hipri_sig;

	stp = (struct stdata *)q->q_ptr;
	/*
	 * Use rput_opt for optimized access to the SR_ flags except
	 * SR_POLLIN. That flag has to be checked under sd_lock since it
	 * is modified by strpoll().
	 */
	rput_opt = stp->sd_rput_opt;

	ASSERT(qclaimed(q));
	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
		"strrput called with message type:q %p bp %p", q, bp);

	/*
	 * Perform initial processing and pass to the parameterized functions.
	 */
	ASSERT(bp->b_next == NULL);

	switch (bp->b_datap->db_type) {
	case M_DATA:
		/*
		 * sockfs is the only consumer of STREOF and when it is set,
		 * it implies that the receiver is not interested in receiving
		 * any more data, hence the mblk is freed to prevent unnecessary
		 * message queueing at the stream head.
		 *
		 * NOTE(review): this compares the whole flag word for
		 * equality with STREOF, so it is only true when STREOF is
		 * the sole flag set; strread() tests the same flag with `&'.
		 * Confirm whether a bit test was intended here.
		 */
		if (stp->sd_flag == STREOF) {
			freemsg(bp);
			return (0);
		}
		if ((rput_opt & SR_IGN_ZEROLEN) &&
		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
			/*
			 * Ignore zero-length M_DATA messages. These might be
			 * generated by some transports.
			 * The zero-length M_DATA messages, even if they
			 * are ignored, should affect the atmark tracking and
			 * should wake up a thread sleeping in strwaitmark.
			 */
			mutex_enter(&stp->sd_lock);
			if (bp->b_flag & MSGMARKNEXT) {
				/*
				 * Record the position of the mark either
				 * in q_last or in STRATMARK.
				 */
				if (q->q_last != NULL) {
					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
					q->q_last->b_flag |= MSGMARKNEXT;
				} else {
					stp->sd_flag &= ~STRNOTATMARK;
					stp->sd_flag |= STRATMARK;
				}
			} else if (bp->b_flag & MSGNOTMARKNEXT) {
				/*
				 * Record that this is not the position of
				 * the mark either in q_last or in
				 * STRNOTATMARK.
				 */
				if (q->q_last != NULL) {
					q->q_last->b_flag &= ~MSGMARKNEXT;
					q->q_last->b_flag |= MSGNOTMARKNEXT;
				} else {
					stp->sd_flag &= ~STRATMARK;
					stp->sd_flag |= STRNOTATMARK;
				}
			}
			if (stp->sd_flag & RSLEEP) {
				stp->sd_flag &= ~RSLEEP;
				cv_broadcast(&q->q_wait);
			}
			mutex_exit(&stp->sd_lock);
			freemsg(bp);
			return (0);
		}
		/*
		 * Ordinary data: wake readers and pick the signal and poll
		 * events according to whether the message is banded.
		 */
		wakeups = RSLEEP;
		if (bp->b_band == 0) {
			firstmsgsigs = S_INPUT | S_RDNORM;
			pollwakeups = POLLIN | POLLRDNORM;
		} else {
			firstmsgsigs = S_INPUT | S_RDBAND;
			pollwakeups = POLLIN | POLLRDBAND;
		}
		/* With SR_SIGALLDATA every data message signals. */
		if (rput_opt & SR_SIGALLDATA)
			allmsgsigs = firstmsgsigs;
		else
			allmsgsigs = 0;

		mutex_enter(&stp->sd_lock);
		if ((rput_opt & SR_CONSOL_DATA) &&
		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
			/*
			 * Consolidate an M_DATA message onto an M_DATA,
			 * M_PROTO, or M_PCPROTO by merging it with q_last.
			 * The consolidation does not take place if
			 * the old message is marked with either of the
			 * marks or the delim flag or if the new
			 * message is marked with MSGMARK. The MSGMARK
			 * check is needed to handle the odd semantics of
			 * MSGMARK where essentially the whole message
			 * is to be treated as marked.
			 * Carry any MSGMARKNEXT  and MSGNOTMARKNEXT from the
			 * new message to the front of the b_cont chain.
			 */
			mblk_t *lbp;

			lbp = q->q_last;
			if (lbp != NULL &&
			    (lbp->b_datap->db_type == M_DATA ||
			    lbp->b_datap->db_type == M_PROTO ||
			    lbp->b_datap->db_type == M_PCPROTO) &&
			    !(lbp->b_flag & (MSGDELIM|MSGMARK|
			    MSGMARKNEXT))) {
				/*
				 * Take the last message off the queue; the
				 * merged message is queued again below.
				 */
				rmvq_noenab(q, lbp);
				/*
				 * The first message in the b_cont list
				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
				 * We need to handle the case where we
				 * are appending
				 *
				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
				 * 2) a MSGMARKNEXT to a plain message.
				 * 3) a MSGNOTMARKNEXT to a plain message
				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
				 *    message.
				 *
				 * Thus we never append a MSGMARKNEXT or
				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
				 */
				if (bp->b_flag & MSGMARKNEXT) {
					lbp->b_flag |= MSGMARKNEXT;
					lbp->b_flag &= ~MSGNOTMARKNEXT;
					bp->b_flag &= ~MSGMARKNEXT;
				} else if (bp->b_flag & MSGNOTMARKNEXT) {
					lbp->b_flag |= MSGNOTMARKNEXT;
					bp->b_flag &= ~MSGNOTMARKNEXT;
				}

				linkb(lbp, bp);
				bp = lbp;
				/*
				 * The new message logically isn't the first
				 * even though the q_first check below thinks
				 * it is. Clear the firstmsgsigs to make it
				 * not appear to be first.
				 */
				firstmsgsigs = 0;
			}
		}
		break;

	case M_PASSFP:
		/* Same events as plain data, but never per-message signals. */
		wakeups = RSLEEP;
		allmsgsigs = 0;
		if (bp->b_band == 0) {
			firstmsgsigs = S_INPUT | S_RDNORM;
			pollwakeups = POLLIN | POLLRDNORM;
		} else {
			firstmsgsigs = S_INPUT | S_RDBAND;
			pollwakeups = POLLIN | POLLRDBAND;
		}
		mutex_enter(&stp->sd_lock);
		break;

	case M_PROTO:
	case M_PCPROTO:
		/*
		 * Let the stream's proto callback decide what to wake and
		 * signal; it may also replace the message.
		 */
		ASSERT(stp->sd_rprotofunc != NULL);
		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
			&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
		/* ALLSIG and ALLPOLL stay defined through the default case. */
#define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
#define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
		POLLWRBAND)

		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
		ASSERT((allmsgsigs & ~ALLSIG) == 0);
		ASSERT((pollwakeups & ~ALLPOLL) == 0);

		mutex_enter(&stp->sd_lock);
		break;

	default:
		/* Everything else goes through the misc callback. */
		ASSERT(stp->sd_rmiscfunc != NULL);
		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
			&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
		ASSERT((allmsgsigs & ~ALLSIG) == 0);
		ASSERT((pollwakeups & ~ALLPOLL) == 0);
#undef	ALLSIG
#undef	ALLPOLL
		mutex_enter(&stp->sd_lock);
		break;
	}
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/* By default generate superset of signals */
	signals = (firstmsgsigs | allmsgsigs);

	/*
	 * The  proto and misc functions can return multiple messages
	 * as a b_next chain. Such messages are processed separately.
	 */
one_more:
	hipri_sig = 0;
	if (bp == NULL) {
		/* The callback consumed the message; only do the wakeups. */
		nextbp = NULL;
	} else {
		/* Detach this message from the rest of the b_next chain. */
		nextbp = bp->b_next;
		bp->b_next = NULL;

		switch (bp->b_datap->db_type) {
		case M_PCPROTO:
			/*
			 * Only one priority protocol message is allowed at the
			 * stream head at a time.
			 */
			if (stp->sd_flag & STRPRI) {
				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
				    "M_PCPROTO already at head");
				freemsg(bp);
				mutex_exit(&stp->sd_lock);
				goto done;
			}
			stp->sd_flag |= STRPRI;
			hipri_sig = 1;
			/* FALLTHRU */
		case M_DATA:
		case M_PROTO:
		case M_PASSFP:
			band = bp->b_band;
			/*
			 * Marking doesn't work well when messages
			 * are marked in more than one band.  We only
			 * remember the last message received, even if
			 * it is placed on the queue ahead of other
			 * marked messages.
			 */
			if (bp->b_flag & MSGMARK)
				stp->sd_mark = bp;
			(void) putq(q, bp);

			/*
			 * If message is a PCPROTO message, always use
			 * firstmsgsigs to determine if a signal should be
			 * sent as strrput is the only place to send
			 * signals for PCPROTO. Other messages are based on
			 * the STRGETINPROG flag. The flag determines if
			 * strrput or (k)strgetmsg will be responsible for
			 * sending the signals, in the firstmsgsigs case.
			 */
			if ((hipri_sig == 1) ||
			    (((stp->sd_flag & STRGETINPROG) == 0) &&
			    (q->q_first == bp)))
				signals = (firstmsgsigs | allmsgsigs);
			else
				signals = allmsgsigs;
			break;

		default:
			/*
			 * Not a queueable type: hand it to strrput_nondata(),
			 * which is called without sd_lock.
			 */
			mutex_exit(&stp->sd_lock);
			(void) strrput_nondata(q, bp);
			mutex_enter(&stp->sd_lock);
			break;
		}
	}
	ASSERT(MUTEX_HELD(&stp->sd_lock));
	/*
	 * Wake sleeping read/getmsg and cancel deferred wakeup
	 */
	if (wakeups & RSLEEP)
		stp->sd_wakeq &= ~RSLEEP;

	/* Only wake the sides that are actually marked as sleeping. */
	wakeups &= stp->sd_flag;
	if (wakeups & RSLEEP) {
		stp->sd_flag &= ~RSLEEP;
		cv_broadcast(&q->q_wait);
	}
	if (wakeups & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&_WR(q)->q_wait);
	}

	if (pollwakeups != 0) {
		if (pollwakeups == (POLLIN | POLLRDNORM)) {
			/*
			 * Can't use rput_opt since it was not
			 * read when sd_lock was held and SR_POLLIN is changed
			 * by strpoll() under sd_lock.
			 */
			if (!(stp->sd_rput_opt & SR_POLLIN))
				goto no_pollwake;
			stp->sd_rput_opt &= ~SR_POLLIN;
		}
		/* pollwakeup() is called without sd_lock. */
		mutex_exit(&stp->sd_lock);
		pollwakeup(&stp->sd_pollist, pollwakeups);
		mutex_enter(&stp->sd_lock);
	}
no_pollwake:

	/*
	 * strsendsig can handle multiple signals with a
	 * single call.
	 */
	if (stp->sd_sigflags & signals)
		strsendsig(stp->sd_siglist, signals, band, 0);
	mutex_exit(&stp->sd_lock);


done:
	/* Reached without sd_lock held. */
	if (nextbp == NULL)
		return (0);

	/*
	 * Any signals were handled the first time.
	 * Wakeups and pollwakeups are redone to avoid any race
	 * conditions - all the messages are not queued until the
	 * last message has been processed by strrput.
	 */
	bp = nextbp;
	signals = firstmsgsigs = allmsgsigs = 0;
	mutex_enter(&stp->sd_lock);
	goto one_more;
}
1808 
1809 static void
1810 log_dupioc(queue_t *rq, mblk_t *bp)
1811 {
1812 	queue_t *wq, *qp;
1813 	char *modnames, *mnp, *dname;
1814 	size_t maxmodstr;
1815 	boolean_t islast;
1816 
1817 	/*
1818 	 * Allocate a buffer large enough to hold the names of nstrpush modules
1819 	 * and one driver, with spaces between and NUL terminator.  If we can't
1820 	 * get memory, then we'll just log the driver name.
1821 	 */
1822 	maxmodstr = nstrpush * (FMNAMESZ + 1);
1823 	mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1824 
1825 	/* march down write side to print log message down to the driver */
1826 	wq = WR(rq);
1827 
1828 	/* make sure q_next doesn't shift around while we're grabbing data */
1829 	claimstr(wq);
1830 	qp = wq->q_next;
1831 	do {
1832 		if ((dname = qp->q_qinfo->qi_minfo->mi_idname) == NULL)
1833 			dname = "?";
1834 		islast = !SAMESTR(qp) || qp->q_next == NULL;
1835 		if (modnames == NULL) {
1836 			/*
1837 			 * If we don't have memory, then get the driver name in
1838 			 * the log where we can see it.  Note that memory
1839 			 * pressure is a possible cause of these sorts of bugs.
1840 			 */
1841 			if (islast) {
1842 				modnames = dname;
1843 				maxmodstr = 0;
1844 			}
1845 		} else {
1846 			mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1847 			if (!islast)
1848 				*mnp++ = ' ';
1849 		}
1850 		qp = qp->q_next;
1851 	} while (!islast);
1852 	releasestr(wq);
1853 	/* Cannot happen unless stream head is corrupt. */
1854 	ASSERT(modnames != NULL);
1855 	(void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1856 	    SL_CONSOLE|SL_TRACE|SL_ERROR,
1857 	    "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1858 	    rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1859 	    (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1860 	if (maxmodstr != 0)
1861 		kmem_free(modnames, maxmodstr);
1862 }
1863 
1864 int
1865 strrput_nondata(queue_t *q, mblk_t *bp)
1866 {
1867 	struct stdata *stp;
1868 	struct iocblk *iocbp;
1869 	struct stroptions *sop;
1870 	struct copyreq *reqp;
1871 	struct copyresp *resp;
1872 	unsigned char bpri;
1873 	unsigned char  flushed_already = 0;
1874 
1875 	stp = (struct stdata *)q->q_ptr;
1876 
1877 	ASSERT(!(stp->sd_flag & STPLEX));
1878 	ASSERT(qclaimed(q));
1879 
1880 	switch (bp->b_datap->db_type) {
1881 	case M_ERROR:
1882 		/*
1883 		 * An error has occurred downstream, the errno is in the first
1884 		 * bytes of the message.
1885 		 */
1886 		if ((bp->b_wptr - bp->b_rptr) == 2) {	/* New flavor */
1887 			unsigned char rw = 0;
1888 
1889 			mutex_enter(&stp->sd_lock);
1890 			if (*bp->b_rptr != NOERROR) {	/* read error */
1891 				if (*bp->b_rptr != 0) {
1892 					if (stp->sd_flag & STRDERR)
1893 						flushed_already |= FLUSHR;
1894 					stp->sd_flag |= STRDERR;
1895 					rw |= FLUSHR;
1896 				} else {
1897 					stp->sd_flag &= ~STRDERR;
1898 				}
1899 				stp->sd_rerror = *bp->b_rptr;
1900 			}
1901 			bp->b_rptr++;
1902 			if (*bp->b_rptr != NOERROR) {	/* write error */
1903 				if (*bp->b_rptr != 0) {
1904 					if (stp->sd_flag & STWRERR)
1905 						flushed_already |= FLUSHW;
1906 					stp->sd_flag |= STWRERR;
1907 					rw |= FLUSHW;
1908 				} else {
1909 					stp->sd_flag &= ~STWRERR;
1910 				}
1911 				stp->sd_werror = *bp->b_rptr;
1912 			}
1913 			if (rw) {
1914 				TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
1915 					"strrput cv_broadcast:q %p, bp %p",
1916 					q, bp);
1917 				cv_broadcast(&q->q_wait); /* readers */
1918 				cv_broadcast(&_WR(q)->q_wait); /* writers */
1919 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
1920 
1921 				mutex_exit(&stp->sd_lock);
1922 				pollwakeup(&stp->sd_pollist, POLLERR);
1923 				mutex_enter(&stp->sd_lock);
1924 
1925 				if (stp->sd_sigflags & S_ERROR)
1926 					strsendsig(stp->sd_siglist, S_ERROR, 0,
1927 					    ((rw & FLUSHR) ? stp->sd_rerror :
1928 					    stp->sd_werror));
1929 				mutex_exit(&stp->sd_lock);
1930 				/*
1931 				 * Send the M_FLUSH only
1932 				 * for the first M_ERROR
1933 				 * message on the stream
1934 				 */
1935 				if (flushed_already == rw) {
1936 					freemsg(bp);
1937 					return (0);
1938 				}
1939 
1940 				bp->b_datap->db_type = M_FLUSH;
1941 				*bp->b_rptr = rw;
1942 				bp->b_wptr = bp->b_rptr + 1;
1943 				/*
1944 				 * Protect against the driver
1945 				 * passing up messages after
1946 				 * it has done a qprocsoff
1947 				 */
1948 				if (_OTHERQ(q)->q_next == NULL)
1949 					freemsg(bp);
1950 				else
1951 					qreply(q, bp);
1952 				return (0);
1953 			} else
1954 				mutex_exit(&stp->sd_lock);
1955 		} else if (*bp->b_rptr != 0) {		/* Old flavor */
1956 				if (stp->sd_flag & (STRDERR|STWRERR))
1957 					flushed_already = FLUSHRW;
1958 				mutex_enter(&stp->sd_lock);
1959 				stp->sd_flag |= (STRDERR|STWRERR);
1960 				stp->sd_rerror = *bp->b_rptr;
1961 				stp->sd_werror = *bp->b_rptr;
1962 				TRACE_2(TR_FAC_STREAMS_FR,
1963 					TR_STRRPUT_WAKE2,
1964 					"strrput wakeup #2:q %p, bp %p", q, bp);
1965 				cv_broadcast(&q->q_wait); /* the readers */
1966 				cv_broadcast(&_WR(q)->q_wait); /* the writers */
1967 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
1968 
1969 				mutex_exit(&stp->sd_lock);
1970 				pollwakeup(&stp->sd_pollist, POLLERR);
1971 				mutex_enter(&stp->sd_lock);
1972 
1973 				if (stp->sd_sigflags & S_ERROR)
1974 					strsendsig(stp->sd_siglist, S_ERROR, 0,
1975 					    (stp->sd_werror ? stp->sd_werror :
1976 					    stp->sd_rerror));
1977 				mutex_exit(&stp->sd_lock);
1978 
1979 				/*
1980 				 * Send the M_FLUSH only
1981 				 * for the first M_ERROR
1982 				 * message on the stream
1983 				 */
1984 				if (flushed_already != FLUSHRW) {
1985 					bp->b_datap->db_type = M_FLUSH;
1986 					*bp->b_rptr = FLUSHRW;
1987 					/*
1988 					 * Protect against the driver passing up
1989 					 * messages after it has done a
1990 					 * qprocsoff.
1991 					 */
1992 				if (_OTHERQ(q)->q_next == NULL)
1993 					freemsg(bp);
1994 				else
1995 					qreply(q, bp);
1996 				return (0);
1997 				}
1998 		}
1999 		freemsg(bp);
2000 		return (0);
2001 
2002 	case M_HANGUP:
2003 
2004 		freemsg(bp);
2005 		mutex_enter(&stp->sd_lock);
2006 		stp->sd_werror = ENXIO;
2007 		stp->sd_flag |= STRHUP;
2008 		stp->sd_flag &= ~(WSLEEP|RSLEEP);
2009 
2010 		/*
2011 		 * send signal if controlling tty
2012 		 */
2013 
2014 		if (stp->sd_sidp) {
2015 			prsignal(stp->sd_sidp, SIGHUP);
2016 			if (stp->sd_sidp != stp->sd_pgidp)
2017 				pgsignal(stp->sd_pgidp, SIGTSTP);
2018 		}
2019 
2020 		/*
2021 		 * wake up read, write, and exception pollers and
2022 		 * reset wakeup mechanism.
2023 		 */
2024 		cv_broadcast(&q->q_wait);	/* the readers */
2025 		cv_broadcast(&_WR(q)->q_wait);	/* the writers */
2026 		cv_broadcast(&stp->sd_monitor);	/* the ioctllers */
2027 		mutex_exit(&stp->sd_lock);
2028 		strhup(stp);
2029 		return (0);
2030 
2031 	case M_UNHANGUP:
2032 		freemsg(bp);
2033 		mutex_enter(&stp->sd_lock);
2034 		stp->sd_werror = 0;
2035 		stp->sd_flag &= ~STRHUP;
2036 		mutex_exit(&stp->sd_lock);
2037 		return (0);
2038 
2039 	case M_SIG:
2040 		/*
2041 		 * Someone downstream wants to post a signal.  The
2042 		 * signal to post is contained in the first byte of the
2043 		 * message.  If the message would go on the front of
2044 		 * the queue, send a signal to the process group
2045 		 * (if not SIGPOLL) or to the siglist processes
2046 		 * (SIGPOLL).  If something is already on the queue,
2047 		 * OR if we are delivering a delayed suspend (*sigh*
2048 		 * another "tty" hack) and there's no one sleeping already,
2049 		 * just enqueue the message.
2050 		 */
2051 		mutex_enter(&stp->sd_lock);
2052 		if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2053 		    !(stp->sd_flag & RSLEEP))) {
2054 			(void) putq(q, bp);
2055 			mutex_exit(&stp->sd_lock);
2056 			return (0);
2057 		}
2058 		mutex_exit(&stp->sd_lock);
2059 		/* FALLTHRU */
2060 
2061 	case M_PCSIG:
2062 		/*
2063 		 * Don't enqueue, just post the signal.
2064 		 */
2065 		strsignal(stp, *bp->b_rptr, 0L);
2066 		freemsg(bp);
2067 		return (0);
2068 
2069 	case M_FLUSH:
2070 		/*
2071 		 * Flush queues.  The indication of which queues to flush
2072 		 * is in the first byte of the message.  If the read queue
2073 		 * is specified, then flush it.  If FLUSHBAND is set, just
2074 		 * flush the band specified by the second byte of the message.
2075 		 *
2076 		 * If a module has issued a M_SETOPT to not flush hi
2077 		 * priority messages off of the stream head, then pass this
2078 		 * flag into the flushq code to preserve such messages.
2079 		 */
2080 
2081 		if (*bp->b_rptr & FLUSHR) {
2082 			mutex_enter(&stp->sd_lock);
2083 			if (*bp->b_rptr & FLUSHBAND) {
2084 				ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2085 				flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2086 			} else
2087 				flushq_common(q, FLUSHALL,
2088 				    stp->sd_read_opt & RFLUSHPCPROT);
2089 			if ((q->q_first == NULL) ||
2090 			    (q->q_first->b_datap->db_type < QPCTL))
2091 				stp->sd_flag &= ~STRPRI;
2092 			else {
2093 				ASSERT(stp->sd_flag & STRPRI);
2094 			}
2095 			mutex_exit(&stp->sd_lock);
2096 		}
2097 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2098 			*bp->b_rptr &= ~FLUSHR;
2099 			bp->b_flag |= MSGNOLOOP;
2100 			/*
2101 			 * Protect against the driver passing up
2102 			 * messages after it has done a qprocsoff.
2103 			 */
2104 			if (_OTHERQ(q)->q_next == NULL)
2105 				freemsg(bp);
2106 			else
2107 				qreply(q, bp);
2108 			return (0);
2109 		}
2110 		freemsg(bp);
2111 		return (0);
2112 
2113 	case M_IOCACK:
2114 	case M_IOCNAK:
2115 		iocbp = (struct iocblk *)bp->b_rptr;
2116 		/*
2117 		 * If not waiting for ACK or NAK then just free msg.
2118 		 * If incorrect id sequence number then just free msg.
2119 		 * If already have ACK or NAK for user then this is a
2120 		 *    duplicate, display a warning and free the msg.
2121 		 */
2122 		mutex_enter(&stp->sd_lock);
2123 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2124 		    (stp->sd_iocid != iocbp->ioc_id)) {
2125 			/*
2126 			 * If the ACK/NAK is a dup, display a message
2127 			 * Dup is when sd_iocid == ioc_id, and
2128 			 * sd_iocblk == <valid ptr> or -1 (the former
2129 			 * is when an ioctl has been put on the stream
2130 			 * head, but has not yet been consumed, the
2131 			 * later is when it has been consumed).
2132 			 */
2133 			if ((stp->sd_iocid == iocbp->ioc_id) &&
2134 			    (stp->sd_iocblk != NULL)) {
2135 				log_dupioc(q, bp);
2136 			}
2137 			freemsg(bp);
2138 			mutex_exit(&stp->sd_lock);
2139 			return (0);
2140 		}
2141 
2142 		/*
2143 		 * Assign ACK or NAK to user and wake up.
2144 		 */
2145 		stp->sd_iocblk = bp;
2146 		cv_broadcast(&stp->sd_monitor);
2147 		mutex_exit(&stp->sd_lock);
2148 		return (0);
2149 
2150 	case M_COPYIN:
2151 	case M_COPYOUT:
2152 		reqp = (struct copyreq *)bp->b_rptr;
2153 
2154 		/*
2155 		 * If not waiting for ACK or NAK then just fail request.
2156 		 * If already have ACK, NAK, or copy request, then just
2157 		 * fail request.
2158 		 * If incorrect id sequence number then just fail request.
2159 		 */
2160 		mutex_enter(&stp->sd_lock);
2161 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2162 		    (stp->sd_iocid != reqp->cq_id)) {
2163 			if (bp->b_cont) {
2164 				freemsg(bp->b_cont);
2165 				bp->b_cont = NULL;
2166 			}
2167 			bp->b_datap->db_type = M_IOCDATA;
2168 			bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2169 			resp = (struct copyresp *)bp->b_rptr;
2170 			resp->cp_rval = (caddr_t)1;	/* failure */
2171 			mutex_exit(&stp->sd_lock);
2172 			putnext(stp->sd_wrq, bp);
2173 			return (0);
2174 		}
2175 
2176 		/*
2177 		 * Assign copy request to user and wake up.
2178 		 */
2179 		stp->sd_iocblk = bp;
2180 		cv_broadcast(&stp->sd_monitor);
2181 		mutex_exit(&stp->sd_lock);
2182 		return (0);
2183 
2184 	case M_SETOPTS:
2185 		/*
2186 		 * Set stream head options (read option, write offset,
2187 		 * min/max packet size, and/or high/low water marks for
2188 		 * the read side only).
2189 		 */
2190 
2191 		bpri = 0;
2192 		sop = (struct stroptions *)bp->b_rptr;
2193 		mutex_enter(&stp->sd_lock);
2194 		if (sop->so_flags & SO_READOPT) {
2195 			switch (sop->so_readopt & RMODEMASK) {
2196 			case RNORM:
2197 				stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2198 				break;
2199 
2200 			case RMSGD:
2201 				stp->sd_read_opt =
2202 				    ((stp->sd_read_opt & ~RD_MSGNODIS) |
2203 				    RD_MSGDIS);
2204 				break;
2205 
2206 			case RMSGN:
2207 				stp->sd_read_opt =
2208 				    ((stp->sd_read_opt & ~RD_MSGDIS) |
2209 				    RD_MSGNODIS);
2210 				break;
2211 			}
2212 			switch (sop->so_readopt & RPROTMASK) {
2213 			case RPROTNORM:
2214 				stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2215 				break;
2216 
2217 			case RPROTDAT:
2218 				stp->sd_read_opt =
2219 				    ((stp->sd_read_opt & ~RD_PROTDIS) |
2220 				    RD_PROTDAT);
2221 				break;
2222 
2223 			case RPROTDIS:
2224 				stp->sd_read_opt =
2225 				    ((stp->sd_read_opt & ~RD_PROTDAT) |
2226 				    RD_PROTDIS);
2227 				break;
2228 			}
2229 			switch (sop->so_readopt & RFLUSHMASK) {
2230 			case RFLUSHPCPROT:
2231 				/*
2232 				 * This sets the stream head to NOT flush
2233 				 * M_PCPROTO messages.
2234 				 */
2235 				stp->sd_read_opt |= RFLUSHPCPROT;
2236 				break;
2237 			}
2238 		}
2239 		if (sop->so_flags & SO_ERROPT) {
2240 			switch (sop->so_erropt & RERRMASK) {
2241 			case RERRNORM:
2242 				stp->sd_flag &= ~STRDERRNONPERSIST;
2243 				break;
2244 			case RERRNONPERSIST:
2245 				stp->sd_flag |= STRDERRNONPERSIST;
2246 				break;
2247 			}
2248 			switch (sop->so_erropt & WERRMASK) {
2249 			case WERRNORM:
2250 				stp->sd_flag &= ~STWRERRNONPERSIST;
2251 				break;
2252 			case WERRNONPERSIST:
2253 				stp->sd_flag |= STWRERRNONPERSIST;
2254 				break;
2255 			}
2256 		}
2257 		if (sop->so_flags & SO_COPYOPT) {
2258 			if (sop->so_copyopt & ZCVMSAFE) {
2259 				stp->sd_copyflag |= STZCVMSAFE;
2260 				stp->sd_copyflag &= ~STZCVMUNSAFE;
2261 			} else if (sop->so_copyopt & ZCVMUNSAFE) {
2262 				stp->sd_copyflag |= STZCVMUNSAFE;
2263 				stp->sd_copyflag &= ~STZCVMSAFE;
2264 			}
2265 
2266 			if (sop->so_copyopt & COPYCACHED) {
2267 				stp->sd_copyflag |= STRCOPYCACHED;
2268 			}
2269 		}
2270 		if (sop->so_flags & SO_WROFF)
2271 			stp->sd_wroff = sop->so_wroff;
2272 		if (sop->so_flags & SO_MINPSZ)
2273 			q->q_minpsz = sop->so_minpsz;
2274 		if (sop->so_flags & SO_MAXPSZ)
2275 			q->q_maxpsz = sop->so_maxpsz;
2276 		if (sop->so_flags & SO_MAXBLK)
2277 			stp->sd_maxblk = sop->so_maxblk;
2278 		if (sop->so_flags & SO_HIWAT) {
2279 		    if (sop->so_flags & SO_BAND) {
2280 			if (strqset(q, QHIWAT, sop->so_band, sop->so_hiwat))
2281 				cmn_err(CE_WARN,
2282 				    "strrput: could not allocate qband\n");
2283 			else
2284 				bpri = sop->so_band;
2285 		    } else {
2286 			q->q_hiwat = sop->so_hiwat;
2287 		    }
2288 		}
2289 		if (sop->so_flags & SO_LOWAT) {
2290 		    if (sop->so_flags & SO_BAND) {
2291 			if (strqset(q, QLOWAT, sop->so_band, sop->so_lowat))
2292 				cmn_err(CE_WARN,
2293 				    "strrput: could not allocate qband\n");
2294 			else
2295 				bpri = sop->so_band;
2296 		    } else {
2297 			q->q_lowat = sop->so_lowat;
2298 		    }
2299 		}
2300 		if (sop->so_flags & SO_MREADON)
2301 			stp->sd_flag |= SNDMREAD;
2302 		if (sop->so_flags & SO_MREADOFF)
2303 			stp->sd_flag &= ~SNDMREAD;
2304 		if (sop->so_flags & SO_NDELON)
2305 			stp->sd_flag |= OLDNDELAY;
2306 		if (sop->so_flags & SO_NDELOFF)
2307 			stp->sd_flag &= ~OLDNDELAY;
2308 		if (sop->so_flags & SO_ISTTY)
2309 			stp->sd_flag |= STRISTTY;
2310 		if (sop->so_flags & SO_ISNTTY)
2311 			stp->sd_flag &= ~STRISTTY;
2312 		if (sop->so_flags & SO_TOSTOP)
2313 			stp->sd_flag |= STRTOSTOP;
2314 		if (sop->so_flags & SO_TONSTOP)
2315 			stp->sd_flag &= ~STRTOSTOP;
2316 		if (sop->so_flags & SO_DELIM)
2317 			stp->sd_flag |= STRDELIM;
2318 		if (sop->so_flags & SO_NODELIM)
2319 			stp->sd_flag &= ~STRDELIM;
2320 
2321 		mutex_exit(&stp->sd_lock);
2322 		freemsg(bp);
2323 
2324 		/* Check backenable in case the water marks changed */
2325 		qbackenable(q, bpri);
2326 		return (0);
2327 
2328 	/*
2329 	 * The following set of cases deal with situations where two stream
2330 	 * heads are connected to each other (twisted streams).  These messages
2331 	 * have no meaning at the stream head.
2332 	 */
2333 	case M_BREAK:
2334 	case M_CTL:
2335 	case M_DELAY:
2336 	case M_START:
2337 	case M_STOP:
2338 	case M_IOCDATA:
2339 	case M_STARTI:
2340 	case M_STOPI:
2341 		freemsg(bp);
2342 		return (0);
2343 
2344 	case M_IOCTL:
2345 		/*
2346 		 * Always NAK this condition
2347 		 * (makes no sense)
2348 		 * If there is one or more threads in the read side
2349 		 * rwnext we have to defer the nacking until that thread
2350 		 * returns (in strget).
2351 		 */
2352 		mutex_enter(&stp->sd_lock);
2353 		if (stp->sd_struiodnak != 0) {
2354 			/*
			 * Defer NAK to the streamhead. Queue at the end
			 * of the list.
2357 			 */
2358 			mblk_t *mp = stp->sd_struionak;
2359 
2360 			while (mp && mp->b_next)
2361 				mp = mp->b_next;
2362 			if (mp)
2363 				mp->b_next = bp;
2364 			else
2365 				stp->sd_struionak = bp;
2366 			bp->b_next = NULL;
2367 			mutex_exit(&stp->sd_lock);
2368 			return (0);
2369 		}
2370 		mutex_exit(&stp->sd_lock);
2371 
2372 		bp->b_datap->db_type = M_IOCNAK;
2373 		/*
2374 		 * Protect against the driver passing up
2375 		 * messages after it has done a qprocsoff.
2376 		 */
2377 		if (_OTHERQ(q)->q_next == NULL)
2378 			freemsg(bp);
2379 		else
2380 			qreply(q, bp);
2381 		return (0);
2382 
2383 	default:
2384 #ifdef DEBUG
2385 		cmn_err(CE_WARN,
2386 			"bad message type %x received at stream head\n",
2387 			bp->b_datap->db_type);
2388 #endif
2389 		freemsg(bp);
2390 		return (0);
2391 	}
2392 
2393 	/* NOTREACHED */
2394 }
2395 
2396 /*
2397  * Check if the stream pointed to by `stp' can be written to, and return an
2398  * error code if not.  If `eiohup' is set, then return EIO if STRHUP is set.
2399  * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
2400  * then always return EPIPE and send a SIGPIPE to the invoking thread.
2401  */
2402 static int
2403 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2404 {
2405 	int error;
2406 
2407 	ASSERT(MUTEX_HELD(&stp->sd_lock));
2408 
2409 	/*
2410 	 * For modem support, POSIX states that on writes, EIO should
2411 	 * be returned if the stream has been hung up.
2412 	 */
2413 	if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
2414 		error = EIO;
2415 	else
2416 		error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2417 
2418 	if (error != 0) {
2419 		if (!(stp->sd_flag & STPLEX) &&
2420 		    (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
2421 			tsignal(curthread, SIGPIPE);
2422 			error = EPIPE;
2423 		}
2424 	}
2425 
2426 	return (error);
2427 }
2428 
2429 /*
2430  * Copyin and send data down a stream.
2431  * The caller will allocate and copyin any control part that precedes the
2432  * message and pass than in as mctl.
2433  *
2434  * Caller should *not* hold sd_lock.
2435  * When EWOULDBLOCK is returned the caller has to redo the canputnext
2436  * under sd_lock in order to avoid missing a backenabling wakeup.
2437  *
2438  * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2439  *
2440  * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2441  * For sync streams we can only ignore flow control by reverting to using
2442  * putnext.
2443  *
2444  * If sd_maxblk is less than *iosize this routine might return without
2445  * transferring all of *iosize. In all cases, on return *iosize will contain
2446  * the amount of data that was transferred.
2447  */
2448 static int
2449 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2450     int b_flag, int pri, int flags)
2451 {
2452 	struiod_t uiod;
2453 	mblk_t *mp;
2454 	queue_t *wqp = stp->sd_wrq;
2455 	int error = 0;
2456 	ssize_t count = *iosize;
2457 	cred_t *cr;
2458 
2459 	ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2460 
2461 	if (uiop != NULL && count >= 0)
2462 		flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2463 
2464 	if (!(flags & STRUIO_POSTPONE)) {
2465 		/*
2466 		 * Use regular canputnext, strmakedata, putnext sequence.
2467 		 */
2468 		if (pri == 0) {
2469 			if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2470 				freemsg(mctl);
2471 				return (EWOULDBLOCK);
2472 			}
2473 		} else {
2474 			if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2475 				freemsg(mctl);
2476 				return (EWOULDBLOCK);
2477 			}
2478 		}
2479 
2480 		if ((error = strmakedata(iosize, uiop, stp, flags,
2481 					&mp)) != 0) {
2482 			freemsg(mctl);
2483 			/*
2484 			 * need to change return code to ENOMEM
2485 			 * so that this is not confused with
2486 			 * flow control, EAGAIN.
2487 			 */
2488 
2489 			if (error == EAGAIN)
2490 				return (ENOMEM);
2491 			else
2492 				return (error);
2493 		}
2494 		if (mctl != NULL) {
2495 			if (mctl->b_cont == NULL)
2496 				mctl->b_cont = mp;
2497 			else if (mp != NULL)
2498 				linkb(mctl, mp);
2499 			mp = mctl;
2500 			/*
2501 			 * Note that for interrupt thread, the CRED() is
2502 			 * NULL. Don't bother with the pid either.
2503 			 */
2504 			if ((cr = CRED()) != NULL) {
2505 				mblk_setcred(mp, cr);
2506 				DB_CPID(mp) = curproc->p_pid;
2507 			}
2508 		} else if (mp == NULL)
2509 			return (0);
2510 
2511 		mp->b_flag |= b_flag;
2512 		mp->b_band = (uchar_t)pri;
2513 
2514 		if (flags & MSG_IGNFLOW) {
2515 			/*
2516 			 * XXX Hack: Don't get stuck running service
2517 			 * procedures. This is needed for sockfs when
2518 			 * sending the unbind message out of the rput
2519 			 * procedure - we don't want a put procedure
2520 			 * to run service procedures.
2521 			 */
2522 			putnext(wqp, mp);
2523 		} else {
2524 			stream_willservice(stp);
2525 			putnext(wqp, mp);
2526 			stream_runservice(stp);
2527 		}
2528 		return (0);
2529 	}
2530 	/*
2531 	 * Stream supports rwnext() for the write side.
2532 	 */
2533 	if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2534 		freemsg(mctl);
2535 		/*
2536 		 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2537 		 */
2538 		return (error == EAGAIN ? ENOMEM : error);
2539 	}
2540 	if (mctl != NULL) {
2541 		if (mctl->b_cont == NULL)
2542 			mctl->b_cont = mp;
2543 		else if (mp != NULL)
2544 			linkb(mctl, mp);
2545 		mp = mctl;
2546 		/*
2547 		 * Note that for interrupt thread, the CRED() is
2548 		 * NULL.  Don't bother with the pid either.
2549 		 */
2550 		if ((cr = CRED()) != NULL) {
2551 			mblk_setcred(mp, cr);
2552 			DB_CPID(mp) = curproc->p_pid;
2553 		}
2554 	} else if (mp == NULL) {
2555 		return (0);
2556 	}
2557 
2558 	mp->b_flag |= b_flag;
2559 	mp->b_band = (uchar_t)pri;
2560 
2561 	(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
2562 		sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
2563 	uiod.d_uio.uio_offset = 0;
2564 	uiod.d_mp = mp;
2565 	error = rwnext(wqp, &uiod);
2566 	if (! uiod.d_mp) {
2567 		uioskip(uiop, *iosize);
2568 		return (error);
2569 	}
2570 	ASSERT(mp == uiod.d_mp);
2571 	if (error == EINVAL) {
2572 		/*
2573 		 * The stream plumbing must have changed while
2574 		 * we were away, so just turn off rwnext()s.
2575 		 */
2576 		error = 0;
2577 	} else if (error == EBUSY || error == EWOULDBLOCK) {
2578 		/*
2579 		 * Couldn't enter a perimeter or took a page fault,
2580 		 * so fall-back to putnext().
2581 		 */
2582 		error = 0;
2583 	} else {
2584 		freemsg(mp);
2585 		return (error);
2586 	}
2587 	/* Have to check canput before consuming data from the uio */
2588 	if (pri == 0) {
2589 		if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2590 			freemsg(mp);
2591 			return (EWOULDBLOCK);
2592 		}
2593 	} else {
2594 		if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2595 			freemsg(mp);
2596 			return (EWOULDBLOCK);
2597 		}
2598 	}
2599 	ASSERT(mp == uiod.d_mp);
2600 	/* Copyin data from the uio */
2601 	if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2602 		freemsg(mp);
2603 		return (error);
2604 	}
2605 	uioskip(uiop, *iosize);
2606 	if (flags & MSG_IGNFLOW) {
2607 		/*
2608 		 * XXX Hack: Don't get stuck running service procedures.
2609 		 * This is needed for sockfs when sending the unbind message
2610 		 * out of the rput procedure - we don't want a put procedure
2611 		 * to run service procedures.
2612 		 */
2613 		putnext(wqp, mp);
2614 	} else {
2615 		stream_willservice(stp);
2616 		putnext(wqp, mp);
2617 		stream_runservice(stp);
2618 	}
2619 	return (0);
2620 }
2621 
2622 /*
2623  * Write attempts to break the write request into messages conforming
2624  * with the minimum and maximum packet sizes set downstream.
2625  *
2626  * Write will not block if downstream queue is full and
2627  * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2628  *
2629  * A write of zero bytes gets packaged into a zero length message and sent
2630  * downstream like any other message.
2631  *
2632  * If buffers of the requested sizes are not available, the write will
2633  * sleep until the buffers become available.
2634  *
2635  * Write (if specified) will supply a write offset in a message if it
2636  * makes sense. This can be specified by downstream modules as part of
2637  * a M_SETOPTS message.  Write will not supply the write offset if it
2638  * cannot supply any data in a buffer.  In other words, write will never
2639  * send down an empty packet due to a write offset.
2640  */
2641 /* ARGSUSED2 */
2642 int
2643 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2644 {
2645 	return (strwrite_common(vp, uiop, crp, 0));
2646 }
2647 
2648 /* ARGSUSED2 */
2649 int
2650 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
2651 {
2652 	struct stdata *stp;
2653 	struct queue *wqp;
2654 	ssize_t rmin, rmax;
2655 	ssize_t iosize;
2656 	int waitflag;
2657 	int tempmode;
2658 	int error = 0;
2659 	int b_flag;
2660 
2661 	ASSERT(vp->v_stream);
2662 	stp = vp->v_stream;
2663 
2664 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
2665 		if ((error = straccess(stp, JCWRITE)) != 0)
2666 			return (error);
2667 
2668 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2669 		mutex_enter(&stp->sd_lock);
2670 		error = strwriteable(stp, B_TRUE, B_TRUE);
2671 		mutex_exit(&stp->sd_lock);
2672 		if (error != 0)
2673 			return (error);
2674 	}
2675 
2676 	wqp = stp->sd_wrq;
2677 
2678 	/* get these values from them cached in the stream head */
2679 	rmin = stp->sd_qn_minpsz;
2680 	rmax = stp->sd_qn_maxpsz;
2681 
2682 	/*
2683 	 * Check the min/max packet size constraints.  If min packet size
2684 	 * is non-zero, the write cannot be split into multiple messages
2685 	 * and still guarantee the size constraints.
2686 	 */
2687 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2688 
2689 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
2690 	if (rmax == 0) {
2691 		return (0);
2692 	}
2693 	if (rmin > 0) {
2694 		if (uiop->uio_resid < rmin) {
2695 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2696 				"strwrite out:q %p out %d error %d",
2697 				wqp, 0, ERANGE);
2698 			return (ERANGE);
2699 		}
2700 		if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2701 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2702 				"strwrite out:q %p out %d error %d",
2703 				wqp, 1, ERANGE);
2704 			return (ERANGE);
2705 		}
2706 	}
2707 
2708 	/*
2709 	 * Do until count satisfied or error.
2710 	 */
2711 	waitflag = WRITEWAIT | wflag;
2712 	if (stp->sd_flag & OLDNDELAY)
2713 		tempmode = uiop->uio_fmode & ~FNDELAY;
2714 	else
2715 		tempmode = uiop->uio_fmode;
2716 
2717 	if (rmax == INFPSZ)
2718 		rmax = uiop->uio_resid;
2719 
2720 	/*
2721 	 * Note that tempmode does not get used in strput/strmakedata
2722 	 * but only in strwaitq. The other routines use uio_fmode
2723 	 * unmodified.
2724 	 */
2725 
2726 	/* LINTED: constant in conditional context */
2727 	while (1) {	/* breaks when uio_resid reaches zero */
2728 		/*
2729 		 * Determine the size of the next message to be
2730 		 * packaged.  May have to break write into several
2731 		 * messages based on max packet size.
2732 		 */
2733 		iosize = MIN(uiop->uio_resid, rmax);
2734 
2735 		/*
2736 		 * Put block downstream when flow control allows it.
2737 		 */
2738 		if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2739 			b_flag = MSGDELIM;
2740 		else
2741 			b_flag = 0;
2742 
2743 		for (;;) {
2744 			int done = 0;
2745 
2746 			error = strput(stp, NULL, uiop, &iosize, b_flag,
2747 				0, 0);
2748 			if (error == 0)
2749 				break;
2750 			if (error != EWOULDBLOCK)
2751 				goto out;
2752 
2753 			mutex_enter(&stp->sd_lock);
2754 			/*
2755 			 * Check for a missed wakeup.
2756 			 * Needed since strput did not hold sd_lock across
2757 			 * the canputnext.
2758 			 */
2759 			if (canputnext(wqp)) {
2760 				/* Try again */
2761 				mutex_exit(&stp->sd_lock);
2762 				continue;
2763 			}
2764 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2765 				"strwrite wait:q %p wait", wqp);
2766 			if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2767 			    tempmode, -1, &done)) != 0 || done) {
2768 				mutex_exit(&stp->sd_lock);
2769 				if ((vp->v_type == VFIFO) &&
2770 				    (uiop->uio_fmode & FNDELAY) &&
2771 				    (error == EAGAIN))
2772 					error = 0;
2773 				goto out;
2774 			}
2775 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2776 				"strwrite wake:q %p awakes", wqp);
2777 			mutex_exit(&stp->sd_lock);
2778 			if (stp->sd_sidp != NULL &&
2779 			    stp->sd_vnode->v_type != VFIFO)
2780 				if (error = straccess(stp, JCWRITE))
2781 					goto out;
2782 		}
2783 		waitflag |= NOINTR;
2784 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2785 			"strwrite resid:q %p uiop %p", wqp, uiop);
2786 		if (uiop->uio_resid) {
2787 			/* Recheck for errors - needed for sockets */
2788 			if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2789 			    (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2790 				mutex_enter(&stp->sd_lock);
2791 				error = strwriteable(stp, B_FALSE, B_TRUE);
2792 				mutex_exit(&stp->sd_lock);
2793 				if (error != 0)
2794 					return (error);
2795 			}
2796 			continue;
2797 		}
2798 		break;
2799 	}
2800 out:
2801 	/*
2802 	 * For historical reasons, applications expect EAGAIN when a data
2803 	 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2804 	 */
2805 	if (error == ENOMEM)
2806 		error = EAGAIN;
2807 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2808 		"strwrite out:q %p out %d error %d", wqp, 2, error);
2809 	return (error);
2810 }
2811 
2812 /*
2813  * Stream head write service routine.
2814  * Its job is to wake up any sleeping writers when a queue
2815  * downstream needs data (part of the flow control in putq and getq).
2816  * It also must wake anyone sleeping on a poll().
2817  * For stream head right below mux module, it must also invoke put procedure
2818  * of next downstream module.
2819  */
2820 int
2821 strwsrv(queue_t *q)
2822 {
2823 	struct stdata *stp;
2824 	queue_t *tq;
2825 	qband_t *qbp;
2826 	int i;
2827 	qband_t *myqbp;
2828 	int isevent;
2829 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
2830 
2831 	TRACE_1(TR_FAC_STREAMS_FR,
2832 		TR_STRWSRV, "strwsrv:q %p", q);
2833 	stp = (struct stdata *)q->q_ptr;
2834 	ASSERT(qclaimed(q));
2835 	mutex_enter(&stp->sd_lock);
2836 	ASSERT(!(stp->sd_flag & STPLEX));
2837 
2838 	if (stp->sd_flag & WSLEEP) {
2839 		stp->sd_flag &= ~WSLEEP;
2840 		cv_broadcast(&q->q_wait);
2841 	}
2842 	mutex_exit(&stp->sd_lock);
2843 
2844 	/* The other end of a stream pipe went away. */
2845 	if ((tq = q->q_next) == NULL) {
2846 		return (0);
2847 	}
2848 
2849 	/* Find the next module forward that has a service procedure */
2850 	claimstr(q);
2851 	tq = q->q_nfsrv;
2852 	ASSERT(tq != NULL);
2853 
2854 	if ((q->q_flag & QBACK)) {
2855 		if ((tq->q_flag & QFULL)) {
2856 			mutex_enter(QLOCK(tq));
2857 			if (!(tq->q_flag & QFULL)) {
2858 				mutex_exit(QLOCK(tq));
2859 				goto wakeup;
2860 			}
2861 			/*
2862 			 * The queue must have become full again. Set QWANTW
2863 			 * again so strwsrv will be back enabled when
2864 			 * the queue becomes non-full next time.
2865 			 */
2866 			tq->q_flag |= QWANTW;
2867 			mutex_exit(QLOCK(tq));
2868 		} else {
2869 		wakeup:
2870 			pollwakeup(&stp->sd_pollist, POLLWRNORM);
2871 			mutex_enter(&stp->sd_lock);
2872 			if (stp->sd_sigflags & S_WRNORM)
2873 				strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
2874 			mutex_exit(&stp->sd_lock);
2875 		}
2876 	}
2877 
2878 	isevent = 0;
2879 	i = 1;
2880 	bzero((caddr_t)qbf, NBAND);
2881 	mutex_enter(QLOCK(tq));
2882 	if ((myqbp = q->q_bandp) != NULL)
2883 		for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
2884 			ASSERT(myqbp);
2885 			if ((myqbp->qb_flag & QB_BACK)) {
2886 				if (qbp->qb_flag & QB_FULL) {
2887 					/*
2888 					 * The band must have become full again.
2889 					 * Set QB_WANTW again so strwsrv will
2890 					 * be back enabled when the band becomes
2891 					 * non-full next time.
2892 					 */
2893 					qbp->qb_flag |= QB_WANTW;
2894 				} else {
2895 					isevent = 1;
2896 					qbf[i] = 1;
2897 				}
2898 			}
2899 			myqbp = myqbp->qb_next;
2900 			i++;
2901 		}
2902 	mutex_exit(QLOCK(tq));
2903 
2904 	if (isevent) {
2905 	    for (i = tq->q_nband; i; i--) {
2906 		if (qbf[i]) {
2907 			pollwakeup(&stp->sd_pollist, POLLWRBAND);
2908 			mutex_enter(&stp->sd_lock);
2909 			if (stp->sd_sigflags & S_WRBAND)
2910 				strsendsig(stp->sd_siglist, S_WRBAND,
2911 					(uchar_t)i, 0);
2912 			mutex_exit(&stp->sd_lock);
2913 		}
2914 	    }
2915 	}
2916 
2917 	releasestr(q);
2918 	return (0);
2919 }
2920 
2921 /*
2922  * Special case of strcopyin/strcopyout for copying
2923  * struct strioctl that can deal with both data
2924  * models.
2925  */
2926 
2927 #ifdef	_LP64
2928 
2929 static int
2930 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
2931 {
2932 	struct	strioctl32 strioc32;
2933 	struct	strioctl *striocp;
2934 
2935 	if (copyflag & U_TO_K) {
2936 		ASSERT((copyflag & K_TO_K) == 0);
2937 
2938 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
2939 			if (copyin(from, &strioc32, sizeof (strioc32)))
2940 				return (EFAULT);
2941 
2942 			striocp = (struct strioctl *)to;
2943 			striocp->ic_cmd	= strioc32.ic_cmd;
2944 			striocp->ic_timout = strioc32.ic_timout;
2945 			striocp->ic_len	= strioc32.ic_len;
2946 			striocp->ic_dp	= (char *)(uintptr_t)strioc32.ic_dp;
2947 
2948 		} else { /* NATIVE data model */
2949 			if (copyin(from, to, sizeof (struct strioctl))) {
2950 				return (EFAULT);
2951 			} else {
2952 				return (0);
2953 			}
2954 		}
2955 	} else {
2956 		ASSERT(copyflag & K_TO_K);
2957 		bcopy(from, to, sizeof (struct strioctl));
2958 	}
2959 	return (0);
2960 }
2961 
2962 static int
2963 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
2964 {
2965 	struct	strioctl32 strioc32;
2966 	struct	strioctl *striocp;
2967 
2968 	if (copyflag & U_TO_K) {
2969 		ASSERT((copyflag & K_TO_K) == 0);
2970 
2971 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
2972 			striocp = (struct strioctl *)from;
2973 			strioc32.ic_cmd	= striocp->ic_cmd;
2974 			strioc32.ic_timout = striocp->ic_timout;
2975 			strioc32.ic_len	= striocp->ic_len;
2976 			strioc32.ic_dp	= (caddr32_t)(uintptr_t)striocp->ic_dp;
2977 			ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
2978 			    striocp->ic_dp);
2979 
2980 			if (copyout(&strioc32, to, sizeof (strioc32)))
2981 				return (EFAULT);
2982 
2983 		} else { /* NATIVE data model */
2984 			if (copyout(from, to, sizeof (struct strioctl))) {
2985 				return (EFAULT);
2986 			} else {
2987 				return (0);
2988 			}
2989 		}
2990 	} else {
2991 		ASSERT(copyflag & K_TO_K);
2992 		bcopy(from, to, sizeof (struct strioctl));
2993 	}
2994 	return (0);
2995 }
2996 
2997 #else	/* ! _LP64 */
2998 
2999 /* ARGSUSED2 */
3000 static int
3001 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3002 {
3003 	return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3004 }
3005 
3006 /* ARGSUSED2 */
3007 static int
3008 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3009 {
3010 	return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3011 }
3012 
3013 #endif	/* _LP64 */
3014 
3015 /*
3016  * Determine type of job control semantics expected by user.  The
3017  * possibilities are:
3018  *	JCREAD	- Behaves like read() on fd; send SIGTTIN
3019  *	JCWRITE	- Behaves like write() on fd; send SIGTTOU if TOSTOP set
3020  *	JCSETP	- Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3021  *	JCGETP	- Gets a value in the stream; no signals.
3022  * See straccess in strsubr.c for usage of these values.
3023  *
3024  * This routine also returns -1 for I_STR as a special case; the
3025  * caller must call again with the real ioctl number for
3026  * classification.
3027  */
3028 static int
3029 job_control_type(int cmd)
3030 {
3031 	switch (cmd) {
3032 	case I_STR:
3033 		return (-1);
3034 
3035 	case I_RECVFD:
3036 	case I_E_RECVFD:
3037 		return (JCREAD);
3038 
3039 	case I_FDINSERT:
3040 	case I_SENDFD:
3041 		return (JCWRITE);
3042 
3043 	case TCSETA:
3044 	case TCSETAW:
3045 	case TCSETAF:
3046 	case TCSBRK:
3047 	case TCXONC:
3048 	case TCFLSH:
3049 	case TCDSET:	/* Obsolete */
3050 	case TIOCSWINSZ:
3051 	case TCSETS:
3052 	case TCSETSW:
3053 	case TCSETSF:
3054 	case TIOCSETD:
3055 	case TIOCHPCL:
3056 	case TIOCSETP:
3057 	case TIOCSETN:
3058 	case TIOCEXCL:
3059 	case TIOCNXCL:
3060 	case TIOCFLUSH:
3061 	case TIOCSETC:
3062 	case TIOCLBIS:
3063 	case TIOCLBIC:
3064 	case TIOCLSET:
3065 	case TIOCSBRK:
3066 	case TIOCCBRK:
3067 	case TIOCSDTR:
3068 	case TIOCCDTR:
3069 	case TIOCSLTC:
3070 	case TIOCSTOP:
3071 	case TIOCSTART:
3072 	case TIOCSTI:
3073 	case TIOCSPGRP:
3074 	case TIOCMSET:
3075 	case TIOCMBIS:
3076 	case TIOCMBIC:
3077 	case TIOCREMOTE:
3078 	case TIOCSIGNAL:
3079 	case LDSETT:
3080 	case LDSMAP:	/* Obsolete */
3081 	case DIOCSETP:
3082 	case I_FLUSH:
3083 	case I_SRDOPT:
3084 	case I_SETSIG:
3085 	case I_SWROPT:
3086 	case I_FLUSHBAND:
3087 	case I_SETCLTIME:
3088 	case I_SERROPT:
3089 	case I_ESETSIG:
3090 	case FIONBIO:
3091 	case FIOASYNC:
3092 	case FIOSETOWN:
3093 	case JBOOT:	/* Obsolete */
3094 	case JTERM:	/* Obsolete */
3095 	case JTIMOM:	/* Obsolete */
3096 	case JZOMBOOT:	/* Obsolete */
3097 	case JAGENT:	/* Obsolete */
3098 	case JTRUN:	/* Obsolete */
3099 	case JXTPROTO:	/* Obsolete */
3100 		return (JCSETP);
3101 	}
3102 
3103 	return (JCGETP);
3104 }
3105 
3106 /*
3107  * ioctl for streams
3108  */
3109 int
3110 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3111     cred_t *crp, int *rvalp)
3112 {
3113 	struct stdata *stp;
3114 	struct strioctl strioc;
3115 	struct uio uio;
3116 	struct iovec iov;
3117 	int access;
3118 	mblk_t *mp;
3119 	int error = 0;
3120 	int done = 0;
3121 	ssize_t	rmin, rmax;
3122 	queue_t *wrq;
3123 	queue_t *rdq;
3124 	boolean_t kioctl = B_FALSE;
3125 
3126 	if (flag & FKIOCTL) {
3127 		copyflag = K_TO_K;
3128 		kioctl = B_TRUE;
3129 	}
3130 	ASSERT(vp->v_stream);
3131 	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3132 	stp = vp->v_stream;
3133 
3134 	TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3135 		"strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3136 
3137 #ifdef C2_AUDIT
3138 	if (audit_active)
3139 		audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp);
3140 #endif
3141 
3142 	/*
3143 	 * If the copy is kernel to kernel, make sure that the FNATIVE
3144 	 * flag is set.  After this it would be a serious error to have
3145 	 * no model flag.
3146 	 */
3147 	if (copyflag == K_TO_K)
3148 		flag = (flag & ~FMODELS) | FNATIVE;
3149 
3150 	ASSERT((flag & FMODELS) != 0);
3151 
3152 	wrq = stp->sd_wrq;
3153 	rdq = _RD(wrq);
3154 
3155 	access = job_control_type(cmd);
3156 
3157 	/* We should never see these here, should be handled by iwscn */
3158 	if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3159 		return (EINVAL);
3160 
3161 	if (access != -1 && stp->sd_sidp != NULL &&
3162 	    stp->sd_vnode->v_type != VFIFO)
3163 		if (error = straccess(stp, access))
3164 			return (error);
3165 
3166 	/*
3167 	 * Check for sgttyb-related ioctls first, and complain as
3168 	 * necessary.
3169 	 */
3170 	switch (cmd) {
3171 	case TIOCGETP:
3172 	case TIOCSETP:
3173 	case TIOCSETN:
3174 		if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3175 			sgttyb_complaint = B_TRUE;
3176 			cmn_err(CE_NOTE,
3177 			    "application used obsolete TIOC[GS]ET");
3178 		}
3179 		if (sgttyb_handling >= 3) {
3180 			tsignal(curthread, SIGSYS);
3181 			return (EIO);
3182 		}
3183 		break;
3184 	}
3185 
3186 	mutex_enter(&stp->sd_lock);
3187 
3188 	switch (cmd) {
3189 	case I_RECVFD:
3190 	case I_E_RECVFD:
3191 	case I_PEEK:
3192 	case I_NREAD:
3193 	case FIONREAD:
3194 	case FIORDCHK:
3195 	case I_ATMARK:
3196 	case FIONBIO:
3197 	case FIOASYNC:
3198 		if (stp->sd_flag & (STRDERR|STPLEX)) {
3199 			error = strgeterr(stp, STRDERR|STPLEX, 0);
3200 			if (error != 0) {
3201 				mutex_exit(&stp->sd_lock);
3202 				return (error);
3203 			}
3204 		}
3205 		break;
3206 
3207 	default:
3208 		if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3209 			error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3210 			if (error != 0) {
3211 				mutex_exit(&stp->sd_lock);
3212 				return (error);
3213 			}
3214 		}
3215 	}
3216 
3217 	mutex_exit(&stp->sd_lock);
3218 
3219 	switch (cmd) {
3220 	default:
3221 		/*
3222 		 * The stream head has hardcoded knowledge of a
3223 		 * miscellaneous collection of terminal-, keyboard- and
3224 		 * mouse-related ioctls, enumerated below.  This hardcoded
3225 		 * knowledge allows the stream head to automatically
3226 		 * convert transparent ioctl requests made by userland
3227 		 * programs into I_STR ioctls which many old STREAMS
3228 		 * modules and drivers require.
3229 		 *
3230 		 * No new ioctls should ever be added to this list.
3231 		 * Instead, the STREAMS module or driver should be written
3232 		 * to either handle transparent ioctls or require any
3233 		 * userland programs to use I_STR ioctls (by returning
3234 		 * EINVAL to any transparent ioctl requests).
3235 		 *
3236 		 * More importantly, removing ioctls from this list should
3237 		 * be done with the utmost care, since our STREAMS modules
3238 		 * and drivers *count* on the stream head performing this
3239 		 * conversion, and thus may panic while processing
3240 		 * transparent ioctl request for one of these ioctls (keep
3241 		 * in mind that third party modules and drivers may have
3242 		 * similar problems).
3243 		 */
3244 		if (((cmd & IOCTYPE) == LDIOC) ||
3245 		    ((cmd & IOCTYPE) == tIOC) ||
3246 		    ((cmd & IOCTYPE) == TIOC) ||
3247 		    ((cmd & IOCTYPE) == KIOC) ||
3248 		    ((cmd & IOCTYPE) == MSIOC) ||
3249 		    ((cmd & IOCTYPE) == VUIOC)) {
3250 			/*
3251 			 * The ioctl is a tty ioctl - set up strioc buffer
3252 			 * and call strdoioctl() to do the work.
3253 			 */
3254 			if (stp->sd_flag & STRHUP)
3255 				return (ENXIO);
3256 			strioc.ic_cmd = cmd;
3257 			strioc.ic_timout = INFTIM;
3258 
3259 			switch (cmd) {
3260 
3261 			case TCXONC:
3262 			case TCSBRK:
3263 			case TCFLSH:
3264 			case TCDSET:
3265 				{
3266 				int native_arg = (int)arg;
3267 				strioc.ic_len = sizeof (int);
3268 				strioc.ic_dp = (char *)&native_arg;
3269 				return (strdoioctl(stp, &strioc, flag,
3270 				    K_TO_K, crp, rvalp));
3271 				}
3272 
3273 			case TCSETA:
3274 			case TCSETAW:
3275 			case TCSETAF:
3276 				strioc.ic_len = sizeof (struct termio);
3277 				strioc.ic_dp = (char *)arg;
3278 				return (strdoioctl(stp, &strioc, flag,
3279 					copyflag, crp, rvalp));
3280 
3281 			case TCSETS:
3282 			case TCSETSW:
3283 			case TCSETSF:
3284 				strioc.ic_len = sizeof (struct termios);
3285 				strioc.ic_dp = (char *)arg;
3286 				return (strdoioctl(stp, &strioc, flag,
3287 					copyflag, crp, rvalp));
3288 
3289 			case LDSETT:
3290 				strioc.ic_len = sizeof (struct termcb);
3291 				strioc.ic_dp = (char *)arg;
3292 				return (strdoioctl(stp, &strioc, flag,
3293 					copyflag, crp, rvalp));
3294 
3295 			case TIOCSETP:
3296 				strioc.ic_len = sizeof (struct sgttyb);
3297 				strioc.ic_dp = (char *)arg;
3298 				return (strdoioctl(stp, &strioc, flag,
3299 					copyflag, crp, rvalp));
3300 
3301 			case TIOCSTI:
3302 				if ((flag & FREAD) == 0 &&
3303 				    secpolicy_sti(crp) != 0) {
3304 					return (EPERM);
3305 				}
3306 				if (stp->sd_sidp !=
3307 				    ttoproc(curthread)->p_sessp->s_sidp &&
3308 				    secpolicy_sti(crp) != 0) {
3309 					return (EACCES);
3310 				}
3311 
3312 				strioc.ic_len = sizeof (char);
3313 				strioc.ic_dp = (char *)arg;
3314 				return (strdoioctl(stp, &strioc, flag,
3315 					copyflag, crp, rvalp));
3316 
3317 			case TIOCSWINSZ:
3318 				strioc.ic_len = sizeof (struct winsize);
3319 				strioc.ic_dp = (char *)arg;
3320 				return (strdoioctl(stp, &strioc, flag,
3321 					copyflag, crp, rvalp));
3322 
3323 			case TIOCSSIZE:
3324 				strioc.ic_len = sizeof (struct ttysize);
3325 				strioc.ic_dp = (char *)arg;
3326 				return (strdoioctl(stp, &strioc, flag,
3327 					copyflag, crp, rvalp));
3328 
3329 			case TIOCSSOFTCAR:
3330 			case KIOCTRANS:
3331 			case KIOCTRANSABLE:
3332 			case KIOCCMD:
3333 			case KIOCSDIRECT:
3334 			case KIOCSCOMPAT:
3335 			case KIOCSKABORTEN:
3336 			case KIOCSRPTDELAY:
3337 			case KIOCSRPTRATE:
3338 			case VUIDSFORMAT:
3339 			case TIOCSPPS:
3340 				strioc.ic_len = sizeof (int);
3341 				strioc.ic_dp = (char *)arg;
3342 				return (strdoioctl(stp, &strioc, flag,
3343 					copyflag, crp, rvalp));
3344 
3345 			case KIOCSETKEY:
3346 			case KIOCGETKEY:
3347 				strioc.ic_len = sizeof (struct kiockey);
3348 				strioc.ic_dp = (char *)arg;
3349 				return (strdoioctl(stp, &strioc, flag,
3350 					copyflag, crp, rvalp));
3351 
3352 			case KIOCSKEY:
3353 			case KIOCGKEY:
3354 				strioc.ic_len = sizeof (struct kiockeymap);
3355 				strioc.ic_dp = (char *)arg;
3356 				return (strdoioctl(stp, &strioc, flag,
3357 					copyflag, crp, rvalp));
3358 
3359 			case KIOCSLED:
3360 				/* arg is a pointer to char */
3361 				strioc.ic_len = sizeof (char);
3362 				strioc.ic_dp = (char *)arg;
3363 				return (strdoioctl(stp, &strioc, flag,
3364 					copyflag, crp, rvalp));
3365 
3366 			case MSIOSETPARMS:
3367 				strioc.ic_len = sizeof (Ms_parms);
3368 				strioc.ic_dp = (char *)arg;
3369 				return (strdoioctl(stp, &strioc, flag,
3370 					copyflag, crp, rvalp));
3371 
3372 			case VUIDSADDR:
3373 			case VUIDGADDR:
3374 				strioc.ic_len = sizeof (struct vuid_addr_probe);
3375 				strioc.ic_dp = (char *)arg;
3376 				return (strdoioctl(stp, &strioc, flag,
3377 					copyflag, crp, rvalp));
3378 
3379 			/*
3380 			 * These M_IOCTL's don't require any data to be sent
3381 			 * downstream, and the driver will allocate and link
3382 			 * on its own mblk_t upon M_IOCACK -- thus we set
3383 			 * ic_len to zero and set ic_dp to arg so we know
3384 			 * where to copyout to later.
3385 			 */
3386 			case TIOCGSOFTCAR:
3387 			case TIOCGWINSZ:
3388 			case TIOCGSIZE:
3389 			case KIOCGTRANS:
3390 			case KIOCGTRANSABLE:
3391 			case KIOCTYPE:
3392 			case KIOCGDIRECT:
3393 			case KIOCGCOMPAT:
3394 			case KIOCLAYOUT:
3395 			case KIOCGLED:
3396 			case MSIOGETPARMS:
3397 			case MSIOBUTTONS:
3398 			case VUIDGFORMAT:
3399 			case TIOCGPPS:
3400 			case TIOCGPPSEV:
3401 			case TCGETA:
3402 			case TCGETS:
3403 			case LDGETT:
3404 			case TIOCGETP:
3405 			case KIOCGRPTDELAY:
3406 			case KIOCGRPTRATE:
3407 				strioc.ic_len = 0;
3408 				strioc.ic_dp = (char *)arg;
3409 				return (strdoioctl(stp, &strioc, flag,
3410 					copyflag, crp, rvalp));
3411 			}
3412 		}
3413 
3414 		/*
3415 		 * Unknown cmd - send it down as a transparent ioctl.
3416 		 */
3417 		strioc.ic_cmd = cmd;
3418 		strioc.ic_timout = INFTIM;
3419 		strioc.ic_len = TRANSPARENT;
3420 		strioc.ic_dp = (char *)&arg;
3421 
3422 		return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
3423 
3424 	case I_STR:
3425 		/*
3426 		 * Stream ioctl.  Read in an strioctl buffer from the user
3427 		 * along with any data specified and send it downstream.
3428 		 * strdoioctl() allows only one ioctl message at a time,
3429 		 * and waits for the acknowledgement.
3430 		 */
3431 
3432 		if (stp->sd_flag & STRHUP)
3433 			return (ENXIO);
3434 
3435 		error = strcopyin_strioctl((void *)arg, &strioc, flag,
3436 		    copyflag);
3437 		if (error != 0)
3438 			return (error);
3439 
3440 		if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
3441 			return (EINVAL);
3442 
3443 		access = job_control_type(strioc.ic_cmd);
3444 		if (access != -1 && stp->sd_sidp != NULL &&
3445 		    stp->sd_vnode->v_type != VFIFO &&
3446 		    (error = straccess(stp, access)) != 0)
3447 			return (error);
3448 
3449 		/*
3450 		 * The I_STR facility provides a trap door for malicious
3451 		 * code to send down bogus streamio(7I) ioctl commands to
3452 		 * unsuspecting STREAMS modules and drivers which expect to
3453 		 * only get these messages from the stream head.
3454 		 * Explicitly prohibit any streamio ioctls which can be
3455 		 * passed downstream by the stream head.  Note that we do
3456 		 * not block all streamio ioctls because the ioctl
3457 		 * numberspace is not well managed and thus it's possible
3458 		 * that a module or driver's ioctl numbers may accidentally
3459 		 * collide with them.
3460 		 */
3461 		switch (strioc.ic_cmd) {
3462 		case I_LINK:
3463 		case I_PLINK:
3464 		case I_UNLINK:
3465 		case I_PUNLINK:
3466 		case _I_GETPEERCRED:
3467 		case _I_PLINK_LH:
3468 			return (EINVAL);
3469 		}
3470 
3471 		error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
3472 		if (error == 0) {
3473 			error = strcopyout_strioctl(&strioc, (void *)arg,
3474 			    flag, copyflag);
3475 		}
3476 		return (error);
3477 
3478 	case I_NREAD:
3479 		/*
3480 		 * Return number of bytes of data in first message
3481 		 * in queue in "arg" and return the number of messages
3482 		 * in queue in return value.
3483 		 */
3484 	    {
3485 		size_t	size;
3486 		int	retval;
3487 		int	count = 0;
3488 
3489 		mutex_enter(QLOCK(rdq));
3490 
3491 		size = msgdsize(rdq->q_first);
3492 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3493 			count++;
3494 
3495 		mutex_exit(QLOCK(rdq));
3496 		if (stp->sd_struiordq) {
3497 			infod_t infod;
3498 
3499 			infod.d_cmd = INFOD_COUNT;
3500 			infod.d_count = 0;
3501 			if (count == 0) {
3502 				infod.d_cmd |= INFOD_FIRSTBYTES;
3503 				infod.d_bytes = 0;
3504 			}
3505 			infod.d_res = 0;
3506 			(void) infonext(rdq, &infod);
3507 			count += infod.d_count;
3508 			if (infod.d_res & INFOD_FIRSTBYTES)
3509 				size = infod.d_bytes;
3510 		}
3511 
3512 		/*
3513 		 * Drop down from size_t to the "int" required by the
3514 		 * interface.  Cap at INT_MAX.
3515 		 */
3516 		retval = MIN(size, INT_MAX);
3517 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
3518 		    copyflag);
3519 		if (!error)
3520 			*rvalp = count;
3521 		return (error);
3522 	    }
3523 
3524 	case FIONREAD:
3525 		/*
3526 		 * Return number of bytes of data in all data messages
3527 		 * in queue in "arg".
3528 		 */
3529 	    {
3530 		size_t	size = 0;
3531 		int	retval;
3532 
3533 		mutex_enter(QLOCK(rdq));
3534 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3535 			size += msgdsize(mp);
3536 		mutex_exit(QLOCK(rdq));
3537 
3538 		if (stp->sd_struiordq) {
3539 			infod_t infod;
3540 
3541 			infod.d_cmd = INFOD_BYTES;
3542 			infod.d_res = 0;
3543 			infod.d_bytes = 0;
3544 			(void) infonext(rdq, &infod);
3545 			size += infod.d_bytes;
3546 		}
3547 
3548 		/*
3549 		 * Drop down from size_t to the "int" required by the
3550 		 * interface.  Cap at INT_MAX.
3551 		 */
3552 		retval = MIN(size, INT_MAX);
3553 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
3554 		    copyflag);
3555 
3556 		*rvalp = 0;
3557 		return (error);
3558 	    }
3559 	case FIORDCHK:
3560 		/*
3561 		 * FIORDCHK does not use arg value (like FIONREAD),
3562 		 * instead a count is returned. I_NREAD value may
3563 		 * not be accurate but safe. The real thing to do is
3564 		 * to add the msgdsizes of all data  messages until
3565 		 * a non-data message.
3566 		 */
3567 	    {
3568 		size_t size = 0;
3569 
3570 		mutex_enter(QLOCK(rdq));
3571 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3572 			size += msgdsize(mp);
3573 		mutex_exit(QLOCK(rdq));
3574 
3575 		if (stp->sd_struiordq) {
3576 			infod_t infod;
3577 
3578 			infod.d_cmd = INFOD_BYTES;
3579 			infod.d_res = 0;
3580 			infod.d_bytes = 0;
3581 			(void) infonext(rdq, &infod);
3582 			size += infod.d_bytes;
3583 		}
3584 
3585 		/*
3586 		 * Since ioctl returns an int, and memory sizes under
3587 		 * LP64 may not fit, we return INT_MAX if the count was
3588 		 * actually greater.
3589 		 */
3590 		*rvalp = MIN(size, INT_MAX);
3591 		return (0);
3592 	    }
3593 
3594 	case I_FIND:
3595 		/*
3596 		 * Get module name.
3597 		 */
3598 	    {
3599 		char mname[FMNAMESZ + 1];
3600 		queue_t *q;
3601 
3602 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3603 		    mname, FMNAMESZ + 1, NULL);
3604 		if (error)
3605 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3606 
3607 		/*
3608 		 * Return EINVAL if we're handed a bogus module name.
3609 		 */
3610 		if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
3611 			TRACE_0(TR_FAC_STREAMS_FR,
3612 				TR_I_CANT_FIND, "couldn't I_FIND");
3613 			return (EINVAL);
3614 		}
3615 
3616 		*rvalp = 0;
3617 
3618 		/* Look downstream to see if module is there. */
3619 		claimstr(stp->sd_wrq);
3620 		for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3621 			if (q->q_flag&QREADR) {
3622 				q = NULL;
3623 				break;
3624 			}
3625 			if (strcmp(mname, q->q_qinfo->qi_minfo->mi_idname) == 0)
3626 				break;
3627 		}
3628 		releasestr(stp->sd_wrq);
3629 
3630 		*rvalp = (q ? 1 : 0);
3631 		return (error);
3632 	    }
3633 
3634 	case I_PUSH:
3635 	case __I_PUSH_NOCTTY:
3636 		/*
3637 		 * Push a module.
3638 		 * For the case __I_PUSH_NOCTTY push a module but
3639 		 * do not allocate controlling tty. See bugid 4025044
3640 		 */
3641 
3642 	    {
3643 		char mname[FMNAMESZ + 1];
3644 		fmodsw_impl_t *fp;
3645 		dev_t dummydev;
3646 
3647 		if (stp->sd_flag & STRHUP)
3648 			return (ENXIO);
3649 
3650 		/*
3651 		 * Get module name and look up in fmodsw.
3652 		 */
3653 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3654 		    mname, FMNAMESZ + 1, NULL);
3655 		if (error)
3656 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3657 
3658 		if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
3659 		    NULL)
3660 			return (EINVAL);
3661 
3662 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
3663 		    "I_PUSH:fp %p stp %p", fp, stp);
3664 
3665 		if (error = strstartplumb(stp, flag, cmd)) {
3666 			fmodsw_rele(fp);
3667 			return (error);
3668 		}
3669 
3670 		/*
3671 		 * See if any more modules can be pushed on this stream.
3672 		 * Note that this check must be done after strstartplumb()
3673 		 * since otherwise multiple threads issuing I_PUSHes on
3674 		 * the same stream will be able to exceed nstrpush.
3675 		 */
3676 		mutex_enter(&stp->sd_lock);
3677 		if (stp->sd_pushcnt >= nstrpush) {
3678 			fmodsw_rele(fp);
3679 			strendplumb(stp);
3680 			mutex_exit(&stp->sd_lock);
3681 			return (EINVAL);
3682 		}
3683 		mutex_exit(&stp->sd_lock);
3684 
3685 		/*
3686 		 * Push new module and call its open routine
3687 		 * via qattach().  Modules don't change device
3688 		 * numbers, so just ignore dummydev here.
3689 		 */
3690 		dummydev = vp->v_rdev;
3691 		if ((error = qattach(rdq, &dummydev, 0, crp, fp,
3692 		    B_FALSE)) == 0) {
3693 			if (vp->v_type == VCHR && /* sorry, no pipes allowed */
3694 			    (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
3695 				/*
3696 				 * try to allocate it as a controlling terminal
3697 				 */
3698 				stralloctty(stp);
3699 			}
3700 		}
3701 
3702 		mutex_enter(&stp->sd_lock);
3703 
3704 		/*
3705 		 * As a performance concern we are caching the values of
3706 		 * q_minpsz and q_maxpsz of the module below the stream
3707 		 * head in the stream head.
3708 		 */
3709 		mutex_enter(QLOCK(stp->sd_wrq->q_next));
3710 		rmin = stp->sd_wrq->q_next->q_minpsz;
3711 		rmax = stp->sd_wrq->q_next->q_maxpsz;
3712 		mutex_exit(QLOCK(stp->sd_wrq->q_next));
3713 
3714 		/* Do this processing here as a performance concern */
3715 		if (strmsgsz != 0) {
3716 			if (rmax == INFPSZ)
3717 				rmax = strmsgsz;
3718 			else  {
3719 				if (vp->v_type == VFIFO)
3720 					rmax = MIN(PIPE_BUF, rmax);
3721 				else	rmax = MIN(strmsgsz, rmax);
3722 			}
3723 		}
3724 
3725 		mutex_enter(QLOCK(wrq));
3726 		stp->sd_qn_minpsz = rmin;
3727 		stp->sd_qn_maxpsz = rmax;
3728 		mutex_exit(QLOCK(wrq));
3729 
3730 		strendplumb(stp);
3731 		mutex_exit(&stp->sd_lock);
3732 		return (error);
3733 	    }
3734 
3735 	case I_POP:
3736 	    {
3737 		queue_t	*q;
3738 
3739 		if (stp->sd_flag & STRHUP)
3740 			return (ENXIO);
3741 		if (!wrq->q_next)	/* for broken pipes */
3742 			return (EINVAL);
3743 
3744 		if (error = strstartplumb(stp, flag, cmd))
3745 			return (error);
3746 
3747 		/*
3748 		 * If there is an anchor on this stream and popping
3749 		 * the current module would attempt to pop through the
3750 		 * anchor, then disallow the pop unless we have sufficient
3751 		 * privileges; take the cheapest (non-locking) check
3752 		 * first.
3753 		 */
3754 		if (secpolicy_net_config(crp, B_TRUE) != 0) {
3755 			mutex_enter(&stp->sd_lock);
3756 			/*
3757 			 * Anchors only apply if there's at least one
3758 			 * module on the stream (sd_pushcnt > 0).
3759 			 */
3760 			if (stp->sd_pushcnt > 0 &&
3761 			    stp->sd_pushcnt == stp->sd_anchor &&
3762 			    stp->sd_vnode->v_type != VFIFO) {
3763 				strendplumb(stp);
3764 				mutex_exit(&stp->sd_lock);
3765 				/* Audit and report error */
3766 				return (secpolicy_net_config(crp, B_FALSE));
3767 			}
3768 			mutex_exit(&stp->sd_lock);
3769 		}
3770 
3771 		q = wrq->q_next;
3772 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
3773 			"I_POP:%p from %p", q, stp);
3774 		if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
3775 			error = EINVAL;
3776 		} else {
3777 			qdetach(_RD(q), 1, flag, crp, B_FALSE);
3778 			error = 0;
3779 		}
3780 		mutex_enter(&stp->sd_lock);
3781 
3782 		/*
3783 		 * As a performance concern we are caching the values of
3784 		 * q_minpsz and q_maxpsz of the module below the stream
3785 		 * head in the stream head.
3786 		 */
3787 		mutex_enter(QLOCK(wrq->q_next));
3788 		rmin = wrq->q_next->q_minpsz;
3789 		rmax = wrq->q_next->q_maxpsz;
3790 		mutex_exit(QLOCK(wrq->q_next));
3791 
3792 		/* Do this processing here as a performance concern */
3793 		if (strmsgsz != 0) {
3794 			if (rmax == INFPSZ)
3795 				rmax = strmsgsz;
3796 			else  {
3797 				if (vp->v_type == VFIFO)
3798 					rmax = MIN(PIPE_BUF, rmax);
3799 				else	rmax = MIN(strmsgsz, rmax);
3800 			}
3801 		}
3802 
3803 		mutex_enter(QLOCK(wrq));
3804 		stp->sd_qn_minpsz = rmin;
3805 		stp->sd_qn_maxpsz = rmax;
3806 		mutex_exit(QLOCK(wrq));
3807 
3808 		/* If we popped through the anchor, then reset the anchor. */
3809 		if (stp->sd_pushcnt < stp->sd_anchor)
3810 			stp->sd_anchor = 0;
3811 
3812 		strendplumb(stp);
3813 		mutex_exit(&stp->sd_lock);
3814 		return (error);
3815 	    }
3816 
3817 	case _I_MUXID2FD:
3818 	{
3819 		/*
3820 		 * Create a fd for a I_PLINK'ed lower stream with a given
3821 		 * muxid.  With the fd, application can send down ioctls,
3822 		 * like I_LIST, to the previously I_PLINK'ed stream.  Note
3823 		 * that after getting the fd, the application has to do an
3824 		 * I_PUNLINK on the muxid before it can do any operation
3825 		 * on the lower stream.  This is required by spec1170.
3826 		 *
3827 		 * The fd used to do this ioctl should point to the same
3828 		 * controlling device used to do the I_PLINK.  If it uses
3829 		 * a different stream or an invalid muxid, I_MUXID2FD will
3830 		 * fail.  The error code is set to EINVAL.
3831 		 *
3832 		 * The intended use of this interface is the following.
3833 		 * An application I_PLINK'ed a stream and exits.  The fd
3834 		 * to the lower stream is gone.  Another application
3835 		 * wants to get a fd to the lower stream, it uses I_MUXID2FD.
3836 		 */
3837 		int muxid = (int)arg;
3838 		int fd;
3839 		linkinfo_t *linkp;
3840 		struct file *fp;
3841 
3842 		/*
3843 		 * Do not allow the wildcard muxid.  This ioctl is not
3844 		 * intended to find arbitrary link.
3845 		 */
3846 		if (muxid == 0) {
3847 			return (EINVAL);
3848 		}
3849 
3850 		mutex_enter(&muxifier);
3851 		linkp = findlinks(vp->v_stream, muxid, LINKPERSIST);
3852 		if (linkp == NULL) {
3853 			mutex_exit(&muxifier);
3854 			return (EINVAL);
3855 		}
3856 
3857 		if ((fd = ufalloc(0)) == -1) {
3858 			mutex_exit(&muxifier);
3859 			return (EMFILE);
3860 		}
3861 		fp = linkp->li_fpdown;
3862 		mutex_enter(&fp->f_tlock);
3863 		fp->f_count++;
3864 		mutex_exit(&fp->f_tlock);
3865 		mutex_exit(&muxifier);
3866 		setf(fd, fp);
3867 		*rvalp = fd;
3868 		return (0);
3869 	}
3870 
3871 	case _I_INSERT:
3872 	{
3873 		/*
3874 		 * To insert a module to a given position in a stream.
3875 		 * In the first release, only allow privileged user
3876 		 * to use this ioctl.
3877 		 *
3878 		 * Note that we do not plan to support this ioctl
3879 		 * on pipes in the first release.  We want to learn more
3880 		 * about the implications of these ioctls before extending
3881 		 * their support.  And we do not think these features are
3882 		 * valuable for pipes.
3883 		 *
3884 		 * Neither do we support O/C hot stream.  Note that only
3885 		 * the upper streams of TCP/IP stack are O/C hot streams.
3886 		 * The lower IP stream is not.
3887 		 * When there is a O/C cold barrier, we only allow inserts
3888 		 * above the barrier.
3889 		 */
3890 		STRUCT_DECL(strmodconf, strmodinsert);
3891 		char mod_name[FMNAMESZ + 1];
3892 		fmodsw_impl_t *fp;
3893 		dev_t dummydev;
3894 		queue_t *tmp_wrq;
3895 		int pos;
3896 		boolean_t is_insert;
3897 
3898 		STRUCT_INIT(strmodinsert, flag);
3899 		if (stp->sd_flag & STRHUP)
3900 			return (ENXIO);
3901 		if (STRMATED(stp))
3902 			return (EINVAL);
3903 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
3904 			return (error);
3905 
3906 		error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
3907 		    STRUCT_SIZE(strmodinsert), copyflag);
3908 		if (error)
3909 			return (error);
3910 
3911 		/*
3912 		 * Get module name and look up in fmodsw.
3913 		 */
3914 		error = (copyflag & U_TO_K ? copyinstr :
3915 		    copystr)(STRUCT_FGETP(strmodinsert, mod_name),
3916 		    mod_name, FMNAMESZ + 1, NULL);
3917 		if (error)
3918 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3919 
3920 		if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
3921 		    NULL)
3922 			return (EINVAL);
3923 
3924 		if (error = strstartplumb(stp, flag, cmd)) {
3925 			fmodsw_rele(fp);
3926 			return (error);
3927 		}
3928 
3929 		/*
3930 		 * Is this _I_INSERT just like an I_PUSH?  We need to know
3931 		 * this because we do some optimizations if this is a
3932 		 * module being pushed.
3933 		 */
3934 		pos = STRUCT_FGET(strmodinsert, pos);
3935 		is_insert = (pos != 0);
3936 
3937 		/*
3938 		 * Make sure pos is valid.  Even though it is not an I_PUSH,
3939 		 * we impose the same limit on the number of modules in a
3940 		 * stream.
3941 		 */
3942 		mutex_enter(&stp->sd_lock);
3943 		if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
3944 		    pos > stp->sd_pushcnt) {
3945 			fmodsw_rele(fp);
3946 			strendplumb(stp);
3947 			mutex_exit(&stp->sd_lock);
3948 			return (EINVAL);
3949 		}
3950 		mutex_exit(&stp->sd_lock);
3951 
3952 		/*
3953 		 * First find the position at which this module is to
3954 		 * be inserted.  We don't need to call claimstr()
3955 		 * as the stream should not be changing at this point.
3956 		 *
3957 		 * Insert new module and call its open routine
3958 		 * via qattach().  Modules don't change device
3959 		 * numbers, so just ignore dummydev here.
3960 		 */
3961 		for (tmp_wrq = stp->sd_wrq; pos > 0;
3962 		    tmp_wrq = tmp_wrq->q_next, pos--) {
3963 			ASSERT(SAMESTR(tmp_wrq));
3964 		}
3965 		dummydev = vp->v_rdev;
3966 		if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
3967 		    fp, is_insert)) != 0) {
3968 			mutex_enter(&stp->sd_lock);
3969 			strendplumb(stp);
3970 			mutex_exit(&stp->sd_lock);
3971 			return (error);
3972 		}
3973 
3974 		mutex_enter(&stp->sd_lock);
3975 
3976 		/*
3977 		 * As a performance concern we are caching the values of
3978 		 * q_minpsz and q_maxpsz of the module below the stream
3979 		 * head in the stream head.
3980 		 */
3981 		if (!is_insert) {
3982 			mutex_enter(QLOCK(stp->sd_wrq->q_next));
3983 			rmin = stp->sd_wrq->q_next->q_minpsz;
3984 			rmax = stp->sd_wrq->q_next->q_maxpsz;
3985 			mutex_exit(QLOCK(stp->sd_wrq->q_next));
3986 
3987 			/* Do this processing here as a performance concern */
3988 			if (strmsgsz != 0) {
3989 				if (rmax == INFPSZ) {
3990 					rmax = strmsgsz;
3991 				} else  {
3992 					rmax = MIN(strmsgsz, rmax);
3993 				}
3994 			}
3995 
3996 			mutex_enter(QLOCK(wrq));
3997 			stp->sd_qn_minpsz = rmin;
3998 			stp->sd_qn_maxpsz = rmax;
3999 			mutex_exit(QLOCK(wrq));
4000 		}
4001 
4002 		/*
4003 		 * Need to update the anchor value if this module is
4004 		 * inserted below the anchor point.
4005 		 */
4006 		if (stp->sd_anchor != 0) {
4007 			pos = STRUCT_FGET(strmodinsert, pos);
4008 			if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
4009 				stp->sd_anchor++;
4010 		}
4011 
4012 		strendplumb(stp);
4013 		mutex_exit(&stp->sd_lock);
4014 		return (0);
4015 	}
4016 
4017 	case _I_REMOVE:
4018 	{
4019 		/*
4020 		 * To remove a module with a given name in a stream.  The
4021 		 * caller of this ioctl needs to provide both the name and
4022 		 * the position of the module to be removed.  This eliminates
4023 		 * the ambiguity of removal if a module is inserted/pushed
4024 		 * multiple times in a stream.  In the first release, only
4025 		 * allow privileged user to use this ioctl.
4026 		 *
4027 		 * Note that we do not plan to support this ioctl
4028 		 * on pipes in the first release.  We want to learn more
4029 		 * about the implications of these ioctls before extending
4030 		 * their support.  And we do not think these features are
4031 		 * valuable for pipes.
4032 		 *
4033 		 * Neither do we support O/C hot stream.  Note that only
4034 		 * the upper streams of TCP/IP stack are O/C hot streams.
4035 		 * The lower IP stream is not.
4036 		 * When there is a O/C cold barrier we do not allow removal
4037 		 * below the barrier.
4038 		 *
4039 		 * Also note that _I_REMOVE cannot be used to remove a
4040 		 * driver or the stream head.
4041 		 */
4042 		STRUCT_DECL(strmodconf, strmodremove);
4043 		queue_t	*q;
4044 		int pos;
4045 		char mod_name[FMNAMESZ + 1];
4046 		boolean_t is_remove;
4047 
4048 		STRUCT_INIT(strmodremove, flag);
4049 		if (stp->sd_flag & STRHUP)
4050 			return (ENXIO);
4051 		if (STRMATED(stp))
4052 			return (EINVAL);
4053 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4054 			return (error);
4055 
4056 		error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
4057 		    STRUCT_SIZE(strmodremove), copyflag);
4058 		if (error)
4059 			return (error);
4060 
4061 		error = (copyflag & U_TO_K ? copyinstr :
4062 		    copystr)(STRUCT_FGETP(strmodremove, mod_name),
4063 		    mod_name, FMNAMESZ + 1, NULL);
4064 		if (error)
4065 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4066 
4067 		if ((error = strstartplumb(stp, flag, cmd)) != 0)
4068 			return (error);
4069 
4070 		/*
4071 		 * Match the name of given module to the name of module at
4072 		 * the given position.
4073 		 */
4074 		pos = STRUCT_FGET(strmodremove, pos);
4075 
4076 		is_remove = (pos != 0);
4077 		for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
4078 		    q = q->q_next, pos--)
4079 			;
4080 		if (pos > 0 || ! SAMESTR(q) ||
4081 		    strncmp(q->q_qinfo->qi_minfo->mi_idname, mod_name,
4082 		    strlen(q->q_qinfo->qi_minfo->mi_idname)) != 0) {
4083 			mutex_enter(&stp->sd_lock);
4084 			strendplumb(stp);
4085 			mutex_exit(&stp->sd_lock);
4086 			return (EINVAL);
4087 		}
4088 
4089 		ASSERT(!(q->q_flag & QREADR));
4090 		qdetach(_RD(q), 1, flag, crp, is_remove);
4091 
4092 		mutex_enter(&stp->sd_lock);
4093 
4094 		/*
4095 		 * As a performance concern we are caching the values of
4096 		 * q_minpsz and q_maxpsz of the module below the stream
4097 		 * head in the stream head.
4098 		 */
4099 		if (!is_remove) {
4100 			mutex_enter(QLOCK(wrq->q_next));
4101 			rmin = wrq->q_next->q_minpsz;
4102 			rmax = wrq->q_next->q_maxpsz;
4103 			mutex_exit(QLOCK(wrq->q_next));
4104 
4105 			/* Do this processing here as a performance concern */
4106 			if (strmsgsz != 0) {
4107 				if (rmax == INFPSZ)
4108 					rmax = strmsgsz;
4109 				else  {
4110 					if (vp->v_type == VFIFO)
4111 						rmax = MIN(PIPE_BUF, rmax);
4112 					else	rmax = MIN(strmsgsz, rmax);
4113 				}
4114 			}
4115 
4116 			mutex_enter(QLOCK(wrq));
4117 			stp->sd_qn_minpsz = rmin;
4118 			stp->sd_qn_maxpsz = rmax;
4119 			mutex_exit(QLOCK(wrq));
4120 		}
4121 
4122 		/*
4123 		 * Need to update the anchor value if this module is removed
4124 		 * at or below the anchor point.  If the removed module is at
4125 		 * the anchor point, remove the anchor for this stream if
4126 		 * there is no module above the anchor point.  Otherwise, if
4127 		 * the removed module is below the anchor point, decrement the
4128 		 * anchor point by 1.
4129 		 */
4130 		if (stp->sd_anchor != 0) {
4131 			pos = STRUCT_FGET(strmodremove, pos);
4132 			if (pos == 0)
4133 				stp->sd_anchor = 0;
4134 			else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
4135 				stp->sd_anchor--;
4136 		}
4137 
4138 		strendplumb(stp);
4139 		mutex_exit(&stp->sd_lock);
4140 		return (0);
4141 	}
4142 
4143 	case I_ANCHOR:
4144 		/*
4145 		 * Set the anchor position on the stream to reside at
4146 		 * the top module (in other words, the top module
4147 		 * cannot be popped).  Anchors with a FIFO make no
4148 		 * obvious sense, so they're not allowed.
4149 		 */
4150 		mutex_enter(&stp->sd_lock);
4151 
4152 		if (stp->sd_vnode->v_type == VFIFO) {
4153 			mutex_exit(&stp->sd_lock);
4154 			return (EINVAL);
4155 		}
4156 
4157 		stp->sd_anchor = stp->sd_pushcnt;
4158 
4159 		mutex_exit(&stp->sd_lock);
4160 		return (0);
4161 
4162 	case I_LOOK:
4163 		/*
4164 		 * Get name of first module downstream.
4165 		 * If no module, return an error.
4166 		 */
4167 	    {
4168 		claimstr(wrq);
4169 		if (_SAMESTR(wrq) && wrq->q_next->q_next) {
4170 			char *name = wrq->q_next->q_qinfo->qi_minfo->mi_idname;
4171 			error = strcopyout(name, (void *)arg, strlen(name) + 1,
4172 			    copyflag);
4173 			releasestr(wrq);
4174 			return (error);
4175 		}
4176 		releasestr(wrq);
4177 		return (EINVAL);
4178 	    }
4179 
4180 	case I_LINK:
4181 	case I_PLINK:
4182 		/*
4183 		 * Link a multiplexor.
4184 		 */
4185 		return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
4186 
4187 	case _I_PLINK_LH:
4188 		/*
4189 		 * Link a multiplexor: Call must originate from kernel.
4190 		 */
4191 		if (kioctl)
4192 			return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
4193 
4194 		return (EINVAL);
4195 	case I_UNLINK:
4196 	case I_PUNLINK:
4197 		/*
4198 		 * Unlink a multiplexor.
4199 		 * If arg is -1, unlink all links for which this is the
4200 		 * controlling stream.  Otherwise, arg is an index number
4201 		 * for a link to be removed.
4202 		 */
4203 	    {
4204 		struct linkinfo *linkp;
4205 		int native_arg = (int)arg;
4206 		int type;
4207 
4208 		TRACE_1(TR_FAC_STREAMS_FR,
4209 			TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
4210 		if (vp->v_type == VFIFO) {
4211 			return (EINVAL);
4212 		}
4213 		if (cmd == I_UNLINK)
4214 			type = LINKNORMAL;
4215 		else	/* I_PUNLINK */
4216 			type = LINKPERSIST;
4217 		if (native_arg == 0) {
4218 			return (EINVAL);
4219 		}
4220 		if (native_arg == MUXID_ALL)
4221 			error = munlinkall(stp, type, crp, rvalp);
4222 		else {
4223 			mutex_enter(&muxifier);
4224 			if (!(linkp = findlinks(stp, (int)arg, type))) {
4225 				/* invalid user supplied index number */
4226 				mutex_exit(&muxifier);
4227 				return (EINVAL);
4228 			}
4229 			/* munlink drops the muxifier lock */
4230 			error = munlink(stp, linkp, type, crp, rvalp);
4231 		}
4232 		return (error);
4233 	    }
4234 
4235 	case I_FLUSH:
4236 		/*
4237 		 * send a flush message downstream
4238 		 * flush message can indicate
4239 		 * FLUSHR - flush read queue
4240 		 * FLUSHW - flush write queue
4241 		 * FLUSHRW - flush read/write queue
4242 		 */
4243 		if (stp->sd_flag & STRHUP)
4244 			return (ENXIO);
4245 		if (arg & ~FLUSHRW)
4246 			return (EINVAL);
4247 
4248 		for (;;) {
4249 			if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
4250 				break;
4251 			}
4252 			if (error = strwaitbuf(1, BPRI_HI)) {
4253 				return (error);
4254 			}
4255 		}
4256 
4257 		/*
4258 		 * Send down an unsupported ioctl and wait for the nack
4259 		 * in order to allow the M_FLUSH to propagate back
4260 		 * up to the stream head.
4261 		 * Replaces if (qready()) runqueues();
4262 		 */
4263 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
4264 		strioc.ic_timout = 0;
4265 		strioc.ic_len = 0;
4266 		strioc.ic_dp = NULL;
4267 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4268 		*rvalp = 0;
4269 		return (0);
4270 
4271 	case I_FLUSHBAND:
4272 	    {
4273 		struct bandinfo binfo;
4274 
4275 		error = strcopyin((void *)arg, &binfo, sizeof (binfo),
4276 		    copyflag);
4277 		if (error)
4278 			return (error);
4279 		if (stp->sd_flag & STRHUP)
4280 			return (ENXIO);
4281 		if (binfo.bi_flag & ~FLUSHRW)
4282 			return (EINVAL);
4283 		while (!(mp = allocb(2, BPRI_HI))) {
4284 			if (error = strwaitbuf(2, BPRI_HI))
4285 				return (error);
4286 		}
4287 		mp->b_datap->db_type = M_FLUSH;
4288 		*mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
4289 		*mp->b_wptr++ = binfo.bi_pri;
4290 		putnext(stp->sd_wrq, mp);
4291 		/*
4292 		 * Send down an unsupported ioctl and wait for the nack
4293 		 * in order to allow the M_FLUSH to propagate back
4294 		 * up to the stream head.
4295 		 * Replaces if (qready()) runqueues();
4296 		 */
4297 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
4298 		strioc.ic_timout = 0;
4299 		strioc.ic_len = 0;
4300 		strioc.ic_dp = NULL;
4301 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4302 		*rvalp = 0;
4303 		return (0);
4304 	    }
4305 
4306 	case I_SRDOPT:
4307 		/*
4308 		 * Set read options
4309 		 *
4310 		 * RNORM - default stream mode
4311 		 * RMSGN - message no discard
4312 		 * RMSGD - message discard
4313 		 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
4314 		 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
4315 		 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
4316 		 */
4317 		if (arg & ~(RMODEMASK | RPROTMASK))
4318 			return (EINVAL);
4319 
4320 		if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
4321 			return (EINVAL);
4322 
4323 		mutex_enter(&stp->sd_lock);
4324 		switch (arg & RMODEMASK) {
4325 		case RNORM:
4326 			stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
4327 			break;
4328 		case RMSGD:
4329 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
4330 			    RD_MSGDIS;
4331 			break;
4332 		case RMSGN:
4333 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
4334 			    RD_MSGNODIS;
4335 			break;
4336 		}
4337 
4338 		switch (arg & RPROTMASK) {
4339 		case RPROTNORM:
4340 			stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
4341 			break;
4342 
4343 		case RPROTDAT:
4344 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
4345 			    RD_PROTDAT);
4346 			break;
4347 
4348 		case RPROTDIS:
4349 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
4350 			    RD_PROTDIS);
4351 			break;
4352 		}
4353 		mutex_exit(&stp->sd_lock);
4354 		return (0);
4355 
4356 	case I_GRDOPT:
4357 		/*
4358 		 * Get read option and return the value
4359 		 * to spot pointed to by arg
4360 		 */
4361 	    {
4362 		int rdopt;
4363 
4364 		rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
4365 		    ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
4366 		rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
4367 		    ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
4368 
4369 		return (strcopyout(&rdopt, (void *)arg, sizeof (int),
4370 		    copyflag));
4371 	    }
4372 
4373 	case I_SERROPT:
4374 		/*
4375 		 * Set error options
4376 		 *
4377 		 * RERRNORM - persistent read errors
4378 		 * RERRNONPERSIST - non-persistent read errors
4379 		 * WERRNORM - persistent write errors
4380 		 * WERRNONPERSIST - non-persistent write errors
4381 		 */
4382 		if (arg & ~(RERRMASK | WERRMASK))
4383 			return (EINVAL);
4384 
4385 		mutex_enter(&stp->sd_lock);
4386 		switch (arg & RERRMASK) {
4387 		case RERRNORM:
4388 			stp->sd_flag &= ~STRDERRNONPERSIST;
4389 			break;
4390 		case RERRNONPERSIST:
4391 			stp->sd_flag |= STRDERRNONPERSIST;
4392 			break;
4393 		}
4394 		switch (arg & WERRMASK) {
4395 		case WERRNORM:
4396 			stp->sd_flag &= ~STWRERRNONPERSIST;
4397 			break;
4398 		case WERRNONPERSIST:
4399 			stp->sd_flag |= STWRERRNONPERSIST;
4400 			break;
4401 		}
4402 		mutex_exit(&stp->sd_lock);
4403 		return (0);
4404 
4405 	case I_GERROPT:
4406 		/*
4407 		 * Get error option and return the value
4408 		 * to spot pointed to by arg
4409 		 */
4410 	    {
4411 		int erropt = 0;
4412 
4413 		erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
4414 			RERRNORM;
4415 		erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
4416 			WERRNORM;
4417 		return (strcopyout(&erropt, (void *)arg, sizeof (int),
4418 		    copyflag));
4419 	    }
4420 
4421 	case I_SETSIG:
4422 		/*
4423 		 * Register the calling proc to receive the SIGPOLL
4424 		 * signal based on the events given in arg.  If
4425 		 * arg is zero, remove the proc from register list.
4426 		 */
4427 	    {
4428 		strsig_t *ssp, *pssp;
4429 		struct pid *pidp;
4430 
4431 		pssp = NULL;
4432 		pidp = curproc->p_pidp;
4433 		/*
4434 		 * Hold sd_lock to prevent traversal of sd_siglist while
4435 		 * it is modified.
4436 		 */
4437 		mutex_enter(&stp->sd_lock);
4438 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
4439 			pssp = ssp, ssp = ssp->ss_next)
4440 			;
4441 
4442 		if (arg) {
4443 			if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4444 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4445 				mutex_exit(&stp->sd_lock);
4446 				return (EINVAL);
4447 			}
4448 			if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
4449 				mutex_exit(&stp->sd_lock);
4450 				return (EINVAL);
4451 			}
4452 
4453 			/*
4454 			 * If proc not already registered, add it
4455 			 * to list.
4456 			 */
4457 			if (!ssp) {
4458 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4459 				ssp->ss_pidp = pidp;
4460 				ssp->ss_pid = pidp->pid_id;
4461 				ssp->ss_next = NULL;
4462 				if (pssp)
4463 					pssp->ss_next = ssp;
4464 				else
4465 					stp->sd_siglist = ssp;
4466 				mutex_enter(&pidlock);
4467 				PID_HOLD(pidp);
4468 				mutex_exit(&pidlock);
4469 			}
4470 
4471 			/*
4472 			 * Set events.
4473 			 */
4474 			ssp->ss_events = (int)arg;
4475 		} else {
4476 			/*
4477 			 * Remove proc from register list.
4478 			 */
4479 			if (ssp) {
4480 				mutex_enter(&pidlock);
4481 				PID_RELE(pidp);
4482 				mutex_exit(&pidlock);
4483 				if (pssp)
4484 					pssp->ss_next = ssp->ss_next;
4485 				else
4486 					stp->sd_siglist = ssp->ss_next;
4487 				kmem_free(ssp, sizeof (strsig_t));
4488 			} else {
4489 				mutex_exit(&stp->sd_lock);
4490 				return (EINVAL);
4491 			}
4492 		}
4493 
4494 		/*
4495 		 * Recalculate OR of sig events.
4496 		 */
4497 		stp->sd_sigflags = 0;
4498 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4499 			stp->sd_sigflags |= ssp->ss_events;
4500 		mutex_exit(&stp->sd_lock);
4501 		return (0);
4502 	    }
4503 
4504 	case I_GETSIG:
4505 		/*
4506 		 * Return (in arg) the current registration of events
4507 		 * for which the calling proc is to be signaled.
4508 		 */
4509 	    {
4510 		struct strsig *ssp;
4511 		struct pid  *pidp;
4512 
4513 		pidp = curproc->p_pidp;
4514 		mutex_enter(&stp->sd_lock);
4515 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4516 			if (ssp->ss_pidp == pidp) {
4517 				error = strcopyout(&ssp->ss_events, (void *)arg,
4518 				    sizeof (int), copyflag);
4519 				mutex_exit(&stp->sd_lock);
4520 				return (error);
4521 			}
4522 		mutex_exit(&stp->sd_lock);
4523 		return (EINVAL);
4524 	    }
4525 
4526 	case I_ESETSIG:
4527 		/*
4528 		 * Register the ss_pid to receive the SIGPOLL
4529 		 * signal based on the events in ss_events.  If
4530 		 * ss_events is zero, remove the proc from register list.
4531 		 */
4532 	{
4533 		struct strsig *ssp, *pssp;
4534 		struct proc *proc;
4535 		struct pid  *pidp;
4536 		pid_t pid;
4537 		struct strsigset ss;
4538 
4539 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4540 		if (error)
4541 			return (error);
4542 
4543 		pid = ss.ss_pid;
4544 
4545 		if (ss.ss_events != 0) {
4546 			/*
4547 			 * Permissions check by sending signal 0.
4548 			 * Note that when kill fails it does a set_errno
4549 			 * causing the system call to fail.
4550 			 */
4551 			error = kill(pid, 0);
4552 			if (error) {
4553 				return (error);
4554 			}
4555 		}
4556 		mutex_enter(&pidlock);
4557 		if (pid == 0)
4558 			proc = curproc;
4559 		else if (pid < 0)
4560 			proc = pgfind(-pid);
4561 		else
4562 			proc = prfind(pid);
4563 		if (proc == NULL) {
4564 			mutex_exit(&pidlock);
4565 			return (ESRCH);
4566 		}
4567 		if (pid < 0)
4568 			pidp = proc->p_pgidp;
4569 		else
4570 			pidp = proc->p_pidp;
4571 		ASSERT(pidp);
4572 		/*
4573 		 * Get a hold on the pid structure while referencing it.
4574 		 * There is a separate PID_HOLD should it be inserted
4575 		 * in the list below.
4576 		 */
4577 		PID_HOLD(pidp);
4578 		mutex_exit(&pidlock);
4579 
4580 		pssp = NULL;
4581 		/*
4582 		 * Hold sd_lock to prevent traversal of sd_siglist while
4583 		 * it is modified.
4584 		 */
4585 		mutex_enter(&stp->sd_lock);
4586 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
4587 				pssp = ssp, ssp = ssp->ss_next)
4588 			;
4589 
4590 		if (ss.ss_events) {
4591 			if (ss.ss_events &
4592 			    ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4593 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4594 				mutex_exit(&stp->sd_lock);
4595 				mutex_enter(&pidlock);
4596 				PID_RELE(pidp);
4597 				mutex_exit(&pidlock);
4598 				return (EINVAL);
4599 			}
4600 			if ((ss.ss_events & S_BANDURG) &&
4601 			    !(ss.ss_events & S_RDBAND)) {
4602 				mutex_exit(&stp->sd_lock);
4603 				mutex_enter(&pidlock);
4604 				PID_RELE(pidp);
4605 				mutex_exit(&pidlock);
4606 				return (EINVAL);
4607 			}
4608 
4609 			/*
4610 			 * If proc not already registered, add it
4611 			 * to list.
4612 			 */
4613 			if (!ssp) {
4614 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4615 				ssp->ss_pidp = pidp;
4616 				ssp->ss_pid = pid;
4617 				ssp->ss_next = NULL;
4618 				if (pssp)
4619 					pssp->ss_next = ssp;
4620 				else
4621 					stp->sd_siglist = ssp;
4622 				mutex_enter(&pidlock);
4623 				PID_HOLD(pidp);
4624 				mutex_exit(&pidlock);
4625 			}
4626 
4627 			/*
4628 			 * Set events.
4629 			 */
4630 			ssp->ss_events = ss.ss_events;
4631 		} else {
4632 			/*
4633 			 * Remove proc from register list.
4634 			 */
4635 			if (ssp) {
4636 				mutex_enter(&pidlock);
4637 				PID_RELE(pidp);
4638 				mutex_exit(&pidlock);
4639 				if (pssp)
4640 					pssp->ss_next = ssp->ss_next;
4641 				else
4642 					stp->sd_siglist = ssp->ss_next;
4643 				kmem_free(ssp, sizeof (strsig_t));
4644 			} else {
4645 				mutex_exit(&stp->sd_lock);
4646 				mutex_enter(&pidlock);
4647 				PID_RELE(pidp);
4648 				mutex_exit(&pidlock);
4649 				return (EINVAL);
4650 			}
4651 		}
4652 
4653 		/*
4654 		 * Recalculate OR of sig events.
4655 		 */
4656 		stp->sd_sigflags = 0;
4657 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4658 			stp->sd_sigflags |= ssp->ss_events;
4659 		mutex_exit(&stp->sd_lock);
4660 		mutex_enter(&pidlock);
4661 		PID_RELE(pidp);
4662 		mutex_exit(&pidlock);
4663 		return (0);
4664 	    }
4665 
4666 	case I_EGETSIG:
4667 		/*
4668 		 * Return (in arg) the current registration of events
4669 		 * for which the process or group named by ss_pid is signaled.
4670 		 */
4671 	    {
4672 		struct strsig *ssp;
4673 		struct proc *proc;
4674 		pid_t pid;
4675 		struct pid  *pidp;
4676 		struct strsigset ss;
4677 
4678 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4679 		if (error)
4680 			return (error);
4681 
4682 		pid = ss.ss_pid;
4683 		mutex_enter(&pidlock);
4684 		if (pid == 0)
4685 			proc = curproc;
4686 		else if (pid < 0)
4687 			proc = pgfind(-pid);
4688 		else
4689 			proc = prfind(pid);
4690 		if (proc == NULL) {
4691 			mutex_exit(&pidlock);
4692 			return (ESRCH);
4693 		}
4694 		if (pid < 0)
4695 			pidp = proc->p_pgidp;
4696 		else
4697 			pidp = proc->p_pidp;
4698 
4699 		/* Prevent the pidp from being reassigned */
4700 		PID_HOLD(pidp);
4701 		mutex_exit(&pidlock);
4702 
4703 		mutex_enter(&stp->sd_lock);
4704 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4705 			if (ssp->ss_pid == pid) {
4706 				ss.ss_pid = ssp->ss_pid;
4707 				ss.ss_events = ssp->ss_events;
4708 				error = strcopyout(&ss, (void *)arg,
4709 				    sizeof (struct strsigset), copyflag);
4710 				mutex_exit(&stp->sd_lock);
4711 				mutex_enter(&pidlock);
4712 				PID_RELE(pidp);
4713 				mutex_exit(&pidlock);
4714 				return (error);
4715 			}
4716 		mutex_exit(&stp->sd_lock);
4717 		mutex_enter(&pidlock);
4718 		PID_RELE(pidp);
4719 		mutex_exit(&pidlock);
4720 		return (EINVAL);
4721 	    }
4722 
4723 	case I_PEEK:
4724 	    {
4725 		STRUCT_DECL(strpeek, strpeek);
4726 		size_t n;
4727 		mblk_t *fmp, *tmp_mp = NULL;
4728 
4729 		STRUCT_INIT(strpeek, flag);
4730 
4731 		error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
4732 		    STRUCT_SIZE(strpeek), copyflag);
4733 		if (error)
4734 			return (error);
4735 
4736 		mutex_enter(QLOCK(rdq));
4737 		/*
4738 		 * Skip the invalid messages
4739 		 */
4740 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
4741 			if (mp->b_datap->db_type != M_SIG)
4742 				break;
4743 
4744 		/*
4745 		 * If user has requested to peek at a high priority message
4746 		 * and first message is not, return 0
4747 		 */
4748 		if (mp != NULL) {
4749 			if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
4750 			    queclass(mp) == QNORM) {
4751 				*rvalp = 0;
4752 				mutex_exit(QLOCK(rdq));
4753 				return (0);
4754 			}
4755 		} else if (stp->sd_struiordq == NULL ||
4756 		    (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
4757 			/*
4758 			 * No mblks to look at at the streamhead and
4759 			 * 1). This isn't a synch stream or
4760 			 * 2). This is a synch stream but caller wants high
4761 			 *	priority messages which is not supported by
4762 			 *	the synch stream. (it only supports QNORM)
4763 			 */
4764 			*rvalp = 0;
4765 			mutex_exit(QLOCK(rdq));
4766 			return (0);
4767 		}
4768 
4769 		fmp = mp;
4770 
4771 		if (mp && mp->b_datap->db_type == M_PASSFP) {
4772 			mutex_exit(QLOCK(rdq));
4773 			return (EBADMSG);
4774 		}
4775 
4776 		ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
4777 		    mp->b_datap->db_type == M_PROTO ||
4778 		    mp->b_datap->db_type == M_DATA);
4779 
4780 		if (mp && mp->b_datap->db_type == M_PCPROTO) {
4781 			STRUCT_FSET(strpeek, flags, RS_HIPRI);
4782 		} else {
4783 			STRUCT_FSET(strpeek, flags, 0);
4784 		}
4785 
4786 
4787 		if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
4788 			mutex_exit(QLOCK(rdq));
4789 			return (ENOSR);
4790 		}
4791 		mutex_exit(QLOCK(rdq));
4792 
4793 		/*
4794 		 * set mp = tmp_mp, so that I_PEEK processing can continue.
4795 		 * tmp_mp is used to free the dup'd message.
4796 		 */
4797 		mp = tmp_mp;
4798 
4799 		uio.uio_fmode = 0;
4800 		uio.uio_extflg = UIO_COPY_CACHED;
4801 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
4802 		    UIO_SYSSPACE;
4803 		uio.uio_limit = 0;
4804 		/*
4805 		 * First process PROTO blocks, if any.
4806 		 * If user doesn't want to get ctl info by setting maxlen <= 0,
4807 		 * then set len to -1/0 and skip control blocks part.
4808 		 */
4809 		if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
4810 			STRUCT_FSET(strpeek, ctlbuf.len, -1);
4811 		else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
4812 			STRUCT_FSET(strpeek, ctlbuf.len, 0);
4813 		else {
4814 			int	ctl_part = 0;
4815 
4816 			iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
4817 			iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
4818 			uio.uio_iov = &iov;
4819 			uio.uio_resid = iov.iov_len;
4820 			uio.uio_loffset = 0;
4821 			uio.uio_iovcnt = 1;
4822 			while (mp && mp->b_datap->db_type != M_DATA &&
4823 			    uio.uio_resid >= 0) {
4824 				ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
4825 				    mp->b_datap->db_type == M_PROTO :
4826 				    mp->b_datap->db_type == M_PCPROTO);
4827 
4828 				if ((n = MIN(uio.uio_resid,
4829 				    mp->b_wptr - mp->b_rptr)) != 0 &&
4830 				    (error = uiomove((char *)mp->b_rptr, n,
4831 				    UIO_READ, &uio)) != 0) {
4832 					freemsg(tmp_mp);
4833 					return (error);
4834 				}
4835 				ctl_part = 1;
4836 				mp = mp->b_cont;
4837 			}
4838 			/* No ctl message */
4839 			if (ctl_part == 0)
4840 				STRUCT_FSET(strpeek, ctlbuf.len, -1);
4841 			else
4842 				STRUCT_FSET(strpeek, ctlbuf.len,
4843 				    STRUCT_FGET(strpeek, ctlbuf.maxlen) -
4844 				    uio.uio_resid);
4845 		}
4846 
4847 		/*
4848 		 * Now process DATA blocks, if any.
4849 		 * If user doesn't want to get data info by setting maxlen <= 0,
4850 		 * then set len to -1/0 and skip data blocks part.
4851 		 */
4852 		if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
4853 			STRUCT_FSET(strpeek, databuf.len, -1);
4854 		else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
4855 			STRUCT_FSET(strpeek, databuf.len, 0);
4856 		else {
4857 			int	data_part = 0;
4858 
4859 			iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
4860 			iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
4861 			uio.uio_iov = &iov;
4862 			uio.uio_resid = iov.iov_len;
4863 			uio.uio_loffset = 0;
4864 			uio.uio_iovcnt = 1;
4865 			while (mp && uio.uio_resid) {
4866 				if (mp->b_datap->db_type == M_DATA) {
4867 					if ((n = MIN(uio.uio_resid,
4868 					    mp->b_wptr - mp->b_rptr)) != 0 &&
4869 					    (error = uiomove((char *)mp->b_rptr,
4870 						n, UIO_READ, &uio)) != 0) {
4871 						freemsg(tmp_mp);
4872 						return (error);
4873 					}
4874 					data_part = 1;
4875 				}
4876 				ASSERT(data_part == 0 ||
4877 				    mp->b_datap->db_type == M_DATA);
4878 				mp = mp->b_cont;
4879 			}
4880 			/* No data message */
4881 			if (data_part == 0)
4882 				STRUCT_FSET(strpeek, databuf.len, -1);
4883 			else
4884 				STRUCT_FSET(strpeek, databuf.len,
4885 				    STRUCT_FGET(strpeek, databuf.maxlen) -
4886 				    uio.uio_resid);
4887 		}
4888 		freemsg(tmp_mp);
4889 
4890 		/*
4891 		 * It is a synch stream and user wants to get
4892 		 * data (maxlen > 0).
4893 		 * uio setup is done by the codes that process DATA
4894 		 * blocks above.
4895 		 */
4896 		if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
4897 			infod_t infod;
4898 
4899 			infod.d_cmd = INFOD_COPYOUT;
4900 			infod.d_res = 0;
4901 			infod.d_uiop = &uio;
4902 			error = infonext(rdq, &infod);
4903 			if (error == EINVAL || error == EBUSY)
4904 				error = 0;
4905 			if (error)
4906 				return (error);
4907 			STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
4908 			    databuf.maxlen) - uio.uio_resid);
4909 			if (STRUCT_FGET(strpeek, databuf.len) == 0) {
4910 				/*
4911 				 * No data found by the infonext().
4912 				 */
4913 				STRUCT_FSET(strpeek, databuf.len, -1);
4914 			}
4915 		}
4916 		error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
4917 		    STRUCT_SIZE(strpeek), copyflag);
4918 		if (error) {
4919 			return (error);
4920 		}
4921 		/*
4922 		 * If there is no message retrieved, set return code to 0
4923 		 * otherwise, set it to 1.
4924 		 */
4925 		if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
4926 		    STRUCT_FGET(strpeek, databuf.len) == -1)
4927 			*rvalp = 0;
4928 		else
4929 			*rvalp = 1;
4930 		return (0);
4931 	    }
4932 
4933 	case I_FDINSERT:
4934 	    {
4935 		STRUCT_DECL(strfdinsert, strfdinsert);
4936 		struct file *resftp;
4937 		struct stdata *resstp;
4938 		t_uscalar_t	ival;
4939 		ssize_t msgsize;
4940 		struct strbuf mctl;
4941 
4942 		STRUCT_INIT(strfdinsert, flag);
4943 		if (stp->sd_flag & STRHUP)
4944 			return (ENXIO);
4945 		/*
4946 		 * STRDERR, STWRERR and STPLEX tested above.
4947 		 */
4948 		error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
4949 		    STRUCT_SIZE(strfdinsert), copyflag);
4950 		if (error)
4951 			return (error);
4952 
4953 		if (STRUCT_FGET(strfdinsert, offset) < 0 ||
4954 		    (STRUCT_FGET(strfdinsert, offset) %
4955 		    sizeof (t_uscalar_t)) != 0)
4956 			return (EINVAL);
4957 		if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
4958 			if ((resstp = resftp->f_vnode->v_stream) == NULL) {
4959 				releasef(STRUCT_FGET(strfdinsert, fildes));
4960 				return (EINVAL);
4961 			}
4962 		} else
4963 			return (EINVAL);
4964 
4965 		mutex_enter(&resstp->sd_lock);
4966 		if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
4967 			error = strgeterr(resstp,
4968 					STRDERR|STWRERR|STRHUP|STPLEX, 0);
4969 			if (error != 0) {
4970 				mutex_exit(&resstp->sd_lock);
4971 				releasef(STRUCT_FGET(strfdinsert, fildes));
4972 				return (error);
4973 			}
4974 		}
4975 		mutex_exit(&resstp->sd_lock);
4976 
4977 #ifdef	_ILP32
4978 		{
4979 			queue_t	*q;
4980 			queue_t	*mate = NULL;
4981 
4982 			/* get read queue of stream terminus */
4983 			claimstr(resstp->sd_wrq);
4984 			for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
4985 			    q = q->q_next)
4986 				if (!STRMATED(resstp) && STREAM(q) != resstp &&
4987 				    mate == NULL) {
4988 					ASSERT(q->q_qinfo->qi_srvp);
4989 					ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
4990 					claimstr(q);
4991 					mate = q;
4992 				}
4993 			q = _RD(q);
4994 			if (mate)
4995 				releasestr(mate);
4996 			releasestr(resstp->sd_wrq);
4997 			ival = (t_uscalar_t)q;
4998 		}
4999 #else
5000 		ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
5001 #endif	/* _ILP32 */
5002 
5003 		if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
5004 		    STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
5005 			releasef(STRUCT_FGET(strfdinsert, fildes));
5006 			return (EINVAL);
5007 		}
5008 
5009 		/*
5010 		 * Check for legal flag value.
5011 		 */
5012 		if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
5013 			releasef(STRUCT_FGET(strfdinsert, fildes));
5014 			return (EINVAL);
5015 		}
5016 
5017 		/* get these values from those cached in the stream head */
5018 		mutex_enter(QLOCK(stp->sd_wrq));
5019 		rmin = stp->sd_qn_minpsz;
5020 		rmax = stp->sd_qn_maxpsz;
5021 		mutex_exit(QLOCK(stp->sd_wrq));
5022 
5023 		/*
5024 		 * Make sure ctl and data sizes together fall within
5025 		 * the limits of the max and min receive packet sizes
5026 		 * and do not exceed system limit.  A negative data
5027 		 * length means that no data part is to be sent.
5028 		 */
5029 		ASSERT((rmax >= 0) || (rmax == INFPSZ));
5030 		if (rmax == 0) {
5031 			releasef(STRUCT_FGET(strfdinsert, fildes));
5032 			return (ERANGE);
5033 		}
5034 		if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
5035 			msgsize = 0;
5036 		if ((msgsize < rmin) ||
5037 		    ((msgsize > rmax) && (rmax != INFPSZ)) ||
5038 		    (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
5039 			releasef(STRUCT_FGET(strfdinsert, fildes));
5040 			return (ERANGE);
5041 		}
5042 
5043 		mutex_enter(&stp->sd_lock);
5044 		while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
5045 		    !canputnext(stp->sd_wrq)) {
5046 			if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
5047 			    flag, -1, &done)) != 0 || done) {
5048 				mutex_exit(&stp->sd_lock);
5049 				releasef(STRUCT_FGET(strfdinsert, fildes));
5050 				return (error);
5051 			}
5052 			if (stp->sd_sidp != NULL &&
5053 			    stp->sd_vnode->v_type != VFIFO) {
5054 				mutex_exit(&stp->sd_lock);
5055 				if (error = straccess(stp, access)) {
5056 					releasef(
5057 					    STRUCT_FGET(strfdinsert, fildes));
5058 					return (error);
5059 				}
5060 				mutex_enter(&stp->sd_lock);
5061 			}
5062 		}
5063 		mutex_exit(&stp->sd_lock);
5064 
5065 		/*
5066 		 * Copy strfdinsert.ctlbuf into native form of
5067 		 * ctlbuf to pass down into strmakemsg().
5068 		 */
5069 		mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
5070 		mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
5071 		mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
5072 
5073 		iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
5074 		iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
5075 		uio.uio_iov = &iov;
5076 		uio.uio_iovcnt = 1;
5077 		uio.uio_loffset = 0;
5078 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5079 		    UIO_SYSSPACE;
5080 		uio.uio_fmode = 0;
5081 		uio.uio_extflg = UIO_COPY_CACHED;
5082 		uio.uio_resid = iov.iov_len;
5083 		if ((error = strmakemsg(&mctl,
5084 		    &msgsize, &uio, stp,
5085 		    STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
5086 			STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5087 			releasef(STRUCT_FGET(strfdinsert, fildes));
5088 			return (error);
5089 		}
5090 
5091 		STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5092 
5093 		/*
5094 		 * Place the possibly reencoded queue pointer 'offset' bytes
5095 		 * from the start of the control portion of the message.
5096 		 */
5097 		*((t_uscalar_t *)(mp->b_rptr +
5098 		    STRUCT_FGET(strfdinsert, offset))) = ival;
5099 
5100 		/*
5101 		 * Put message downstream.
5102 		 */
5103 		stream_willservice(stp);
5104 		putnext(stp->sd_wrq, mp);
5105 		stream_runservice(stp);
5106 		releasef(STRUCT_FGET(strfdinsert, fildes));
5107 		return (error);
5108 	    }
5109 
5110 	case I_SENDFD:
5111 	    {
5112 		struct file *fp;
5113 
5114 		if ((fp = getf((int)arg)) == NULL)
5115 			return (EBADF);
5116 		error = do_sendfp(stp, fp, crp);
5117 #ifdef C2_AUDIT
5118 		if (audit_active) {
5119 			audit_fdsend((int)arg, fp, error);
5120 		}
5121 #endif
5122 		releasef((int)arg);
5123 		return (error);
5124 	    }
5125 
5126 	case I_RECVFD:
5127 	case I_E_RECVFD:
5128 	    {
5129 		struct k_strrecvfd *srf;
5130 		int i, fd;
5131 
5132 		mutex_enter(&stp->sd_lock);
5133 		while (!(mp = getq(rdq))) {
5134 			if (stp->sd_flag & (STRHUP|STREOF)) {
5135 				mutex_exit(&stp->sd_lock);
5136 				return (ENXIO);
5137 			}
5138 			if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
5139 			    flag, -1, &done)) != 0 || done) {
5140 				mutex_exit(&stp->sd_lock);
5141 				return (error);
5142 			}
5143 			if (stp->sd_sidp != NULL &&
5144 			    stp->sd_vnode->v_type != VFIFO) {
5145 				mutex_exit(&stp->sd_lock);
5146 				if (error = straccess(stp, access))
5147 					return (error);
5148 				mutex_enter(&stp->sd_lock);
5149 			}
5150 		}
5151 		if (mp->b_datap->db_type != M_PASSFP) {
5152 			putback(stp, rdq, mp, mp->b_band);
5153 			mutex_exit(&stp->sd_lock);
5154 			return (EBADMSG);
5155 		}
5156 		mutex_exit(&stp->sd_lock);
5157 
5158 		srf = (struct k_strrecvfd *)mp->b_rptr;
5159 		if ((fd = ufalloc(0)) == -1) {
5160 			mutex_enter(&stp->sd_lock);
5161 			putback(stp, rdq, mp, mp->b_band);
5162 			mutex_exit(&stp->sd_lock);
5163 			return (EMFILE);
5164 		}
5165 		if (cmd == I_RECVFD) {
5166 			struct o_strrecvfd	ostrfd;
5167 
5168 			/* check to see if uid/gid values are too large. */
5169 
5170 			if (srf->uid > (o_uid_t)USHRT_MAX ||
5171 			    srf->gid > (o_gid_t)USHRT_MAX) {
5172 				mutex_enter(&stp->sd_lock);
5173 				putback(stp, rdq, mp, mp->b_band);
5174 				mutex_exit(&stp->sd_lock);
5175 				setf(fd, NULL);	/* release fd entry */
5176 				return (EOVERFLOW);
5177 			}
5178 
5179 			ostrfd.fd = fd;
5180 			ostrfd.uid = (o_uid_t)srf->uid;
5181 			ostrfd.gid = (o_gid_t)srf->gid;
5182 
5183 			/* Null the filler bits */
5184 			for (i = 0; i < 8; i++)
5185 				ostrfd.fill[i] = 0;
5186 
5187 			error = strcopyout(&ostrfd, (void *)arg,
5188 			    sizeof (struct o_strrecvfd), copyflag);
5189 		} else {		/* I_E_RECVFD */
5190 			struct strrecvfd	strfd;
5191 
5192 			strfd.fd = fd;
5193 			strfd.uid = srf->uid;
5194 			strfd.gid = srf->gid;
5195 
5196 			/* null the filler bits */
5197 			for (i = 0; i < 8; i++)
5198 				strfd.fill[i] = 0;
5199 
5200 			error = strcopyout(&strfd, (void *)arg,
5201 			    sizeof (struct strrecvfd), copyflag);
5202 		}
5203 
5204 		if (error) {
5205 			setf(fd, NULL);	/* release fd entry */
5206 			mutex_enter(&stp->sd_lock);
5207 			putback(stp, rdq, mp, mp->b_band);
5208 			mutex_exit(&stp->sd_lock);
5209 			return (error);
5210 		}
5211 #ifdef C2_AUDIT
5212 		if (audit_active) {
5213 			audit_fdrecv(fd, srf->fp);
5214 		}
5215 #endif
5216 
5217 		/*
5218 		 * Always increment f_count since the freemsg() below will
5219 		 * always call free_passfp() which performs a closef().
5220 		 */
5221 		mutex_enter(&srf->fp->f_tlock);
5222 		srf->fp->f_count++;
5223 		mutex_exit(&srf->fp->f_tlock);
5224 		setf(fd, srf->fp);
5225 		freemsg(mp);
5226 		return (0);
5227 	    }
5228 
5229 	case I_SWROPT:
5230 		/*
5231 		 * Set/clear the write options. arg is a bit
5232 		 * mask with any of the following bits set...
5233 		 * 	SNDZERO - send zero length message
5234 		 *	SNDPIPE - send sigpipe to process if
5235 		 *		sd_werror is set and process is
5236 		 *		doing a write or putmsg.
5237 		 * The new stream head write options should reflect
5238 		 * what is in arg.
5239 		 */
5240 		if (arg & ~(SNDZERO|SNDPIPE))
5241 			return (EINVAL);
5242 
5243 		mutex_enter(&stp->sd_lock);
5244 		stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
5245 		if (arg & SNDZERO)
5246 			stp->sd_wput_opt |= SW_SNDZERO;
5247 		if (arg & SNDPIPE)
5248 			stp->sd_wput_opt |= SW_SIGPIPE;
5249 		mutex_exit(&stp->sd_lock);
5250 		return (0);
5251 
5252 	case I_GWROPT:
5253 	    {
5254 		int wropt = 0;
5255 
5256 		if (stp->sd_wput_opt & SW_SNDZERO)
5257 			wropt |= SNDZERO;
5258 		if (stp->sd_wput_opt & SW_SIGPIPE)
5259 			wropt |= SNDPIPE;
5260 		return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
5261 		    copyflag));
5262 	    }
5263 
5264 	case I_LIST:
5265 		/*
5266 		 * Returns all the modules found on this stream,
5267 		 * up to the driver. If argument is NULL, return the
5268 		 * number of modules (including driver). If argument
5269 		 * is not NULL, copy the names into the structure
5270 		 * provided.
5271 		 */
5272 
5273 	    {
5274 		queue_t *q;
5275 		int num_modules, space_allocated;
5276 		STRUCT_DECL(str_list, strlist);
5277 		struct str_mlist *mlist_ptr;
5278 
5279 		if (arg == NULL) { /* Return number of modules plus driver */
5280 			q = stp->sd_wrq;
5281 			if (stp->sd_vnode->v_type == VFIFO) {
5282 				*rvalp = stp->sd_pushcnt;
5283 			} else {
5284 				*rvalp = stp->sd_pushcnt + 1;
5285 			}
5286 		} else {
5287 			STRUCT_INIT(strlist, flag);
5288 
5289 			error = strcopyin((void *)arg, STRUCT_BUF(strlist),
5290 			    STRUCT_SIZE(strlist), copyflag);
5291 			if (error)
5292 				return (error);
5293 
5294 			space_allocated = STRUCT_FGET(strlist, sl_nmods);
5295 			if ((space_allocated) <= 0)
5296 				return (EINVAL);
5297 			claimstr(stp->sd_wrq);
5298 			q = stp->sd_wrq;
5299 			num_modules = 0;
5300 			while (_SAMESTR(q) && (space_allocated != 0)) {
5301 				char *name =
5302 				    q->q_next->q_qinfo->qi_minfo->mi_idname;
5303 
5304 				mlist_ptr = STRUCT_FGETP(strlist, sl_modlist);
5305 
5306 				error = strcopyout(name, mlist_ptr,
5307 				    strlen(name) + 1, copyflag);
5308 
5309 				if (error) {
5310 					releasestr(stp->sd_wrq);
5311 					return (error);
5312 				}
5313 				q = q->q_next;
5314 				space_allocated--;
5315 				num_modules++;
5316 				mlist_ptr =
5317 				    (struct str_mlist *)((uintptr_t)mlist_ptr +
5318 				    sizeof (struct str_mlist));
5319 				STRUCT_FSETP(strlist, sl_modlist, mlist_ptr);
5320 			}
5321 			releasestr(stp->sd_wrq);
5322 			error = strcopyout(&num_modules, (void *)arg,
5323 			    sizeof (int), copyflag);
5324 		}
5325 		return (error);
5326 	    }
5327 
5328 	case I_CKBAND:
5329 	    {
5330 		queue_t *q;
5331 		qband_t *qbp;
5332 
5333 		if ((arg < 0) || (arg >= NBAND))
5334 			return (EINVAL);
5335 		q = _RD(stp->sd_wrq);
5336 		mutex_enter(QLOCK(q));
5337 		if (arg > (int)q->q_nband) {
5338 			*rvalp = 0;
5339 		} else {
5340 			if (arg == 0) {
5341 				if (q->q_first)
5342 					*rvalp = 1;
5343 				else
5344 					*rvalp = 0;
5345 			} else {
5346 				qbp = q->q_bandp;
5347 				while (--arg > 0)
5348 					qbp = qbp->qb_next;
5349 				if (qbp->qb_first)
5350 					*rvalp = 1;
5351 				else
5352 					*rvalp = 0;
5353 			}
5354 		}
5355 		mutex_exit(QLOCK(q));
5356 		return (0);
5357 	    }
5358 
5359 	case I_GETBAND:
5360 	    {
5361 		int intpri;
5362 		queue_t *q;
5363 
5364 		q = _RD(stp->sd_wrq);
5365 		mutex_enter(QLOCK(q));
5366 		mp = q->q_first;
5367 		if (!mp) {
5368 			mutex_exit(QLOCK(q));
5369 			return (ENODATA);
5370 		}
5371 		intpri = (int)mp->b_band;
5372 		error = strcopyout(&intpri, (void *)arg, sizeof (int),
5373 		    copyflag);
5374 		mutex_exit(QLOCK(q));
5375 		return (error);
5376 	    }
5377 
5378 	case I_ATMARK:
5379 	    {
5380 		queue_t *q;
5381 
5382 		if (arg & ~(ANYMARK|LASTMARK))
5383 			return (EINVAL);
5384 		q = _RD(stp->sd_wrq);
5385 		mutex_enter(&stp->sd_lock);
5386 		if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
5387 			*rvalp = 1;
5388 		} else {
5389 			mutex_enter(QLOCK(q));
5390 			mp = q->q_first;
5391 
5392 			if (mp == NULL)
5393 				*rvalp = 0;
5394 			else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
5395 				*rvalp = 1;
5396 			else if ((arg == LASTMARK) && (mp == stp->sd_mark))
5397 				*rvalp = 1;
5398 			else
5399 				*rvalp = 0;
5400 			mutex_exit(QLOCK(q));
5401 		}
5402 		mutex_exit(&stp->sd_lock);
5403 		return (0);
5404 	    }
5405 
5406 	case I_CANPUT:
5407 	    {
5408 		char band;
5409 
5410 		if ((arg < 0) || (arg >= NBAND))
5411 			return (EINVAL);
5412 		band = (char)arg;
5413 		*rvalp = bcanputnext(stp->sd_wrq, band);
5414 		return (0);
5415 	    }
5416 
5417 	case I_SETCLTIME:
5418 	    {
5419 		int closetime;
5420 
5421 		error = strcopyin((void *)arg, &closetime, sizeof (int),
5422 		    copyflag);
5423 		if (error)
5424 			return (error);
5425 		if (closetime < 0)
5426 			return (EINVAL);
5427 
5428 		stp->sd_closetime = closetime;
5429 		return (0);
5430 	    }
5431 
5432 	case I_GETCLTIME:
5433 	    {
5434 		int closetime;
5435 
5436 		closetime = stp->sd_closetime;
5437 		return (strcopyout(&closetime, (void *)arg, sizeof (int),
5438 		    copyflag));
5439 	    }
5440 
5441 	case TIOCGSID:
5442 	{
5443 		pid_t sid;
5444 
5445 		mutex_enter(&pidlock);
5446 		if (stp->sd_sidp == NULL) {
5447 			mutex_exit(&pidlock);
5448 			return (ENOTTY);
5449 		}
5450 		sid = stp->sd_sidp->pid_id;
5451 		mutex_exit(&pidlock);
5452 		return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
5453 		    copyflag));
5454 	}
5455 
5456 	case TIOCSPGRP:
5457 	{
5458 		pid_t pgrp;
5459 		proc_t *q;
5460 		pid_t	sid, fg_pgid, bg_pgid;
5461 
5462 		if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
5463 		    copyflag))
5464 			return (error);
5465 		mutex_enter(&stp->sd_lock);
5466 		mutex_enter(&pidlock);
5467 		if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
5468 			mutex_exit(&pidlock);
5469 			mutex_exit(&stp->sd_lock);
5470 			return (ENOTTY);
5471 		}
5472 		if (pgrp == stp->sd_pgidp->pid_id) {
5473 			mutex_exit(&pidlock);
5474 			mutex_exit(&stp->sd_lock);
5475 			return (0);
5476 		}
5477 		if (pgrp <= 0 || pgrp >= maxpid) {
5478 			mutex_exit(&pidlock);
5479 			mutex_exit(&stp->sd_lock);
5480 			return (EINVAL);
5481 		}
5482 		if ((q = pgfind(pgrp)) == NULL ||
5483 		    q->p_sessp != ttoproc(curthread)->p_sessp) {
5484 			mutex_exit(&pidlock);
5485 			mutex_exit(&stp->sd_lock);
5486 			return (EPERM);
5487 		}
5488 		sid = stp->sd_sidp->pid_id;
5489 		fg_pgid = q->p_pgrp;
5490 		bg_pgid = stp->sd_pgidp->pid_id;
5491 		CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
5492 		PID_RELE(stp->sd_pgidp);
5493 		stp->sd_pgidp = q->p_pgidp;
5494 		PID_HOLD(stp->sd_pgidp);
5495 		mutex_exit(&pidlock);
5496 		mutex_exit(&stp->sd_lock);
5497 		return (0);
5498 	}
5499 
5500 	case TIOCGPGRP:
5501 	{
5502 		pid_t pgrp;
5503 
5504 		mutex_enter(&pidlock);
5505 		if (stp->sd_sidp == NULL) {
5506 			mutex_exit(&pidlock);
5507 			return (ENOTTY);
5508 		}
5509 		pgrp = stp->sd_pgidp->pid_id;
5510 		mutex_exit(&pidlock);
5511 		return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
5512 		    copyflag));
5513 	}
5514 
5515 	case FIONBIO:
5516 	case FIOASYNC:
5517 		return (0);	/* handled by the upper layer */
5518 	}
5519 }
5520 
5521 /*
5522  * Custom free routine used for M_PASSFP messages.
5523  */
5524 static void
5525 free_passfp(struct k_strrecvfd *srf)
5526 {
5527 	(void) closef(srf->fp);
5528 	kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
5529 }
5530 
5531 /* ARGSUSED */
5532 int
5533 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
5534 {
5535 	queue_t *qp, *nextqp;
5536 	struct k_strrecvfd *srf;
5537 	mblk_t *mp;
5538 	frtn_t *frtnp;
5539 	size_t bufsize;
5540 	queue_t	*mate = NULL;
5541 	syncq_t	*sq = NULL;
5542 	int retval = 0;
5543 
5544 	if (stp->sd_flag & STRHUP)
5545 		return (ENXIO);
5546 
5547 	claimstr(stp->sd_wrq);
5548 
5549 	/* Fastpath, we have a pipe, and we are already mated, use it. */
5550 	if (STRMATED(stp)) {
5551 		qp = _RD(stp->sd_mate->sd_wrq);
5552 		claimstr(qp);
5553 		mate = qp;
5554 	} else { /* Not already mated. */
5555 
5556 		/*
5557 		 * Walk the stream to the end of this one.
5558 		 * assumes that the claimstr() will prevent
5559 		 * plumbing between the stream head and the
5560 		 * driver from changing
5561 		 */
5562 		qp = stp->sd_wrq;
5563 
5564 		/*
5565 		 * Loop until we reach the end of this stream.
5566 		 * On completion, qp points to the write queue
5567 		 * at the end of the stream, or the read queue
5568 		 * at the stream head if this is a fifo.
5569 		 */
5570 		while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
5571 			;
5572 
5573 		/*
5574 		 * Just in case we get a q_next which is NULL, but
5575 		 * not at the end of the stream.  This is actually
5576 		 * broken, so we set an assert to catch it in
5577 		 * debug, and set an error and return if not debug.
5578 		 */
5579 		ASSERT(qp);
5580 		if (qp == NULL) {
5581 			releasestr(stp->sd_wrq);
5582 			return (EINVAL);
5583 		}
5584 
5585 		/*
5586 		 * Enter the syncq for the driver, so (hopefully)
5587 		 * the queue values will not change on us.
5588 		 * XXXX - This will only prevent the race IFF only
5589 		 *   the write side modifies the q_next member, and
5590 		 *   the put procedure is protected by at least
5591 		 *   MT_PERQ.
5592 		 */
5593 		if ((sq = qp->q_syncq) != NULL)
5594 			entersq(sq, SQ_PUT);
5595 
5596 		/* Now get the q_next value from this qp. */
5597 		nextqp = qp->q_next;
5598 
5599 		/*
5600 		 * If nextqp exists and the other stream is different
5601 		 * from this one claim the stream, set the mate, and
5602 		 * get the read queue at the stream head of the other
5603 		 * stream.  Assumes that nextqp was at least valid when
5604 		 * we got it.  Hopefully the entersq of the driver
5605 		 * will prevent it from changing on us.
5606 		 */
5607 		if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
5608 			ASSERT(qp->q_qinfo->qi_srvp);
5609 			ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
5610 			ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
5611 			claimstr(nextqp);
5612 
5613 			/* Make sure we still have a q_next */
5614 			if (nextqp != qp->q_next) {
5615 				releasestr(stp->sd_wrq);
5616 				releasestr(nextqp);
5617 				return (EINVAL);
5618 			}
5619 
5620 			qp = _RD(STREAM(nextqp)->sd_wrq);
5621 			mate = qp;
5622 		}
5623 		/* If we entered the synq above, leave it. */
5624 		if (sq != NULL)
5625 			leavesq(sq, SQ_PUT);
5626 	} /*  STRMATED(STP)  */
5627 
5628 	/* XXX prevents substitution of the ops vector */
5629 	if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
5630 		retval = EINVAL;
5631 		goto out;
5632 	}
5633 
5634 	if (qp->q_flag & QFULL) {
5635 		retval = EAGAIN;
5636 		goto out;
5637 	}
5638 
5639 	/*
5640 	 * Since M_PASSFP messages include a file descriptor, we use
5641 	 * esballoc() and specify a custom free routine (free_passfp()) that
5642 	 * will close the descriptor as part of freeing the message.  For
5643 	 * convenience, we stash the frtn_t right after the data block.
5644 	 */
5645 	bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
5646 	srf = kmem_alloc(bufsize, KM_NOSLEEP);
5647 	if (srf == NULL) {
5648 		retval = EAGAIN;
5649 		goto out;
5650 	}
5651 
5652 	frtnp = (frtn_t *)(srf + 1);
5653 	frtnp->free_arg = (caddr_t)srf;
5654 	frtnp->free_func = free_passfp;
5655 
5656 	mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
5657 	if (mp == NULL) {
5658 		kmem_free(srf, bufsize);
5659 		retval = EAGAIN;
5660 		goto out;
5661 	}
5662 	mp->b_wptr += sizeof (struct k_strrecvfd);
5663 	mp->b_datap->db_type = M_PASSFP;
5664 
5665 	srf->fp = fp;
5666 	srf->uid = crgetuid(curthread->t_cred);
5667 	srf->gid = crgetgid(curthread->t_cred);
5668 	mutex_enter(&fp->f_tlock);
5669 	fp->f_count++;
5670 	mutex_exit(&fp->f_tlock);
5671 
5672 	put(qp, mp);
5673 out:
5674 	releasestr(stp->sd_wrq);
5675 	if (mate)
5676 		releasestr(mate);
5677 	return (retval);
5678 }
5679 
/*
 * Send an ioctl message downstream and wait for acknowledgement.
 * flags may be set to either U_TO_K or K_TO_K and a combination
 * of STR_NOERROR or STR_NOSIG
 * STR_NOSIG: Signals are essentially ignored or held and have
 *	no effect for the duration of the call.
 * STR_NOERROR: Ignores stream head read, write and hup errors.
 *	Additionally, if an existing ioctl times out, it is assumed
 *	lost and this ioctl will continue as if the previous ioctl had
 *	finished.  ETIME may be returned if this ioctl times out (i.e.
 *	ic_timout is not INFTIM).  Non-stream head errors may be returned if
 *	the ioc_error indicates that the driver/module had problems,
 *	an EFAULT was found when accessing user data, a lack of
 *	resources, etc.
 *
 * stp		stream head the ioctl is issued on
 * strioc	request description: ic_cmd, ic_len, ic_dp and ic_timout are
 *		read here; ic_len == TRANSPARENT selects a transparent
 *		ioctl, and for non-transparent requests ic_len is updated
 *		from the reply on a positive acknowledgement
 * fflags	file flags; only the FMODELS bits are placed in the messages
 * flag		U_TO_K or K_TO_K, optionally with STR_NOERROR / STR_NOSIG
 * crp		credentials; held here (crhold) once the M_IOCTL block has
 *		been built and released (crfree) on every return after that
 * rvalp	receives ioc_rval from a positive acknowledgement that
 *		carries no ioc_error
 *
 * Returns 0 on success or an errno value (among others EINVAL for a bad
 * ic_len, EINTR or ETIME from the waits, or the error carried in the
 * M_IOCACK/M_IOCNAK reply).
 */
int
strdoioctl(
	struct stdata *stp,
	struct strioctl *strioc,
	int fflags,		/* file flags with model info */
	int flag,
	cred_t *crp,
	int *rvalp)
{
	mblk_t *bp;
	struct iocblk *iocbp;
	struct copyreq *reqp;
	struct copyresp *resp;
	int id;
	int transparent = 0;
	int error = 0;
	int len = 0;
	caddr_t taddr;
	int copyflag = (flag & (U_TO_K | K_TO_K));
	int sigflag = (flag & STR_NOSIG);
	int errs;
	uint_t waitflags;

	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
	ASSERT((fflags & FMODELS) != 0);

	TRACE_2(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL,
		"strdoioctl:stp %p strioc %p", stp, strioc);
	if (strioc->ic_len == TRANSPARENT) {	/* send arg in M_DATA block */
		transparent = 1;
		strioc->ic_len = sizeof (intptr_t);
	}

	/* strmsgsz == 0 (or negative) disables the upper bound check. */
	if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
		return (EINVAL);

	if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
	    crp)) == NULL)
			return (error);

	bzero(bp->b_wptr, sizeof (union ioctypes));

	iocbp = (struct iocblk *)bp->b_wptr;
	iocbp->ioc_count = strioc->ic_len;
	iocbp->ioc_cmd = strioc->ic_cmd;
	iocbp->ioc_flag = (fflags & FMODELS);

	/* From here on every return path must crfree(crp). */
	crhold(crp);
	iocbp->ioc_cr = crp;
	DB_TYPE(bp) = M_IOCTL;
	DB_CPID(bp) = curproc->p_pid;
	bp->b_wptr += sizeof (struct iocblk);

	/* Stream head flags that abort the ioctl when set. */
	if (flag & STR_NOERROR)
		errs = STPLEX;
	else
		errs = STRHUP|STRDERR|STWRERR|STPLEX;

	/*
	 * If there is data to copy into ioctl block, do so.
	 */
	if (iocbp->ioc_count > 0) {
		if (transparent)
			/*
			 * Note: STR_NOERROR does not have an effect
			 * in putiocd()
			 */
			id = K_TO_K | sigflag;
		else
			id = flag;
		if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
			freemsg(bp);
			crfree(crp);
			return (error);
		}

		/*
		 * We could have slept copying in user pages.
		 * Recheck the stream head state (the other end
		 * of a pipe could have gone away).
		 */
		if (stp->sd_flag & errs) {
			mutex_enter(&stp->sd_lock);
			error = strgeterr(stp, errs, 0);
			mutex_exit(&stp->sd_lock);
			if (error != 0) {
				freemsg(bp);
				crfree(crp);
				return (error);
			}
		}
	}
	/*
	 * ioc_count held the real byte count while the data was attached;
	 * a transparent request is sent with ioc_count == TRANSPARENT.
	 */
	if (transparent)
		iocbp->ioc_count = TRANSPARENT;

	/*
	 * Block for up to STRTIMOUT milliseconds if there is an outstanding
	 * ioctl for this stream already running.  All processes
	 * sleeping here will be awakened as a result of an ACK
	 * or NAK being received for the outstanding ioctl, or
	 * as a result of the timer expiring on the outstanding
	 * ioctl (a failure), or as a result of any waiting
	 * process's timer expiring (also a failure).
	 */

	error = 0;
	mutex_enter(&stp->sd_lock);
	while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) {
		clock_t cv_rval;

		TRACE_0(TR_FAC_STREAMS_FR,
			TR_STRDOIOCTL_WAIT,
			"strdoioctl sleeps - IOCWAIT");
		cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
		    STRTIMOUT, sigflag);
		/*
		 * As used here: cv_rval == 0 is treated as an interrupted
		 * wait, cv_rval < 0 as a timeout, cv_rval > 0 as a wakeup.
		 */
		if (cv_rval <= 0) {
			if (cv_rval == 0) {
				error = EINTR;
			} else {
				if (flag & STR_NOERROR) {
					/*
					 * Terminating current ioctl in
					 * progress -- assume it got lost and
					 * wake up the other thread so that the
					 * operation completes.
					 */
					if (!(stp->sd_flag & IOCWAITNE)) {
						stp->sd_flag |= IOCWAITNE;
						cv_broadcast(&stp->sd_monitor);
					}
					/*
					 * Otherwise, there's a running
					 * STR_NOERROR -- we have no choice
					 * here but to wait forever (or until
					 * interrupted).
					 */
				} else {
					/*
					 * pending ioctl has caused
					 * us to time out
					 */
					error = ETIME;
				}
			}
		} else if ((stp->sd_flag & errs)) {
			error = strgeterr(stp, errs, 0);
		}
		if (error) {
			mutex_exit(&stp->sd_lock);
			freemsg(bp);
			crfree(crp);
			return (error);
		}
	}

	/*
	 * Have control of ioctl mechanism.
	 * Send down ioctl packet and wait for response.
	 */
	/* (mblk_t *)-1 is the duplicate-reply marker, not a real message. */
	if (stp->sd_iocblk != (mblk_t *)-1) {
		freemsg(stp->sd_iocblk);
	}
	stp->sd_iocblk = NULL;

	/*
	 * If this is marked with 'noerror' (internal; mostly
	 * I_{P,}{UN,}LINK), then make sure nobody else is able to get
	 * in here by setting IOCWAITNE.
	 */
	waitflags = IOCWAIT;
	if (flag & STR_NOERROR)
		waitflags |= IOCWAITNE;

	stp->sd_flag |= waitflags;

	/*
	 * Assign sequence number.
	 */
	iocbp->ioc_id = stp->sd_iocid = getiocseqno();

	mutex_exit(&stp->sd_lock);

	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
	stream_willservice(stp);
	putnext(stp->sd_wrq, bp);
	stream_runservice(stp);

	/*
	 * Timed wait for acknowledgment.  The wait time is limited by
	 * ic_timout: a positive value is scaled by 1000 before being
	 * handed to str_cv_wait() (the same argument slot that otherwise
	 * receives STRTIMOUT milliseconds), 0 means use the default value
	 * of STRTIMOUT milliseconds, and -1 means wait forever.  This will
	 * be awakened either by an ACK/NAK message arriving, the timer
	 * expiring, or the timer expiring on another ioctl waiting for
	 * control of the mechanism.
	 */
waitioc:
	mutex_enter(&stp->sd_lock);


	/*
	 * If the reply has already arrived, don't sleep.  If awakened from
	 * the sleep, fail only if the reply has not arrived by then.
	 * Otherwise, process the reply.
	 */
	while (!stp->sd_iocblk) {
		clock_t cv_rval;

		if (stp->sd_flag & errs) {
			error = strgeterr(stp, errs, 0);
			if (error != 0) {
				/*
				 * Give up the ioctl mechanism and wake any
				 * thread waiting for it in the loop above.
				 */
				stp->sd_flag &= ~waitflags;
				cv_broadcast(&stp->sd_iocmonitor);
				mutex_exit(&stp->sd_lock);
				crfree(crp);
				return (error);
			}
		}

		TRACE_0(TR_FAC_STREAMS_FR,
			TR_STRDOIOCTL_WAIT2,
			"strdoioctl sleeps awaiting reply");
		ASSERT(error == 0);

		cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
		    (strioc->ic_timout ?
		    strioc->ic_timout * 1000 : STRTIMOUT), sigflag);

		/*
		 * There are four possible cases here: interrupt, timeout,
		 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
		 * valid M_IOCTL reply).
		 *
		 * If we've been awakened by a STR_NOERROR ioctl on some other
		 * thread, then sd_iocblk will still be NULL, and IOCWAITNE
		 * will be set.  Pretend as if we just timed out.  Note that
		 * this other thread waited at least STRTIMOUT before trying to
		 * awaken our thread, so this is indistinguishable (even for
		 * INFTIM) from the case where we failed with ETIME waiting on
		 * IOCWAIT in the prior loop.
		 */
		if (cv_rval > 0 && !(flag & STR_NOERROR) &&
		    stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
			cv_rval = -1;
		}

		/*
		 * note: STR_NOERROR does not protect
		 * us here.. use ic_timout < 0
		 */
		if (cv_rval <= 0) {
			if (cv_rval == 0) {
				error = EINTR;
			} else {
				error =  ETIME;
			}
			/*
			 * A message could have come in after we were scheduled
			 * but before we were actually run.
			 */
			bp = stp->sd_iocblk;
			stp->sd_iocblk = NULL;
			if (bp != NULL) {
				if ((bp->b_datap->db_type == M_COPYIN) ||
				    (bp->b_datap->db_type == M_COPYOUT)) {
					/*
					 * A copy request is pending below;
					 * answer it with a failing M_IOCDATA
					 * instead of silently dropping it.
					 * sd_lock is dropped around putnext().
					 */
					mutex_exit(&stp->sd_lock);
					if (bp->b_cont) {
						freemsg(bp->b_cont);
						bp->b_cont = NULL;
					}
					bp->b_datap->db_type = M_IOCDATA;
					bp->b_wptr = bp->b_rptr +
						sizeof (struct copyresp);
					resp = (struct copyresp *)bp->b_rptr;
					resp->cp_rval =
					    (caddr_t)1; /* failure */
					stream_willservice(stp);
					putnext(stp->sd_wrq, bp);
					stream_runservice(stp);
					mutex_enter(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
			}
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}
	}
	bp = stp->sd_iocblk;
	/*
	 * Note: it is strictly impossible to get here with sd_iocblk set to
	 * -1.  This is because the initial loop above doesn't allow any new
	 * ioctls into the fray until all others have passed this point.
	 */
	ASSERT(bp != NULL && bp != (mblk_t *)-1);
	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
	if ((bp->b_datap->db_type == M_IOCACK) ||
	    (bp->b_datap->db_type == M_IOCNAK)) {
		/* for detection of duplicate ioctl replies */
		stp->sd_iocblk = (mblk_t *)-1;
		stp->sd_flag &= ~waitflags;
		cv_broadcast(&stp->sd_iocmonitor);
		mutex_exit(&stp->sd_lock);
	} else {
		/*
		 * flags not cleared here because we're still doing
		 * copy in/out for ioctl.
		 */
		stp->sd_iocblk = NULL;
		mutex_exit(&stp->sd_lock);
	}


	/*
	 * Have received acknowledgment.
	 */

	switch (bp->b_datap->db_type) {
	case M_IOCACK:
		/*
		 * Positive ack.
		 */
		iocbp = (struct iocblk *)bp->b_rptr;

		/*
		 * Set error if indicated.
		 */
		if (iocbp->ioc_error) {
			error = iocbp->ioc_error;
			break;
		}

		/*
		 * Set return value.
		 */
		*rvalp = iocbp->ioc_rval;

		/*
		 * Data may have been returned in ACK message (ioc_count > 0).
		 * If so, copy it out to the user's buffer.
		 */
		if (iocbp->ioc_count && !transparent) {
			if (error = getiocd(bp, strioc->ic_dp, copyflag))
				break;
		}
		if (!transparent) {
			if (len)	/* an M_COPYOUT was used with I_STR */
				strioc->ic_len = len;
			else
				strioc->ic_len = (int)iocbp->ioc_count;
		}
		break;

	case M_IOCNAK:
		/*
		 * Negative ack.
		 *
		 * The only thing to do is set error as specified
		 * in neg ack packet.
		 */
		iocbp = (struct iocblk *)bp->b_rptr;

		/* A NAK without an explicit error is reported as EINVAL. */
		error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
		break;

	case M_COPYIN:
		/*
		 * Driver or module has requested user ioctl data.
		 */
		reqp = (struct copyreq *)bp->b_rptr;

		/*
		 * M_COPYIN should *never* have a message attached, though
		 * it's harmless if it does -- thus, panic on a DEBUG
		 * kernel and just free it on a non-DEBUG build.
		 */
		ASSERT(bp->b_cont == NULL);
		if (bp->b_cont != NULL) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}

		error = putiocd(bp, reqp->cq_addr, flag, crp);
		if (error && bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}

		/* Reuse the request block as the M_IOCDATA response. */
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		bp->b_datap->db_type = M_IOCDATA;

		mblk_setcred(bp, crp);
		DB_CPID(bp) = curproc->p_pid;
		resp = (struct copyresp *)bp->b_rptr;
		/* cp_rval carries the copy status: 0 on success. */
		resp->cp_rval = (caddr_t)(uintptr_t)error;
		resp->cp_flag = (fflags & FMODELS);

		stream_willservice(stp);
		putnext(stp->sd_wrq, bp);
		stream_runservice(stp);

		if (error) {
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}

		/* The response went downstream; wait for the next reply. */
		goto waitioc;

	case M_COPYOUT:
		/*
		 * Driver or module has ioctl data for a user.
		 */
		reqp = (struct copyreq *)bp->b_rptr;
		ASSERT(bp->b_cont != NULL);

		/*
		 * Always (transparent or non-transparent )
		 * use the address specified in the request
		 */
		taddr = reqp->cq_addr;
		/* Remembered so the final M_IOCACK can report it in ic_len. */
		if (!transparent)
			len = (int)reqp->cq_size;

		/* copyout data to the provided address */
		error = getiocd(bp, taddr, copyflag);

		freemsg(bp->b_cont);
		bp->b_cont = NULL;

		/* Reuse the request block as the M_IOCDATA response. */
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		bp->b_datap->db_type = M_IOCDATA;

		mblk_setcred(bp, crp);
		DB_CPID(bp) = curproc->p_pid;
		resp = (struct copyresp *)bp->b_rptr;
		/* cp_rval carries the copy status: 0 on success. */
		resp->cp_rval = (caddr_t)(uintptr_t)error;
		resp->cp_flag = (fflags & FMODELS);

		stream_willservice(stp);
		putnext(stp->sd_wrq, bp);
		stream_runservice(stp);

		if (error) {
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}
		/* The response went downstream; wait for the next reply. */
		goto waitioc;

	default:
		/*
		 * Unexpected reply type: panic on DEBUG, otherwise just
		 * release the ioctl mechanism (flags were not cleared above
		 * for anything other than M_IOCACK/M_IOCNAK).
		 */
		ASSERT(0);
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~waitflags;
		cv_broadcast(&stp->sd_iocmonitor);
		mutex_exit(&stp->sd_lock);
		break;
	}

	freemsg(bp);
	crfree(crp);
	return (error);
}
6170 
6171 /*
6172  * For the SunOS keyboard driver.
6173  * Return the next available "ioctl" sequence number.
6174  * Exported, so that streams modules can send "ioctl" messages
6175  * downstream from their open routine.
6176  */
6177 int
6178 getiocseqno(void)
6179 {
6180 	int	i;
6181 
6182 	mutex_enter(&strresources);
6183 	i = ++ioc_id;
6184 	mutex_exit(&strresources);
6185 	return (i);
6186 }
6187 
6188 /*
6189  * Get the next message from the read queue.  If the message is
6190  * priority, STRPRI will have been set by strrput().  This flag
6191  * should be reset only when the entire message at the front of the
6192  * queue has been consumed.
6193  *
6194  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6195  */
6196 int
6197 strgetmsg(
6198 	struct vnode *vp,
6199 	struct strbuf *mctl,
6200 	struct strbuf *mdata,
6201 	unsigned char *prip,
6202 	int *flagsp,
6203 	int fmode,
6204 	rval_t *rvp)
6205 {
6206 	struct stdata *stp;
6207 	mblk_t *bp, *nbp;
6208 	mblk_t *savemp = NULL;
6209 	mblk_t *savemptail = NULL;
6210 	uint_t old_sd_flag;
6211 	int flg;
6212 	int more = 0;
6213 	int error = 0;
6214 	char first = 1;
6215 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
6216 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
6217 	unsigned char pri = 0;
6218 	queue_t *q;
6219 	int	pr = 0;			/* Partial read successful */
6220 	struct uio uios;
6221 	struct uio *uiop = &uios;
6222 	struct iovec iovs;
6223 	unsigned char type;
6224 
6225 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
6226 		"strgetmsg:%p", vp);
6227 
6228 	ASSERT(vp->v_stream);
6229 	stp = vp->v_stream;
6230 	rvp->r_val1 = 0;
6231 
6232 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
6233 		if (error = straccess(stp, JCREAD))
6234 			return (error);
6235 
6236 	/* Fast check of flags before acquiring the lock */
6237 	if (stp->sd_flag & (STRDERR|STPLEX)) {
6238 		mutex_enter(&stp->sd_lock);
6239 		error = strgeterr(stp, STRDERR|STPLEX, 0);
6240 		mutex_exit(&stp->sd_lock);
6241 		if (error != 0)
6242 			return (error);
6243 	}
6244 
6245 	switch (*flagsp) {
6246 	case MSG_HIPRI:
6247 		if (*prip != 0)
6248 			return (EINVAL);
6249 		break;
6250 
6251 	case MSG_ANY:
6252 	case MSG_BAND:
6253 		break;
6254 
6255 	default:
6256 		return (EINVAL);
6257 	}
6258 	/*
6259 	 * Setup uio and iov for data part
6260 	 */
6261 	iovs.iov_base = mdata->buf;
6262 	iovs.iov_len = mdata->maxlen;
6263 	uios.uio_iov = &iovs;
6264 	uios.uio_iovcnt = 1;
6265 	uios.uio_loffset = 0;
6266 	uios.uio_segflg = UIO_USERSPACE;
6267 	uios.uio_fmode = 0;
6268 	uios.uio_extflg = UIO_COPY_CACHED;
6269 	uios.uio_resid = mdata->maxlen;
6270 	uios.uio_offset = 0;
6271 
6272 	q = _RD(stp->sd_wrq);
6273 	mutex_enter(&stp->sd_lock);
6274 	old_sd_flag = stp->sd_flag;
6275 	mark = 0;
6276 	for (;;) {
6277 		int done = 0;
6278 		mblk_t *q_first = q->q_first;
6279 
6280 		/*
6281 		 * Get the next message of appropriate priority
6282 		 * from the stream head.  If the caller is interested
6283 		 * in band or hipri messages, then they should already
6284 		 * be enqueued at the stream head.  On the other hand
6285 		 * if the caller wants normal (band 0) messages, they
6286 		 * might be deferred in a synchronous stream and they
6287 		 * will need to be pulled up.
6288 		 *
6289 		 * After we have dequeued a message, we might find that
6290 		 * it was a deferred M_SIG that was enqueued at the
6291 		 * stream head.  It must now be posted as part of the
6292 		 * read by calling strsignal_nolock().
6293 		 *
6294 		 * Also note that strrput does not enqueue an M_PCSIG,
6295 		 * and there cannot be more than one hipri message,
6296 		 * so there was no need to have the M_PCSIG case.
6297 		 *
6298 		 * At some time it might be nice to try and wrap the
6299 		 * functionality of kstrgetmsg() and strgetmsg() into
6300 		 * a common routine so to reduce the amount of replicated
6301 		 * code (since they are extremely similar).
6302 		 */
6303 		if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
6304 			/* Asking for normal, band0 data */
6305 			bp = strget(stp, q, uiop, first, &error);
6306 			ASSERT(MUTEX_HELD(&stp->sd_lock));
6307 			if (bp != NULL) {
6308 				if (bp->b_datap->db_type == M_SIG) {
6309 					strsignal_nolock(stp, *bp->b_rptr,
6310 					    (int32_t)bp->b_band);
6311 					continue;
6312 				} else {
6313 					break;
6314 				}
6315 			}
6316 			if (error != 0) {
6317 				goto getmout;
6318 			}
6319 
6320 		/*
6321 		 * We can't depend on the value of STRPRI here because
6322 		 * the stream head may be in transit. Therefore, we
6323 		 * must look at the type of the first message to
6324 		 * determine if a high priority messages is waiting
6325 		 */
6326 		} else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
6327 			    q_first->b_datap->db_type >= QPCTL &&
6328 			    (bp = getq_noenab(q)) != NULL) {
6329 			/* Asked for HIPRI and got one */
6330 			ASSERT(bp->b_datap->db_type >= QPCTL);
6331 			break;
6332 		} else if ((*flagsp & MSG_BAND) && q_first != NULL &&
6333 			    ((q_first->b_band >= *prip) ||
6334 			    q_first->b_datap->db_type >= QPCTL) &&
6335 			    (bp = getq_noenab(q)) != NULL) {
6336 			/*
6337 			 * Asked for at least band "prip" and got either at
6338 			 * least that band or a hipri message.
6339 			 */
6340 			ASSERT(bp->b_band >= *prip ||
6341 				bp->b_datap->db_type >= QPCTL);
6342 			if (bp->b_datap->db_type == M_SIG) {
6343 				strsignal_nolock(stp, *bp->b_rptr,
6344 				    (int32_t)bp->b_band);
6345 				continue;
6346 			} else {
6347 				break;
6348 			}
6349 		}
6350 
6351 		/* No data. Time to sleep? */
6352 		qbackenable(q, 0);
6353 
6354 		/*
6355 		 * If STRHUP or STREOF, return 0 length control and data.
6356 		 * If resid is 0, then a read(fd,buf,0) was done. Do not
6357 		 * sleep to satisfy this request because by default we have
6358 		 * zero bytes to return.
6359 		 */
6360 		if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
6361 		    mdata->maxlen == 0)) {
6362 			mctl->len = mdata->len = 0;
6363 			*flagsp = 0;
6364 			mutex_exit(&stp->sd_lock);
6365 			return (0);
6366 		}
6367 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
6368 			"strgetmsg calls strwaitq:%p, %p",
6369 			vp, uiop);
6370 		if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
6371 		    &done)) != 0) || done) {
6372 			TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
6373 				"strgetmsg error or done:%p, %p",
6374 				vp, uiop);
6375 			mutex_exit(&stp->sd_lock);
6376 			return (error);
6377 		}
6378 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
6379 			"strgetmsg awakes:%p, %p", vp, uiop);
6380 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
6381 			mutex_exit(&stp->sd_lock);
6382 			if (error = straccess(stp, JCREAD))
6383 				return (error);
6384 			mutex_enter(&stp->sd_lock);
6385 		}
6386 		first = 0;
6387 	}
6388 	ASSERT(bp != NULL);
6389 	/*
6390 	 * Extract any mark information. If the message is not completely
6391 	 * consumed this information will be put in the mblk
6392 	 * that is putback.
6393 	 * If MSGMARKNEXT is set and the message is completely consumed
6394 	 * the STRATMARK flag will be set below. Likewise, if
6395 	 * MSGNOTMARKNEXT is set and the message is
6396 	 * completely consumed STRNOTATMARK will be set.
6397 	 */
6398 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6399 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6400 		(MSGMARKNEXT|MSGNOTMARKNEXT));
6401 	if (mark != 0 && bp == stp->sd_mark) {
6402 		mark |= _LASTMARK;
6403 		stp->sd_mark = NULL;
6404 	}
6405 	/*
6406 	 * keep track of the original message type and priority
6407 	 */
6408 	pri = bp->b_band;
6409 	type = bp->b_datap->db_type;
6410 	if (type == M_PASSFP) {
6411 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
6412 			stp->sd_mark = bp;
6413 		bp->b_flag |= mark & ~_LASTMARK;
6414 		putback(stp, q, bp, pri);
6415 		qbackenable(q, pri);
6416 		mutex_exit(&stp->sd_lock);
6417 		return (EBADMSG);
6418 	}
6419 	ASSERT(type != M_SIG);
6420 
6421 	/*
6422 	 * Set this flag so strrput will not generate signals. Need to
6423 	 * make sure this flag is cleared before leaving this routine
6424 	 * else signals will stop being sent.
6425 	 */
6426 	stp->sd_flag |= STRGETINPROG;
6427 	mutex_exit(&stp->sd_lock);
6428 
6429 	if (STREAM_NEEDSERVICE(stp))
6430 		stream_runservice(stp);
6431 
6432 	/*
6433 	 * Set HIPRI flag if message is priority.
6434 	 */
6435 	if (type >= QPCTL)
6436 		flg = MSG_HIPRI;
6437 	else
6438 		flg = MSG_BAND;
6439 
6440 	/*
6441 	 * First process PROTO or PCPROTO blocks, if any.
6442 	 */
6443 	if (mctl->maxlen >= 0 && type != M_DATA) {
6444 		size_t	n, bcnt;
6445 		char	*ubuf;
6446 
6447 		bcnt = mctl->maxlen;
6448 		ubuf = mctl->buf;
6449 		while (bp != NULL && bp->b_datap->db_type != M_DATA) {
6450 			if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
6451 			    copyout(bp->b_rptr, ubuf, n)) {
6452 				error = EFAULT;
6453 				mutex_enter(&stp->sd_lock);
6454 				/*
6455 				 * clear stream head pri flag based on
6456 				 * first message type
6457 				 */
6458 				if (type >= QPCTL) {
6459 					ASSERT(type == M_PCPROTO);
6460 					stp->sd_flag &= ~STRPRI;
6461 				}
6462 				more = 0;
6463 				freemsg(bp);
6464 				goto getmout;
6465 			}
6466 			ubuf += n;
6467 			bp->b_rptr += n;
6468 			if (bp->b_rptr >= bp->b_wptr) {
6469 				nbp = bp;
6470 				bp = bp->b_cont;
6471 				freeb(nbp);
6472 			}
6473 			ASSERT(n <= bcnt);
6474 			bcnt -= n;
6475 			if (bcnt == 0)
6476 				break;
6477 		}
6478 		mctl->len = mctl->maxlen - bcnt;
6479 	} else
6480 		mctl->len = -1;
6481 
6482 	if (bp && bp->b_datap->db_type != M_DATA) {
6483 		/*
6484 		 * More PROTO blocks in msg.
6485 		 */
6486 		more |= MORECTL;
6487 		savemp = bp;
6488 		while (bp && bp->b_datap->db_type != M_DATA) {
6489 			savemptail = bp;
6490 			bp = bp->b_cont;
6491 		}
6492 		savemptail->b_cont = NULL;
6493 	}
6494 
6495 	/*
6496 	 * Now process DATA blocks, if any.
6497 	 */
6498 	if (mdata->maxlen >= 0 && bp) {
6499 		/*
6500 		 * struiocopyout will consume a potential zero-length
6501 		 * M_DATA even if uio_resid is zero.
6502 		 */
6503 		size_t oldresid = uiop->uio_resid;
6504 
6505 		bp = struiocopyout(bp, uiop, &error);
6506 		if (error != 0) {
6507 			mutex_enter(&stp->sd_lock);
6508 			/*
6509 			 * clear stream head hi pri flag based on
6510 			 * first message
6511 			 */
6512 			if (type >= QPCTL) {
6513 				ASSERT(type == M_PCPROTO);
6514 				stp->sd_flag &= ~STRPRI;
6515 			}
6516 			more = 0;
6517 			freemsg(savemp);
6518 			goto getmout;
6519 		}
6520 		/*
6521 		 * (pr == 1) indicates a partial read.
6522 		 */
6523 		if (oldresid > uiop->uio_resid)
6524 			pr = 1;
6525 		mdata->len = mdata->maxlen - uiop->uio_resid;
6526 	} else
6527 		mdata->len = -1;
6528 
6529 	if (bp) {			/* more data blocks in msg */
6530 		more |= MOREDATA;
6531 		if (savemp)
6532 			savemptail->b_cont = bp;
6533 		else
6534 			savemp = bp;
6535 	}
6536 
6537 	mutex_enter(&stp->sd_lock);
6538 	if (savemp) {
6539 		if (pr && (savemp->b_datap->db_type == M_DATA) &&
6540 		    msgnodata(savemp)) {
6541 			/*
6542 			 * Avoid queuing a zero-length tail part of
6543 			 * a message. pr=1 indicates that we read some of
6544 			 * the message.
6545 			 */
6546 			freemsg(savemp);
6547 			more &= ~MOREDATA;
6548 			/*
6549 			 * clear stream head hi pri flag based on
6550 			 * first message
6551 			 */
6552 			if (type >= QPCTL) {
6553 				ASSERT(type == M_PCPROTO);
6554 				stp->sd_flag &= ~STRPRI;
6555 			}
6556 		} else {
6557 			savemp->b_band = pri;
6558 			/*
6559 			 * If the first message was HIPRI and the one we're
6560 			 * putting back isn't, then clear STRPRI, otherwise
6561 			 * set STRPRI again.  Note that we must set STRPRI
6562 			 * again since the flush logic in strrput_nondata()
6563 			 * may have cleared it while we had sd_lock dropped.
6564 			 */
6565 			if (type >= QPCTL) {
6566 				ASSERT(type == M_PCPROTO);
6567 				if (queclass(savemp) < QPCTL)
6568 					stp->sd_flag &= ~STRPRI;
6569 				else
6570 					stp->sd_flag |= STRPRI;
6571 			} else if (queclass(savemp) >= QPCTL) {
6572 				/*
6573 				 * The first message was not a HIPRI message,
6574 				 * but the one we are about to putback is.
6575 				 * For simplicitly, we do not allow for HIPRI
6576 				 * messages to be embedded in the message
6577 				 * body, so just force it to same type as
6578 				 * first message.
6579 				 */
6580 				ASSERT(type == M_DATA || type == M_PROTO);
6581 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
6582 				savemp->b_datap->db_type = type;
6583 			}
6584 			if (mark != 0) {
6585 				savemp->b_flag |= mark & ~_LASTMARK;
6586 				if ((mark & _LASTMARK) &&
6587 				    (stp->sd_mark == NULL)) {
6588 					/*
6589 					 * If another marked message arrived
6590 					 * while sd_lock was not held sd_mark
6591 					 * would be non-NULL.
6592 					 */
6593 					stp->sd_mark = savemp;
6594 				}
6595 			}
6596 			putback(stp, q, savemp, pri);
6597 		}
6598 	} else {
6599 		/*
6600 		 * The complete message was consumed.
6601 		 *
6602 		 * If another M_PCPROTO arrived while sd_lock was not held
6603 		 * it would have been discarded since STRPRI was still set.
6604 		 *
6605 		 * Move the MSG*MARKNEXT information
6606 		 * to the stream head just in case
6607 		 * the read queue becomes empty.
6608 		 * clear stream head hi pri flag based on
6609 		 * first message
6610 		 *
6611 		 * If the stream head was at the mark
6612 		 * (STRATMARK) before we dropped sd_lock above
6613 		 * and some data was consumed then we have
6614 		 * moved past the mark thus STRATMARK is
6615 		 * cleared. However, if a message arrived in
6616 		 * strrput during the copyout above causing
6617 		 * STRATMARK to be set we can not clear that
6618 		 * flag.
6619 		 */
6620 		if (type >= QPCTL) {
6621 			ASSERT(type == M_PCPROTO);
6622 			stp->sd_flag &= ~STRPRI;
6623 		}
6624 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
6625 			if (mark & MSGMARKNEXT) {
6626 				stp->sd_flag &= ~STRNOTATMARK;
6627 				stp->sd_flag |= STRATMARK;
6628 			} else if (mark & MSGNOTMARKNEXT) {
6629 				stp->sd_flag &= ~STRATMARK;
6630 				stp->sd_flag |= STRNOTATMARK;
6631 			} else {
6632 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
6633 			}
6634 		} else if (pr && (old_sd_flag & STRATMARK)) {
6635 			stp->sd_flag &= ~STRATMARK;
6636 		}
6637 	}
6638 
6639 	*flagsp = flg;
6640 	*prip = pri;
6641 
6642 	/*
6643 	 * Getmsg cleanup processing - if the state of the queue has changed
6644 	 * some signals may need to be sent and/or poll awakened.
6645 	 */
6646 getmout:
6647 	qbackenable(q, pri);
6648 
6649 	/*
6650 	 * We dropped the stream head lock above. Send all M_SIG messages
6651 	 * before processing stream head for SIGPOLL messages.
6652 	 */
6653 	ASSERT(MUTEX_HELD(&stp->sd_lock));
6654 	while ((bp = q->q_first) != NULL &&
6655 	    (bp->b_datap->db_type == M_SIG)) {
6656 		/*
6657 		 * sd_lock is held so the content of the read queue can not
6658 		 * change.
6659 		 */
6660 		bp = getq(q);
6661 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
6662 
6663 		strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band);
6664 		mutex_exit(&stp->sd_lock);
6665 		freemsg(bp);
6666 		if (STREAM_NEEDSERVICE(stp))
6667 			stream_runservice(stp);
6668 		mutex_enter(&stp->sd_lock);
6669 	}
6670 
6671 	/*
6672 	 * stream head cannot change while we make the determination
6673 	 * whether or not to send a signal. Drop the flag to allow strrput
6674 	 * to send firstmsgsigs again.
6675 	 */
6676 	stp->sd_flag &= ~STRGETINPROG;
6677 
6678 	/*
6679 	 * If the type of message at the front of the queue changed
6680 	 * due to the receive the appropriate signals and pollwakeup events
6681 	 * are generated. The type of changes are:
6682 	 *	Processed a hipri message, q_first is not hipri.
6683 	 *	Processed a band X message, and q_first is band Y.
6684 	 * The generated signals and pollwakeups are identical to what
6685 	 * strrput() generates should the message that is now on q_first
6686 	 * arrive to an empty read queue.
6687 	 *
6688 	 * Note: only strrput will send a signal for a hipri message.
6689 	 */
6690 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
6691 		strsigset_t signals = 0;
6692 		strpollset_t pollwakeups = 0;
6693 
6694 		if (flg & MSG_HIPRI) {
6695 			/*
6696 			 * Removed a hipri message. Regular data at
6697 			 * the front of  the queue.
6698 			 */
6699 			if (bp->b_band == 0) {
6700 				signals = S_INPUT | S_RDNORM;
6701 				pollwakeups = POLLIN | POLLRDNORM;
6702 			} else {
6703 				signals = S_INPUT | S_RDBAND;
6704 				pollwakeups = POLLIN | POLLRDBAND;
6705 			}
6706 		} else if (pri != bp->b_band) {
6707 			/*
6708 			 * The band is different for the new q_first.
6709 			 */
6710 			if (bp->b_band == 0) {
6711 				signals = S_RDNORM;
6712 				pollwakeups = POLLIN | POLLRDNORM;
6713 			} else {
6714 				signals = S_RDBAND;
6715 				pollwakeups = POLLIN | POLLRDBAND;
6716 			}
6717 		}
6718 
6719 		if (pollwakeups != 0) {
6720 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
6721 				if (!(stp->sd_rput_opt & SR_POLLIN))
6722 					goto no_pollwake;
6723 				stp->sd_rput_opt &= ~SR_POLLIN;
6724 			}
6725 			mutex_exit(&stp->sd_lock);
6726 			pollwakeup(&stp->sd_pollist, pollwakeups);
6727 			mutex_enter(&stp->sd_lock);
6728 		}
6729 no_pollwake:
6730 
6731 		if (stp->sd_sigflags & signals)
6732 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
6733 	}
6734 	mutex_exit(&stp->sd_lock);
6735 
6736 	rvp->r_val1 = more;
6737 	return (error);
6738 #undef	_LASTMARK
6739 }
6740 
6741 /*
6742  * Get the next message from the read queue.  If the message is
6743  * priority, STRPRI will have been set by strrput().  This flag
6744  * should be reset only when the entire message at the front of the
6745  * queue as been consumed.
6746  *
6747  * If uiop is NULL all data is returned in mctlp.
6748  * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
6749  * not enabled.
6750  * The timeout parameter is in milliseconds; -1 for infinity.
6751  * This routine handles the consolidation private flags:
6752  *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
6753  *	MSG_DELAYERROR	Defer the error check until the queue is empty.
6754  *	MSG_HOLDSIG	Hold signals while waiting for data.
6755  *	MSG_IPEEK	Only peek at messages.
6756  *	MSG_DISCARDTAIL	Discard the tail M_DATA part of the message
6757  *			that doesn't fit.
6758  *	MSG_NOMARK	If the message is marked leave it on the queue.
6759  *
6760  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6761  */
6762 int
6763 kstrgetmsg(
6764 	struct vnode *vp,
6765 	mblk_t **mctlp,
6766 	struct uio *uiop,
6767 	unsigned char *prip,
6768 	int *flagsp,
6769 	clock_t timout,
6770 	rval_t *rvp)
6771 {
6772 	struct stdata *stp;
6773 	mblk_t *bp, *nbp;
6774 	mblk_t *savemp = NULL;
6775 	mblk_t *savemptail = NULL;
6776 	int flags;
6777 	uint_t old_sd_flag;
6778 	int flg;
6779 	int more = 0;
6780 	int error = 0;
6781 	char first = 1;
6782 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
6783 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
6784 	unsigned char pri = 0;
6785 	queue_t *q;
6786 	int	pr = 0;			/* Partial read successful */
6787 	unsigned char type;
6788 
6789 	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
6790 		"kstrgetmsg:%p", vp);
6791 
6792 	ASSERT(vp->v_stream);
6793 	stp = vp->v_stream;
6794 	rvp->r_val1 = 0;
6795 
6796 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
6797 		if (error = straccess(stp, JCREAD))
6798 			return (error);
6799 
6800 	flags = *flagsp;
6801 	/* Fast check of flags before acquiring the lock */
6802 	if (stp->sd_flag & (STRDERR|STPLEX)) {
6803 		if ((stp->sd_flag & STPLEX) ||
6804 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
6805 			mutex_enter(&stp->sd_lock);
6806 			error = strgeterr(stp, STRDERR|STPLEX,
6807 					(flags & MSG_IPEEK));
6808 			mutex_exit(&stp->sd_lock);
6809 			if (error != 0)
6810 				return (error);
6811 		}
6812 	}
6813 
6814 	switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
6815 	case MSG_HIPRI:
6816 		if (*prip != 0)
6817 			return (EINVAL);
6818 		break;
6819 
6820 	case MSG_ANY:
6821 	case MSG_BAND:
6822 		break;
6823 
6824 	default:
6825 		return (EINVAL);
6826 	}
6827 
6828 retry:
6829 	q = _RD(stp->sd_wrq);
6830 	mutex_enter(&stp->sd_lock);
6831 	old_sd_flag = stp->sd_flag;
6832 	mark = 0;
6833 	for (;;) {
6834 		int done = 0;
6835 		int waitflag;
6836 		int fmode;
6837 		mblk_t *q_first = q->q_first;
6838 
6839 		/*
6840 		 * This section of the code operates just like the code
6841 		 * in strgetmsg().  There is a comment there about what
6842 		 * is going on here.
6843 		 */
6844 		if (!(flags & (MSG_HIPRI|MSG_BAND))) {
6845 			/* Asking for normal, band0 data */
6846 			bp = strget(stp, q, uiop, first, &error);
6847 			ASSERT(MUTEX_HELD(&stp->sd_lock));
6848 			if (bp != NULL) {
6849 				if (bp->b_datap->db_type == M_SIG) {
6850 					strsignal_nolock(stp, *bp->b_rptr,
6851 					    (int32_t)bp->b_band);
6852 					continue;
6853 				} else {
6854 					break;
6855 				}
6856 			}
6857 			if (error != 0) {
6858 				goto getmout;
6859 			}
6860 		/*
6861 		 * We can't depend on the value of STRPRI here because
6862 		 * the stream head may be in transit. Therefore, we
6863 		 * must look at the type of the first message to
6864 		 * determine if a high priority messages is waiting
6865 		 */
6866 		} else if ((flags & MSG_HIPRI) && q_first != NULL &&
6867 			    q_first->b_datap->db_type >= QPCTL &&
6868 			    (bp = getq_noenab(q)) != NULL) {
6869 			ASSERT(bp->b_datap->db_type >= QPCTL);
6870 			break;
6871 		} else if ((flags & MSG_BAND) && q_first != NULL &&
6872 			    ((q_first->b_band >= *prip) ||
6873 			    q_first->b_datap->db_type >= QPCTL) &&
6874 			    (bp = getq_noenab(q)) != NULL) {
6875 			/*
6876 			 * Asked for at least band "prip" and got either at
6877 			 * least that band or a hipri message.
6878 			 */
6879 			ASSERT(bp->b_band >= *prip ||
6880 				bp->b_datap->db_type >= QPCTL);
6881 			if (bp->b_datap->db_type == M_SIG) {
6882 				strsignal_nolock(stp, *bp->b_rptr,
6883 				    (int32_t)bp->b_band);
6884 				continue;
6885 			} else {
6886 				break;
6887 			}
6888 		}
6889 
6890 		/* No data. Time to sleep? */
6891 		qbackenable(q, 0);
6892 
6893 		/*
6894 		 * Delayed error notification?
6895 		 */
6896 		if ((stp->sd_flag & (STRDERR|STPLEX)) &&
6897 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
6898 			error = strgeterr(stp, STRDERR|STPLEX,
6899 					(flags & MSG_IPEEK));
6900 			if (error != 0) {
6901 				mutex_exit(&stp->sd_lock);
6902 				return (error);
6903 			}
6904 		}
6905 
6906 		/*
6907 		 * If STRHUP or STREOF, return 0 length control and data.
6908 		 * If a read(fd,buf,0) has been done, do not sleep, just
6909 		 * return.
6910 		 *
6911 		 * If mctlp == NULL and uiop == NULL, then the code will
6912 		 * do the strwaitq. This is an understood way of saying
6913 		 * sleep "polling" until a message is received.
6914 		 */
6915 		if ((stp->sd_flag & (STRHUP|STREOF)) ||
6916 		    (uiop != NULL && uiop->uio_resid == 0)) {
6917 			if (mctlp != NULL)
6918 				*mctlp = NULL;
6919 			*flagsp = 0;
6920 			mutex_exit(&stp->sd_lock);
6921 			return (0);
6922 		}
6923 
6924 		waitflag = GETWAIT;
6925 		if (flags &
6926 		    (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
6927 			if (flags & MSG_HOLDSIG)
6928 				waitflag |= STR_NOSIG;
6929 			if (flags & MSG_IGNERROR)
6930 				waitflag |= STR_NOERROR;
6931 			if (flags & MSG_IPEEK)
6932 				waitflag |= STR_PEEK;
6933 			if (flags & MSG_DELAYERROR)
6934 				waitflag |= STR_DELAYERR;
6935 		}
6936 		if (uiop != NULL)
6937 			fmode = uiop->uio_fmode;
6938 		else
6939 			fmode = 0;
6940 
6941 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
6942 			"kstrgetmsg calls strwaitq:%p, %p",
6943 			vp, uiop);
6944 		if (((error = strwaitq(stp, waitflag, (ssize_t)0,
6945 		    fmode, timout, &done)) != 0) || done) {
6946 			TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
6947 				"kstrgetmsg error or done:%p, %p",
6948 				vp, uiop);
6949 			mutex_exit(&stp->sd_lock);
6950 			return (error);
6951 		}
6952 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
6953 			"kstrgetmsg awakes:%p, %p", vp, uiop);
6954 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
6955 			mutex_exit(&stp->sd_lock);
6956 			if (error = straccess(stp, JCREAD))
6957 				return (error);
6958 			mutex_enter(&stp->sd_lock);
6959 		}
6960 		first = 0;
6961 	}
6962 	ASSERT(bp != NULL);
6963 	/*
6964 	 * Extract any mark information. If the message is not completely
6965 	 * consumed this information will be put in the mblk
6966 	 * that is putback.
6967 	 * If MSGMARKNEXT is set and the message is completely consumed
6968 	 * the STRATMARK flag will be set below. Likewise, if
6969 	 * MSGNOTMARKNEXT is set and the message is
6970 	 * completely consumed STRNOTATMARK will be set.
6971 	 */
6972 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6973 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6974 		(MSGMARKNEXT|MSGNOTMARKNEXT));
6975 	pri = bp->b_band;
6976 	if (mark != 0) {
6977 		/*
6978 		 * If the caller doesn't want the mark return.
6979 		 * Used to implement MSG_WAITALL in sockets.
6980 		 */
6981 		if (flags & MSG_NOMARK) {
6982 			putback(stp, q, bp, pri);
6983 			qbackenable(q, pri);
6984 			mutex_exit(&stp->sd_lock);
6985 			return (EWOULDBLOCK);
6986 		}
6987 		if (bp == stp->sd_mark) {
6988 			mark |= _LASTMARK;
6989 			stp->sd_mark = NULL;
6990 		}
6991 	}
6992 
6993 	/*
6994 	 * keep track of the first message type
6995 	 */
6996 	type = bp->b_datap->db_type;
6997 
6998 	if (bp->b_datap->db_type == M_PASSFP) {
6999 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7000 			stp->sd_mark = bp;
7001 		bp->b_flag |= mark & ~_LASTMARK;
7002 		putback(stp, q, bp, pri);
7003 		qbackenable(q, pri);
7004 		mutex_exit(&stp->sd_lock);
7005 		return (EBADMSG);
7006 	}
7007 	ASSERT(type != M_SIG);
7008 
7009 	if (flags & MSG_IPEEK) {
7010 		/*
7011 		 * Clear any struioflag - we do the uiomove over again
7012 		 * when peeking since it simplifies the code.
7013 		 *
7014 		 * Dup the message and put the original back on the queue.
7015 		 * If dupmsg() fails, try again with copymsg() to see if
7016 		 * there is indeed a shortage of memory.  dupmsg() may fail
7017 		 * if db_ref in any of the messages reaches its limit.
7018 		 */
7019 		if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
7020 			/*
7021 			 * Restore the state of the stream head since we
7022 			 * need to drop sd_lock (strwaitbuf is sleeping).
7023 			 */
7024 			size_t size = msgdsize(bp);
7025 
7026 			if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7027 				stp->sd_mark = bp;
7028 			bp->b_flag |= mark & ~_LASTMARK;
7029 			putback(stp, q, bp, pri);
7030 			mutex_exit(&stp->sd_lock);
7031 			error = strwaitbuf(size, BPRI_HI);
7032 			if (error) {
7033 				/*
7034 				 * There is no net change to the queue thus
7035 				 * no need to qbackenable.
7036 				 */
7037 				return (error);
7038 			}
7039 			goto retry;
7040 		}
7041 
7042 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7043 			stp->sd_mark = bp;
7044 		bp->b_flag |= mark & ~_LASTMARK;
7045 		putback(stp, q, bp, pri);
7046 		bp = nbp;
7047 	}
7048 
7049 	/*
7050 	 * Set this flag so strrput will not generate signals. Need to
7051 	 * make sure this flag is cleared before leaving this routine
7052 	 * else signals will stop being sent.
7053 	 */
7054 	stp->sd_flag |= STRGETINPROG;
7055 	mutex_exit(&stp->sd_lock);
7056 
7057 	if (STREAM_NEEDSERVICE(stp))
7058 		stream_runservice(stp);
7059 
7060 	/*
7061 	 * Set HIPRI flag if message is priority.
7062 	 */
7063 	if (type >= QPCTL)
7064 		flg = MSG_HIPRI;
7065 	else
7066 		flg = MSG_BAND;
7067 
7068 	/*
7069 	 * First process PROTO or PCPROTO blocks, if any.
7070 	 */
7071 	if (mctlp != NULL && type != M_DATA) {
7072 		mblk_t *nbp;
7073 
7074 		*mctlp = bp;
7075 		while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
7076 			bp = bp->b_cont;
7077 		nbp = bp->b_cont;
7078 		bp->b_cont = NULL;
7079 		bp = nbp;
7080 	}
7081 
7082 	if (bp && bp->b_datap->db_type != M_DATA) {
7083 		/*
7084 		 * More PROTO blocks in msg. Will only happen if mctlp is NULL.
7085 		 */
7086 		more |= MORECTL;
7087 		savemp = bp;
7088 		while (bp && bp->b_datap->db_type != M_DATA) {
7089 			savemptail = bp;
7090 			bp = bp->b_cont;
7091 		}
7092 		savemptail->b_cont = NULL;
7093 	}
7094 
7095 	/*
7096 	 * Now process DATA blocks, if any.
7097 	 */
7098 	if (uiop == NULL) {
7099 		/* Append data to tail of mctlp */
7100 		if (mctlp != NULL) {
7101 			mblk_t **mpp = mctlp;
7102 
7103 			while (*mpp != NULL)
7104 				mpp = &((*mpp)->b_cont);
7105 			*mpp = bp;
7106 			bp = NULL;
7107 		}
7108 	} else if (uiop->uio_resid >= 0 && bp) {
7109 		size_t oldresid = uiop->uio_resid;
7110 
7111 		/*
7112 		 * If a streams message is likely to consist
7113 		 * of many small mblks, it is pulled up into
7114 		 * one continuous chunk of memory.
7115 		 * see longer comment at top of page
7116 		 * by mblk_pull_len declaration.
7117 		 */
7118 
7119 		if (MBLKL(bp) < mblk_pull_len) {
7120 			(void) pullupmsg(bp, -1);
7121 		}
7122 
7123 		bp = struiocopyout(bp, uiop, &error);
7124 		if (error != 0) {
7125 			if (mctlp != NULL) {
7126 				freemsg(*mctlp);
7127 				*mctlp = NULL;
7128 			} else
7129 				freemsg(savemp);
7130 			mutex_enter(&stp->sd_lock);
7131 			/*
7132 			 * clear stream head hi pri flag based on
7133 			 * first message
7134 			 */
7135 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7136 				ASSERT(type == M_PCPROTO);
7137 				stp->sd_flag &= ~STRPRI;
7138 			}
7139 			more = 0;
7140 			goto getmout;
7141 		}
7142 		/*
7143 		 * (pr == 1) indicates a partial read.
7144 		 */
7145 		if (oldresid > uiop->uio_resid)
7146 			pr = 1;
7147 	}
7148 
7149 	if (bp) {			/* more data blocks in msg */
7150 		more |= MOREDATA;
7151 		if (savemp)
7152 			savemptail->b_cont = bp;
7153 		else
7154 			savemp = bp;
7155 	}
7156 
7157 	mutex_enter(&stp->sd_lock);
7158 	if (savemp) {
7159 		if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
7160 			/*
7161 			 * When MSG_DISCARDTAIL is set or
7162 			 * when peeking discard any tail. When peeking this
7163 			 * is the tail of the dup that was copied out - the
7164 			 * message has already been putback on the queue.
7165 			 * Return MOREDATA to the caller even though the data
7166 			 * is discarded. This is used by sockets (to
7167 			 * set MSG_TRUNC).
7168 			 */
7169 			freemsg(savemp);
7170 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7171 				ASSERT(type == M_PCPROTO);
7172 				stp->sd_flag &= ~STRPRI;
7173 			}
7174 		} else if (pr && (savemp->b_datap->db_type == M_DATA) &&
7175 			    msgnodata(savemp)) {
7176 			/*
7177 			 * Avoid queuing a zero-length tail part of
7178 			 * a message. pr=1 indicates that we read some of
7179 			 * the message.
7180 			 */
7181 			freemsg(savemp);
7182 			more &= ~MOREDATA;
7183 			if (type >= QPCTL) {
7184 				ASSERT(type == M_PCPROTO);
7185 				stp->sd_flag &= ~STRPRI;
7186 			}
7187 		} else {
7188 			savemp->b_band = pri;
7189 			/*
7190 			 * If the first message was HIPRI and the one we're
7191 			 * putting back isn't, then clear STRPRI, otherwise
7192 			 * set STRPRI again.  Note that we must set STRPRI
7193 			 * again since the flush logic in strrput_nondata()
7194 			 * may have cleared it while we had sd_lock dropped.
7195 			 */
7196 			if (type >= QPCTL) {
7197 				ASSERT(type == M_PCPROTO);
7198 				if (queclass(savemp) < QPCTL)
7199 					stp->sd_flag &= ~STRPRI;
7200 				else
7201 					stp->sd_flag |= STRPRI;
7202 			} else if (queclass(savemp) >= QPCTL) {
7203 				/*
7204 				 * The first message was not a HIPRI message,
7205 				 * but the one we are about to putback is.
7206 				 * For simplicitly, we do not allow for HIPRI
7207 				 * messages to be embedded in the message
7208 				 * body, so just force it to same type as
7209 				 * first message.
7210 				 */
7211 				ASSERT(type == M_DATA || type == M_PROTO);
7212 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
7213 				savemp->b_datap->db_type = type;
7214 			}
7215 			if (mark != 0) {
7216 				if ((mark & _LASTMARK) &&
7217 				    (stp->sd_mark == NULL)) {
7218 					/*
7219 					 * If another marked message arrived
7220 					 * while sd_lock was not held sd_mark
7221 					 * would be non-NULL.
7222 					 */
7223 					stp->sd_mark = savemp;
7224 				}
7225 				savemp->b_flag |= mark & ~_LASTMARK;
7226 			}
7227 			putback(stp, q, savemp, pri);
7228 		}
7229 	} else if (!(flags & MSG_IPEEK)) {
7230 		/*
7231 		 * The complete message was consumed.
7232 		 *
7233 		 * If another M_PCPROTO arrived while sd_lock was not held
7234 		 * it would have been discarded since STRPRI was still set.
7235 		 *
7236 		 * Move the MSG*MARKNEXT information
7237 		 * to the stream head just in case
7238 		 * the read queue becomes empty.
7239 		 * clear stream head hi pri flag based on
7240 		 * first message
7241 		 *
7242 		 * If the stream head was at the mark
7243 		 * (STRATMARK) before we dropped sd_lock above
7244 		 * and some data was consumed then we have
7245 		 * moved past the mark thus STRATMARK is
7246 		 * cleared. However, if a message arrived in
7247 		 * strrput during the copyout above causing
7248 		 * STRATMARK to be set we can not clear that
7249 		 * flag.
7250 		 * XXX A "perimeter" would help by single-threading strrput,
7251 		 * strread, strgetmsg and kstrgetmsg.
7252 		 */
7253 		if (type >= QPCTL) {
7254 			ASSERT(type == M_PCPROTO);
7255 			stp->sd_flag &= ~STRPRI;
7256 		}
7257 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7258 			if (mark & MSGMARKNEXT) {
7259 				stp->sd_flag &= ~STRNOTATMARK;
7260 				stp->sd_flag |= STRATMARK;
7261 			} else if (mark & MSGNOTMARKNEXT) {
7262 				stp->sd_flag &= ~STRATMARK;
7263 				stp->sd_flag |= STRNOTATMARK;
7264 			} else {
7265 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7266 			}
7267 		} else if (pr && (old_sd_flag & STRATMARK)) {
7268 			stp->sd_flag &= ~STRATMARK;
7269 		}
7270 	}
7271 
7272 	*flagsp = flg;
7273 	*prip = pri;
7274 
7275 	/*
7276 	 * Getmsg cleanup processing - if the state of the queue has changed
7277 	 * some signals may need to be sent and/or poll awakened.
7278 	 */
7279 getmout:
7280 	qbackenable(q, pri);
7281 
7282 	/*
7283 	 * We dropped the stream head lock above. Send all M_SIG messages
7284 	 * before processing stream head for SIGPOLL messages.
7285 	 */
7286 	ASSERT(MUTEX_HELD(&stp->sd_lock));
7287 	while ((bp = q->q_first) != NULL &&
7288 	    (bp->b_datap->db_type == M_SIG)) {
7289 		/*
7290 		 * sd_lock is held so the content of the read queue can not
7291 		 * change.
7292 		 */
7293 		bp = getq(q);
7294 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7295 
7296 		strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band);
7297 		mutex_exit(&stp->sd_lock);
7298 		freemsg(bp);
7299 		if (STREAM_NEEDSERVICE(stp))
7300 			stream_runservice(stp);
7301 		mutex_enter(&stp->sd_lock);
7302 	}
7303 
7304 	/*
7305 	 * stream head cannot change while we make the determination
7306 	 * whether or not to send a signal. Drop the flag to allow strrput
7307 	 * to send firstmsgsigs again.
7308 	 */
7309 	stp->sd_flag &= ~STRGETINPROG;
7310 
7311 	/*
7312 	 * If the type of message at the front of the queue changed
7313 	 * due to the receive the appropriate signals and pollwakeup events
7314 	 * are generated. The type of changes are:
7315 	 *	Processed a hipri message, q_first is not hipri.
7316 	 *	Processed a band X message, and q_first is band Y.
7317 	 * The generated signals and pollwakeups are identical to what
7318 	 * strrput() generates should the message that is now on q_first
7319 	 * arrive to an empty read queue.
7320 	 *
7321 	 * Note: only strrput will send a signal for a hipri message.
7322 	 */
7323 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7324 		strsigset_t signals = 0;
7325 		strpollset_t pollwakeups = 0;
7326 
7327 		if (flg & MSG_HIPRI) {
7328 			/*
7329 			 * Removed a hipri message. Regular data at
7330 			 * the front of  the queue.
7331 			 */
7332 			if (bp->b_band == 0) {
7333 				signals = S_INPUT | S_RDNORM;
7334 				pollwakeups = POLLIN | POLLRDNORM;
7335 			} else {
7336 				signals = S_INPUT | S_RDBAND;
7337 				pollwakeups = POLLIN | POLLRDBAND;
7338 			}
7339 		} else if (pri != bp->b_band) {
7340 			/*
7341 			 * The band is different for the new q_first.
7342 			 */
7343 			if (bp->b_band == 0) {
7344 				signals = S_RDNORM;
7345 				pollwakeups = POLLIN | POLLRDNORM;
7346 			} else {
7347 				signals = S_RDBAND;
7348 				pollwakeups = POLLIN | POLLRDBAND;
7349 			}
7350 		}
7351 
7352 		if (pollwakeups != 0) {
7353 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
7354 				if (!(stp->sd_rput_opt & SR_POLLIN))
7355 					goto no_pollwake;
7356 				stp->sd_rput_opt &= ~SR_POLLIN;
7357 			}
7358 			mutex_exit(&stp->sd_lock);
7359 			pollwakeup(&stp->sd_pollist, pollwakeups);
7360 			mutex_enter(&stp->sd_lock);
7361 		}
7362 no_pollwake:
7363 
7364 		if (stp->sd_sigflags & signals)
7365 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7366 	}
7367 	mutex_exit(&stp->sd_lock);
7368 
7369 	rvp->r_val1 = more;
7370 	return (error);
7371 #undef	_LASTMARK
7372 }
7373 
7374 /*
7375  * Put a message downstream.
7376  *
7377  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7378  */
7379 int
7380 strputmsg(
7381 	struct vnode *vp,
7382 	struct strbuf *mctl,
7383 	struct strbuf *mdata,
7384 	unsigned char pri,
7385 	int flag,
7386 	int fmode)
7387 {
7388 	struct stdata *stp;
7389 	queue_t *wqp;
7390 	mblk_t *mp;
7391 	ssize_t msgsize;
7392 	ssize_t rmin, rmax;
7393 	int error;
7394 	struct uio uios;
7395 	struct uio *uiop = &uios;
7396 	struct iovec iovs;
7397 	int xpg4 = 0;
7398 
7399 	ASSERT(vp->v_stream);
7400 	stp = vp->v_stream;
7401 	wqp = stp->sd_wrq;
7402 
7403 	/*
7404 	 * If it is an XPG4 application, we need to send
7405 	 * SIGPIPE below
7406 	 */
7407 
7408 	xpg4 = (flag & MSG_XPG4) ? 1 : 0;
7409 	flag &= ~MSG_XPG4;
7410 
7411 #ifdef C2_AUDIT
7412 	if (audit_active)
7413 		audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
7414 #endif
7415 
7416 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
7417 		if (error = straccess(stp, JCWRITE))
7418 			return (error);
7419 
7420 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7421 		mutex_enter(&stp->sd_lock);
7422 		error = strwriteable(stp, B_FALSE, xpg4);
7423 		mutex_exit(&stp->sd_lock);
7424 		if (error != 0)
7425 			return (error);
7426 	}
7427 
7428 	/*
7429 	 * Check for legal flag value.
7430 	 */
7431 	switch (flag) {
7432 	case MSG_HIPRI:
7433 		if ((mctl->len < 0) || (pri != 0))
7434 			return (EINVAL);
7435 		break;
7436 	case MSG_BAND:
7437 		break;
7438 
7439 	default:
7440 		return (EINVAL);
7441 	}
7442 
7443 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
7444 		"strputmsg in:stp %p", stp);
7445 
7446 	/* get these values from those cached in the stream head */
7447 	rmin = stp->sd_qn_minpsz;
7448 	rmax = stp->sd_qn_maxpsz;
7449 
7450 	/*
7451 	 * Make sure ctl and data sizes together fall within the
7452 	 * limits of the max and min receive packet sizes and do
7453 	 * not exceed system limit.
7454 	 */
7455 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
7456 	if (rmax == 0) {
7457 		return (ERANGE);
7458 	}
7459 	/*
7460 	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7461 	 * Needed to prevent partial failures in the strmakedata loop.
7462 	 */
7463 	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7464 		rmax = stp->sd_maxblk;
7465 
7466 	if ((msgsize = mdata->len) < 0) {
7467 		msgsize = 0;
7468 		rmin = 0;	/* no range check for NULL data part */
7469 	}
7470 	if ((msgsize < rmin) ||
7471 	    ((msgsize > rmax) && (rmax != INFPSZ)) ||
7472 	    (mctl->len > strctlsz)) {
7473 		return (ERANGE);
7474 	}
7475 
7476 	/*
7477 	 * Setup uio and iov for data part
7478 	 */
7479 	iovs.iov_base = mdata->buf;
7480 	iovs.iov_len = msgsize;
7481 	uios.uio_iov = &iovs;
7482 	uios.uio_iovcnt = 1;
7483 	uios.uio_loffset = 0;
7484 	uios.uio_segflg = UIO_USERSPACE;
7485 	uios.uio_fmode = fmode;
7486 	uios.uio_extflg = UIO_COPY_DEFAULT;
7487 	uios.uio_resid = msgsize;
7488 	uios.uio_offset = 0;
7489 
7490 	/* Ignore flow control in strput for HIPRI */
7491 	if (flag & MSG_HIPRI)
7492 		flag |= MSG_IGNFLOW;
7493 
7494 	for (;;) {
7495 		int done = 0;
7496 
7497 		/*
7498 		 * strput will always free the ctl mblk - even when strput
7499 		 * fails.
7500 		 */
7501 		if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
7502 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7503 				"strputmsg out:stp %p out %d error %d",
7504 				stp, 1, error);
7505 			return (error);
7506 		}
7507 		/*
7508 		 * Verify that the whole message can be transferred by
7509 		 * strput.
7510 		 */
7511 		ASSERT(stp->sd_maxblk == INFPSZ ||
7512 			stp->sd_maxblk >= mdata->len);
7513 
7514 		msgsize = mdata->len;
7515 		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7516 		mdata->len = msgsize;
7517 
7518 		if (error == 0)
7519 			break;
7520 
7521 		if (error != EWOULDBLOCK)
7522 			goto out;
7523 
7524 		mutex_enter(&stp->sd_lock);
7525 		/*
7526 		 * Check for a missed wakeup.
7527 		 * Needed since strput did not hold sd_lock across
7528 		 * the canputnext.
7529 		 */
7530 		if (bcanputnext(wqp, pri)) {
7531 			/* Try again */
7532 			mutex_exit(&stp->sd_lock);
7533 			continue;
7534 		}
7535 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
7536 			"strputmsg wait:stp %p waits pri %d", stp, pri);
7537 		if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
7538 		    &done)) != 0) || done) {
7539 			mutex_exit(&stp->sd_lock);
7540 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7541 				"strputmsg out:q %p out %d error %d",
7542 				stp, 0, error);
7543 			return (error);
7544 		}
7545 		TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
7546 			"strputmsg wake:stp %p wakes", stp);
7547 		mutex_exit(&stp->sd_lock);
7548 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
7549 			if (error = straccess(stp, JCWRITE))
7550 				return (error);
7551 	}
7552 out:
7553 	/*
7554 	 * For historic reasons, applications expect EAGAIN
7555 	 * when data mblk could not be allocated. so change
7556 	 * ENOMEM back to EAGAIN
7557 	 */
7558 	if (error == ENOMEM)
7559 		error = EAGAIN;
7560 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7561 		"strputmsg out:stp %p out %d error %d", stp, 2, error);
7562 	return (error);
7563 }
7564 
7565 /*
7566  * Put a message downstream.
7567  * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
7568  * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
7569  * and the fmode parameter.
7570  *
7571  * This routine handles the consolidation private flags:
7572  *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
7573  *	MSG_HOLDSIG	Hold signals while waiting for data.
7574  *	MSG_IGNFLOW	Don't check streams flow control.
7575  *
7576  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7577  */
7578 int
7579 kstrputmsg(
7580 	struct vnode *vp,
7581 	mblk_t *mctl,
7582 	struct uio *uiop,
7583 	ssize_t msgsize,
7584 	unsigned char pri,
7585 	int flag,
7586 	int fmode)
7587 {
7588 	struct stdata *stp;
7589 	queue_t *wqp;
7590 	ssize_t rmin, rmax;
7591 	int error;
7592 
7593 	ASSERT(vp->v_stream);
7594 	stp = vp->v_stream;
7595 	wqp = stp->sd_wrq;
7596 #ifdef C2_AUDIT
7597 	if (audit_active)
7598 		audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
7599 #endif
7600 	if (mctl == NULL)
7601 		return (EINVAL);
7602 
7603 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
7604 		if (error = straccess(stp, JCWRITE)) {
7605 			freemsg(mctl);
7606 			return (error);
7607 		}
7608 	}
7609 
7610 	if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
7611 		if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7612 			mutex_enter(&stp->sd_lock);
7613 			error = strwriteable(stp, B_FALSE, B_TRUE);
7614 			mutex_exit(&stp->sd_lock);
7615 			if (error != 0) {
7616 				freemsg(mctl);
7617 				return (error);
7618 			}
7619 		}
7620 	}
7621 
7622 	/*
7623 	 * Check for legal flag value.
7624 	 */
7625 	switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
7626 	case MSG_HIPRI:
7627 		if (pri != 0) {
7628 			freemsg(mctl);
7629 			return (EINVAL);
7630 		}
7631 		break;
7632 	case MSG_BAND:
7633 		break;
7634 	default:
7635 		freemsg(mctl);
7636 		return (EINVAL);
7637 	}
7638 
7639 	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
7640 		"kstrputmsg in:stp %p", stp);
7641 
7642 	/* get these values from those cached in the stream head */
7643 	rmin = stp->sd_qn_minpsz;
7644 	rmax = stp->sd_qn_maxpsz;
7645 
7646 	/*
7647 	 * Make sure ctl and data sizes together fall within the
7648 	 * limits of the max and min receive packet sizes and do
7649 	 * not exceed system limit.
7650 	 */
7651 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
7652 	if (rmax == 0) {
7653 		freemsg(mctl);
7654 		return (ERANGE);
7655 	}
7656 	/*
7657 	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7658 	 * Needed to prevent partial failures in the strmakedata loop.
7659 	 */
7660 	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7661 		rmax = stp->sd_maxblk;
7662 
7663 	if (uiop == NULL) {
7664 		msgsize = -1;
7665 		rmin = -1;	/* no range check for NULL data part */
7666 	} else {
7667 		/* Use uio flags as well as the fmode parameter flags */
7668 		fmode |= uiop->uio_fmode;
7669 
7670 		if ((msgsize < rmin) ||
7671 		    ((msgsize > rmax) && (rmax != INFPSZ))) {
7672 			freemsg(mctl);
7673 			return (ERANGE);
7674 		}
7675 	}
7676 
7677 	/* Ignore flow control in strput for HIPRI */
7678 	if (flag & MSG_HIPRI)
7679 		flag |= MSG_IGNFLOW;
7680 
7681 	for (;;) {
7682 		int done = 0;
7683 		int waitflag;
7684 		mblk_t *mp;
7685 
7686 		/*
7687 		 * strput will always free the ctl mblk - even when strput
7688 		 * fails. If MSG_IGNFLOW is set then any error returned
7689 		 * will cause us to break the loop, so we don't need a copy
7690 		 * of the message. If MSG_IGNFLOW is not set, then we can
7691 		 * get hit by flow control and be forced to try again. In
7692 		 * this case we need to have a copy of the message. We
7693 		 * do this using copymsg since the message may get modified
7694 		 * by something below us.
7695 		 *
7696 		 * We've observed that many TPI providers do not check db_ref
7697 		 * on the control messages but blindly reuse them for the
7698 		 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
7699 		 * friendly to such providers than using dupmsg. Also, note
7700 		 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
7701 		 * Only data messages are subject to flow control, hence
7702 		 * subject to this copymsg.
7703 		 */
7704 		if (flag & MSG_IGNFLOW) {
7705 			mp = mctl;
7706 			mctl = NULL;
7707 		} else {
7708 			do {
7709 				/*
7710 				 * If a message has a free pointer, the message
7711 				 * must be dupmsg to maintain this pointer.
7712 				 * Code using this facility must be sure
7713 				 * that modules below will not change the
7714 				 * contents of the dblk without checking db_ref
7715 				 * first. If db_ref is > 1, then the module
7716 				 * needs to do a copymsg first. Otherwise,
7717 				 * the contents of the dblk may become
7718 				 * inconsistent because the freesmg/freeb below
7719 				 * may end up calling atomic_add_32_nv.
7720 				 * The atomic_add_32_nv in freeb (accessing
7721 				 * all of db_ref, db_type, db_flags, and
7722 				 * db_struioflag) does not prevent other threads
7723 				 * from concurrently trying to modify e.g.
7724 				 * db_type.
7725 				 */
7726 				if (mctl->b_datap->db_frtnp != NULL)
7727 					mp = dupmsg(mctl);
7728 				else
7729 					mp = copymsg(mctl);
7730 
7731 				if (mp != NULL)
7732 					break;
7733 
7734 				error = strwaitbuf(msgdsize(mctl), BPRI_MED);
7735 				if (error) {
7736 					freemsg(mctl);
7737 					return (error);
7738 				}
7739 			} while (mp == NULL);
7740 		}
7741 		/*
7742 		 * Verify that all of msgsize can be transferred by
7743 		 * strput.
7744 		 */
7745 		ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
7746 		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7747 		if (error == 0)
7748 			break;
7749 
7750 		if (error != EWOULDBLOCK)
7751 			goto out;
7752 
7753 		/*
7754 		 * IF MSG_IGNFLOW is set we should have broken out of loop
7755 		 * above.
7756 		 */
7757 		ASSERT(!(flag & MSG_IGNFLOW));
7758 		mutex_enter(&stp->sd_lock);
7759 		/*
7760 		 * Check for a missed wakeup.
7761 		 * Needed since strput did not hold sd_lock across
7762 		 * the canputnext.
7763 		 */
7764 		if (bcanputnext(wqp, pri)) {
7765 			/* Try again */
7766 			mutex_exit(&stp->sd_lock);
7767 			continue;
7768 		}
7769 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
7770 			"kstrputmsg wait:stp %p waits pri %d", stp, pri);
7771 
7772 		waitflag = WRITEWAIT;
7773 		if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
7774 			if (flag & MSG_HOLDSIG)
7775 				waitflag |= STR_NOSIG;
7776 			if (flag & MSG_IGNERROR)
7777 				waitflag |= STR_NOERROR;
7778 		}
7779 		if (((error = strwaitq(stp, waitflag,
7780 		    (ssize_t)0, fmode, -1, &done)) != 0) || done) {
7781 			mutex_exit(&stp->sd_lock);
7782 			TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
7783 				"kstrputmsg out:stp %p out %d error %d",
7784 				stp, 0, error);
7785 			freemsg(mctl);
7786 			return (error);
7787 		}
7788 		TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
7789 			"kstrputmsg wake:stp %p wakes", stp);
7790 		mutex_exit(&stp->sd_lock);
7791 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
7792 			if (error = straccess(stp, JCWRITE)) {
7793 				freemsg(mctl);
7794 				return (error);
7795 			}
7796 		}
7797 	}
7798 out:
7799 	freemsg(mctl);
7800 	/*
7801 	 * For historic reasons, applications expect EAGAIN
7802 	 * when data mblk could not be allocated. so change
7803 	 * ENOMEM back to EAGAIN
7804 	 */
7805 	if (error == ENOMEM)
7806 		error = EAGAIN;
7807 	TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
7808 		"kstrputmsg out:stp %p out %d error %d", stp, 2, error);
7809 	return (error);
7810 }
7811 
7812 /*
7813  * Determines whether the necessary conditions are set on a stream
7814  * for it to be readable, writeable, or have exceptions.
7815  *
7816  * strpoll handles the consolidation private events:
7817  *	POLLNOERR	Do not return POLLERR even if there are stream
7818  *			head errors.
7819  *			Used by sockfs.
7820  *	POLLRDDATA	Do not return POLLIN unless at least one message on
7821  *			the queue contains one or more M_DATA mblks. Thus
7822  *			when this flag is set a queue with only
7823  *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
7824  *			Used by sockfs to ignore T_EXDATA_IND messages.
7825  *
7826  * Note: POLLRDDATA assumes that synch streams only return messages with
7827  * an M_DATA attached (i.e. not messages consisting of only
7828  * an M_PROTO/M_PCPROTO part).
7829  */
7830 int
7831 strpoll(
7832 	struct stdata *stp,
7833 	short events_arg,
7834 	int anyyet,
7835 	short *reventsp,
7836 	struct pollhead **phpp)
7837 {
7838 	int events = (ushort_t)events_arg;
7839 	int retevents = 0;
7840 	mblk_t *mp;
7841 	qband_t *qbp;
7842 	long sd_flags = stp->sd_flag;
7843 	int headlocked = 0;
7844 
7845 	/*
7846 	 * For performance, a single 'if' tests for most possible edge
7847 	 * conditions in one shot
7848 	 */
7849 	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
7850 		if (sd_flags & STPLEX) {
7851 			*reventsp = POLLNVAL;
7852 			return (EINVAL);
7853 		}
7854 		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
7855 		    (sd_flags & STRDERR)) ||
7856 		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
7857 		    (sd_flags & STWRERR))) {
7858 			if (!(events & POLLNOERR)) {
7859 				*reventsp = POLLERR;
7860 				return (0);
7861 			}
7862 		}
7863 	}
7864 	if (sd_flags & STRHUP) {
7865 		retevents |= POLLHUP;
7866 	} else if (events & (POLLWRNORM | POLLWRBAND)) {
7867 		queue_t *tq;
7868 		queue_t	*qp = stp->sd_wrq;
7869 
7870 		claimstr(qp);
7871 		/* Find next module forward that has a service procedure */
7872 		tq = qp->q_next->q_nfsrv;
7873 		ASSERT(tq != NULL);
7874 
7875 		polllock(&stp->sd_pollist, QLOCK(tq));
7876 		if (events & POLLWRNORM) {
7877 			queue_t *sqp;
7878 
7879 			if (tq->q_flag & QFULL)
7880 				/* ensure backq svc procedure runs */
7881 				tq->q_flag |= QWANTW;
7882 			else if ((sqp = stp->sd_struiowrq) != NULL) {
7883 				/* Check sync stream barrier write q */
7884 				mutex_exit(QLOCK(tq));
7885 				polllock(&stp->sd_pollist, QLOCK(sqp));
7886 				if (sqp->q_flag & QFULL)
7887 					/* ensure pollwakeup() is done */
7888 					sqp->q_flag |= QWANTWSYNC;
7889 				else
7890 					retevents |= POLLOUT;
7891 				/* More write events to process ??? */
7892 				if (! (events & POLLWRBAND)) {
7893 					mutex_exit(QLOCK(sqp));
7894 					releasestr(qp);
7895 					goto chkrd;
7896 				}
7897 				mutex_exit(QLOCK(sqp));
7898 				polllock(&stp->sd_pollist, QLOCK(tq));
7899 			} else
7900 				retevents |= POLLOUT;
7901 		}
7902 		if (events & POLLWRBAND) {
7903 			qbp = tq->q_bandp;
7904 			if (qbp) {
7905 				while (qbp) {
7906 					if (qbp->qb_flag & QB_FULL)
7907 						qbp->qb_flag |= QB_WANTW;
7908 					else
7909 						retevents |= POLLWRBAND;
7910 					qbp = qbp->qb_next;
7911 				}
7912 			} else {
7913 				retevents |= POLLWRBAND;
7914 			}
7915 		}
7916 		mutex_exit(QLOCK(tq));
7917 		releasestr(qp);
7918 	}
7919 chkrd:
7920 	if (sd_flags & STRPRI) {
7921 		retevents |= (events & POLLPRI);
7922 	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
7923 		queue_t	*qp = _RD(stp->sd_wrq);
7924 		int normevents = (events & (POLLIN | POLLRDNORM));
7925 
7926 		/*
7927 		 * Note: Need to do polllock() here since ps_lock may be
7928 		 * held. See bug 4191544.
7929 		 */
7930 		polllock(&stp->sd_pollist, &stp->sd_lock);
7931 		headlocked = 1;
7932 		mp = qp->q_first;
7933 		while (mp) {
7934 			/*
7935 			 * For POLLRDDATA we scan b_cont and b_next until we
7936 			 * find an M_DATA.
7937 			 */
7938 			if ((events & POLLRDDATA) &&
7939 			    mp->b_datap->db_type != M_DATA) {
7940 				mblk_t *nmp = mp->b_cont;
7941 
7942 				while (nmp != NULL &&
7943 				    nmp->b_datap->db_type != M_DATA)
7944 					nmp = nmp->b_cont;
7945 				if (nmp == NULL) {
7946 					mp = mp->b_next;
7947 					continue;
7948 				}
7949 			}
7950 			if (mp->b_band == 0)
7951 				retevents |= normevents;
7952 			else
7953 				retevents |= (events & (POLLIN | POLLRDBAND));
7954 			break;
7955 		}
7956 		if (! (retevents & normevents) &&
7957 		    (stp->sd_wakeq & RSLEEP)) {
7958 			/*
7959 			 * Sync stream barrier read queue has data.
7960 			 */
7961 			retevents |= normevents;
7962 		}
7963 		/* Treat eof as normal data */
7964 		if (sd_flags & STREOF)
7965 			retevents |= normevents;
7966 	}
7967 
7968 	*reventsp = (short)retevents;
7969 	if (retevents) {
7970 		if (headlocked)
7971 			mutex_exit(&stp->sd_lock);
7972 		return (0);
7973 	}
7974 
7975 	/*
7976 	 * If poll() has not found any events yet, set up event cell
7977 	 * to wake up the poll if a requested event occurs on this
7978 	 * stream.  Check for collisions with outstanding poll requests.
7979 	 */
7980 	if (!anyyet) {
7981 		*phpp = &stp->sd_pollist;
7982 		if (headlocked == 0) {
7983 			polllock(&stp->sd_pollist, &stp->sd_lock);
7984 			headlocked = 1;
7985 		}
7986 		stp->sd_rput_opt |= SR_POLLIN;
7987 	}
7988 	if (headlocked)
7989 		mutex_exit(&stp->sd_lock);
7990 	return (0);
7991 }
7992 
7993 /*
7994  * The purpose of putback() is to assure sleeping polls/reads
7995  * are awakened when there are no new messages arriving at the,
7996  * stream head, and a message is placed back on the read queue.
7997  *
7998  * sd_lock must be held when messages are placed back on stream
7999  * head.  (getq() holds sd_lock when it removes messages from
8000  * the queue)
8001  */
8002 
8003 static void
8004 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
8005 {
8006 	ASSERT(MUTEX_HELD(&stp->sd_lock));
8007 	(void) putbq(q, bp);
8008 	/*
8009 	 * A message may have come in when the sd_lock was dropped in the
8010 	 * calling routine. If this is the case and STR*ATMARK info was
8011 	 * received, need to move that from the stream head to the q_last
8012 	 * so that SIOCATMARK can return the proper value.
8013 	 */
8014 	if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
8015 		unsigned short *flagp = &q->q_last->b_flag;
8016 		uint_t b_flag = (uint_t)*flagp;
8017 
8018 		if (stp->sd_flag & STRATMARK) {
8019 			b_flag &= ~MSGNOTMARKNEXT;
8020 			b_flag |= MSGMARKNEXT;
8021 			stp->sd_flag &= ~STRATMARK;
8022 		} else {
8023 			b_flag &= ~MSGMARKNEXT;
8024 			b_flag |= MSGNOTMARKNEXT;
8025 			stp->sd_flag &= ~STRNOTATMARK;
8026 		}
8027 		*flagp = (unsigned short) b_flag;
8028 	}
8029 
8030 #ifdef	DEBUG
8031 	/*
8032 	 * Make sure that the flags are not messed up.
8033 	 */
8034 	{
8035 		mblk_t *mp;
8036 		mp = q->q_last;
8037 		while (mp != NULL) {
8038 			ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
8039 			    (MSGMARKNEXT|MSGNOTMARKNEXT));
8040 			mp = mp->b_cont;
8041 		}
8042 	}
8043 #endif
8044 	if (q->q_first == bp) {
8045 		short pollevents;
8046 
8047 		if (stp->sd_flag & RSLEEP) {
8048 			stp->sd_flag &= ~RSLEEP;
8049 			cv_broadcast(&q->q_wait);
8050 		}
8051 		if (stp->sd_flag & STRPRI) {
8052 			pollevents = POLLPRI;
8053 		} else {
8054 			if (band == 0) {
8055 				if (!(stp->sd_rput_opt & SR_POLLIN))
8056 					return;
8057 				stp->sd_rput_opt &= ~SR_POLLIN;
8058 				pollevents = POLLIN | POLLRDNORM;
8059 			} else {
8060 				pollevents = POLLIN | POLLRDBAND;
8061 			}
8062 		}
8063 		mutex_exit(&stp->sd_lock);
8064 		pollwakeup(&stp->sd_pollist, pollevents);
8065 		mutex_enter(&stp->sd_lock);
8066 	}
8067 }
8068 
8069 /*
8070  * Return the held vnode attached to the stream head of a
8071  * given queue
8072  * It is the responsibility of the calling routine to ensure
8073  * that the queue does not go away (e.g. pop).
8074  */
8075 vnode_t *
8076 strq2vp(queue_t *qp)
8077 {
8078 	vnode_t *vp;
8079 	vp = STREAM(qp)->sd_vnode;
8080 	ASSERT(vp != NULL);
8081 	VN_HOLD(vp);
8082 	return (vp);
8083 }
8084 
8085 /*
8086  * return the stream head write queue for the given vp
8087  * It is the responsibility of the calling routine to ensure
8088  * that the stream or vnode do not close.
8089  */
8090 queue_t *
8091 strvp2wq(vnode_t *vp)
8092 {
8093 	ASSERT(vp->v_stream != NULL);
8094 	return (vp->v_stream->sd_wrq);
8095 }
8096 
8097 /*
8098  * pollwakeup stream head
8099  * It is the responsibility of the calling routine to ensure
8100  * that the stream or vnode do not close.
8101  */
8102 void
8103 strpollwakeup(vnode_t *vp, short event)
8104 {
8105 	ASSERT(vp->v_stream);
8106 	pollwakeup(&vp->v_stream->sd_pollist, event);
8107 }
8108 
8109 /*
8110  * Mate the stream heads of two vnodes together. If the two vnodes are the
8111  * same, we just make the write-side point at the read-side -- otherwise,
8112  * we do a full mate.  Only works on vnodes associated with streams that are
8113  * still being built and thus have only a stream head.
8114  */
8115 void
8116 strmate(vnode_t *vp1, vnode_t *vp2)
8117 {
8118 	queue_t *wrq1 = strvp2wq(vp1);
8119 	queue_t *wrq2 = strvp2wq(vp2);
8120 
8121 	/*
8122 	 * Verify that there are no modules on the stream yet.  We also
8123 	 * rely on the stream head always having a service procedure to
8124 	 * avoid tweaking q_nfsrv.
8125 	 */
8126 	ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL);
8127 	ASSERT(wrq1->q_qinfo->qi_srvp != NULL);
8128 	ASSERT(wrq2->q_qinfo->qi_srvp != NULL);
8129 
8130 	/*
8131 	 * If the queues are the same, just twist; otherwise do a full mate.
8132 	 */
8133 	if (wrq1 == wrq2) {
8134 		wrq1->q_next = _RD(wrq1);
8135 	} else {
8136 		wrq1->q_next = _RD(wrq2);
8137 		wrq2->q_next = _RD(wrq1);
8138 		STREAM(wrq1)->sd_mate = STREAM(wrq2);
8139 		STREAM(wrq1)->sd_flag |= STRMATE;
8140 		STREAM(wrq2)->sd_mate = STREAM(wrq1);
8141 		STREAM(wrq2)->sd_flag |= STRMATE;
8142 	}
8143 }
8144 
8145 /*
8146  * XXX will go away when console is correctly fixed.
8147  * Clean up the console PIDS, from previous I_SETSIG,
8148  * called only for cnopen which never calls strclean().
8149  */
8150 void
8151 str_cn_clean(struct vnode *vp)
8152 {
8153 	strsig_t *ssp, *pssp, *tssp;
8154 	struct stdata *stp;
8155 	struct pid  *pidp;
8156 	int update = 0;
8157 
8158 	ASSERT(vp->v_stream);
8159 	stp = vp->v_stream;
8160 	pssp = NULL;
8161 	mutex_enter(&stp->sd_lock);
8162 	ssp = stp->sd_siglist;
8163 	while (ssp) {
8164 		mutex_enter(&pidlock);
8165 		pidp = ssp->ss_pidp;
8166 		/*
8167 		 * Get rid of PID if the proc is gone.
8168 		 */
8169 		if (pidp->pid_prinactive) {
8170 			tssp = ssp->ss_next;
8171 			if (pssp)
8172 				pssp->ss_next = tssp;
8173 			else
8174 				stp->sd_siglist = tssp;
8175 			ASSERT(pidp->pid_ref <= 1);
8176 			PID_RELE(ssp->ss_pidp);
8177 			mutex_exit(&pidlock);
8178 			kmem_free(ssp, sizeof (strsig_t));
8179 			update = 1;
8180 			ssp = tssp;
8181 			continue;
8182 		} else
8183 			mutex_exit(&pidlock);
8184 		pssp = ssp;
8185 		ssp = ssp->ss_next;
8186 	}
8187 	if (update) {
8188 		stp->sd_sigflags = 0;
8189 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
8190 			stp->sd_sigflags |= ssp->ss_events;
8191 	}
8192 	mutex_exit(&stp->sd_lock);
8193 }
8194 
8195 /*
8196  * Return B_TRUE if there is data in the message, B_FALSE otherwise.
8197  */
8198 static boolean_t
8199 msghasdata(mblk_t *bp)
8200 {
8201 	for (; bp; bp = bp->b_cont)
8202 		if (bp->b_datap->db_type == M_DATA) {
8203 			ASSERT(bp->b_wptr >= bp->b_rptr);
8204 			if (bp->b_wptr > bp->b_rptr)
8205 				return (B_TRUE);
8206 		}
8207 	return (B_FALSE);
8208 }
8209