xref: /titanic_44/usr/src/uts/common/os/streamio.c (revision ace1a5f11236a072fca1b5e0ea1416a083a9f2aa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
35 #include <sys/param.h>
36 #include <sys/errno.h>
37 #include <sys/signal.h>
38 #include <sys/stat.h>
39 #include <sys/proc.h>
40 #include <sys/cred.h>
41 #include <sys/user.h>
42 #include <sys/vnode.h>
43 #include <sys/file.h>
44 #include <sys/stream.h>
45 #include <sys/strsubr.h>
46 #include <sys/stropts.h>
47 #include <sys/tihdr.h>
48 #include <sys/var.h>
49 #include <sys/poll.h>
50 #include <sys/termio.h>
51 #include <sys/ttold.h>
52 #include <sys/systm.h>
53 #include <sys/uio.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sad.h>
56 #include <sys/priocntl.h>
57 #include <sys/jioctl.h>
58 #include <sys/procset.h>
59 #include <sys/session.h>
60 #include <sys/kmem.h>
61 #include <sys/filio.h>
62 #include <sys/vtrace.h>
63 #include <sys/debug.h>
64 #include <sys/strredir.h>
65 #include <sys/fs/fifonode.h>
66 #include <sys/fs/snode.h>
67 #include <sys/strlog.h>
68 #include <sys/strsun.h>
69 #include <sys/project.h>
70 #include <sys/kbio.h>
71 #include <sys/msio.h>
72 #include <sys/tty.h>
73 #include <sys/ptyvar.h>
74 #include <sys/vuid_event.h>
75 #include <sys/modctl.h>
76 #include <sys/sunddi.h>
77 #include <sys/sunldi_impl.h>
78 #include <sys/autoconf.h>
79 #include <sys/policy.h>
80 
/*
 * What is mblk_pull_len?
 *
 * If a STREAMS message consists of many short mblks,
 * a performance degradation occurs from copyout overhead.
 * To decrease the per-mblk overhead, messages that are
 * likely to consist of many small mblks are pulled up into
 * one contiguous chunk of memory.
 *
 * To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used: if the first mblk in
 * the message is shorter than mblk_pull_len, it is likely
 * that the remaining mblks in the message will be short too.
 *
 * This heuristic was decided upon after performance tests
 * indicated that anything more complex slowed down the main
 * code path.
 *
 * MBLK_PULL_LEN is the compiled-in default; mblk_pull_len is the
 * global variable that holds the threshold, initialized to that default.
 */
#define	MBLK_PULL_LEN 64
uint32_t mblk_pull_len = MBLK_PULL_LEN;
101 
/*
 * The sgttyb_handling flag controls the handling of the old BSD
 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
 *
 * 0 - Emit no warnings at all and retain old, broken behavior.
 * 1 - Emit no warnings and silently handle new semantics.
 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
 *     (once per system invocation).  Handle with new semantics.
 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
 *     made (so that offenders drop core and are easy to debug).
 *
 * The "new semantics" are that TIOCGETP returns B38400 for
 * sg_[io]speed if the corresponding value is over B38400, and that
 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
 * bit rate."
 *
 * The default below selects mode 1 (new semantics, no warnings).
 */
int sgttyb_handling = 1;
/*
 * NOTE(review): presumably records that the once-per-boot CE_NOTE of
 * mode 2 has already been issued; its use is not in this part of the
 * file -- confirm against the ioctl code.
 */
static boolean_t sgttyb_complaint;
120 
/*
 * Don't push the drcompat module by default on Style-2 streams.
 * stropen() only considers pushing DRMODNAME when this is non-zero.
 */
static int push_drcompat = 0;

/*
 * id value used to distinguish between different ioctl messages
 */
static uint32_t ioc_id;

/* Forward declarations for routines defined later in this file. */
static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);
132 
/*
 * qinit and module_info structures for stream head read and write queues.
 *
 * Two sets are defined: strm_info/stwm_info with strdata/stwdata for
 * ordinary streams, and the fifo* variants which stropen() installs
 * for VFIFO vnodes.  In every set the read side supplies strrput as
 * its first (put) entry and the write side supplies strwsrv as its
 * second (service) entry; all other procedure slots are NULL.
 * The FIFO read-side module_info differs from the ordinary one only in
 * its name and in using PIPE_BUF/FIFOHIWAT/FIFOLOWAT where the ordinary
 * one uses INFPSZ/STRHIGH/STRLOW.
 */
struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
    FIFOLOWAT };
struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };

extern kmutex_t	strresources;	/* protects global resources */
extern kmutex_t muxifier;	/* single-threads multiplexor creation */
kmutex_t sad_lock;		/* protects sad drivers autopush */

/* msgnodata() is simply the logical negation of msghasdata(). */
static boolean_t msghasdata(mblk_t *bp);
#define	msgnodata(bp) (!msghasdata(bp))
152 
153 /*
154  * Stream head locking notes:
155  *	There are four monitors associated with the stream head:
156  *	1. v_stream monitor: in stropen() and strclose() v_lock
157  *		is held while the association of vnode and stream
158  *		head is established or tested for.
159  *	2. open/close/push/pop monitor: sd_lock is held while each
160  *		thread bids for exclusive access to this monitor
161  *		for opening or closing a stream.  In addition, this
162  *		monitor is entered during pushes and pops.  This
163  *		guarantees that during plumbing operations there
164  *		is only one thread trying to change the plumbing.
165  *		Any other threads present in the stream are only
166  *		using the plumbing.
167  *	3. read/write monitor: in the case of read, a thread holds
168  *		sd_lock while trying to get data from the stream
169  *		head queue.  if there is none to fulfill a read
170  *		request, it sets RSLEEP and calls cv_wait_sig() down
171  *		in strwaitq() to await the arrival of new data.
172  *		when new data arrives in strrput(), sd_lock is acquired
173  *		before testing for RSLEEP and calling cv_broadcast().
174  *		the behavior of strwrite(), strwsrv(), and WSLEEP
175  *		mirror this.
176  *	4. ioctl monitor: sd_lock is gotten to ensure that only one
177  *		thread is doing an ioctl at a time.
178  */
179 
180 static int
181 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
182     int anchor, cred_t *crp)
183 {
184 	int error;
185 	fmodsw_impl_t *fp;
186 
187 	if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
188 		error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
189 		return (error);
190 	}
191 	if (stp->sd_pushcnt >= nstrpush) {
192 		return (EINVAL);
193 	}
194 
195 	if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
196 		stp->sd_flag |= STREOPENFAIL;
197 		return (EINVAL);
198 	}
199 
200 	/*
201 	 * push new module and call its open routine via qattach
202 	 */
203 	if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
204 		return (error);
205 
206 	/*
207 	 * Check to see if caller wants a STREAMS anchor
208 	 * put at this place in the stream, and add if so.
209 	 */
210 	mutex_enter(&stp->sd_lock);
211 	if (anchor == stp->sd_pushcnt)
212 		stp->sd_anchor = stp->sd_pushcnt;
213 	mutex_exit(&stp->sd_lock);
214 
215 	return (0);
216 }
217 
/*
 * Open a stream device.
 *
 * vp	vnode being opened; vp->v_stream says whether a stream exists.
 * devp	device number; handed to qattach()/qreopen() and re-read afterwards.
 * flag	open flags; FNDELAY/FNONBLOCK turn waits into EAGAIN.
 * crp	credentials passed through to the module/driver open routines.
 *
 * If the vnode already has a stream this is a re-open: wait for any
 * open/close/plumbing in progress, then call qreopen() on every queue
 * below the stream head.  Otherwise a new stream head is allocated and
 * the driver is attached; for non-FIFO vnodes any configured autopush
 * modules are pushed as well.
 *
 * Returns 0 on success or an errno value: EAGAIN (non-blocking open
 * while STWOPEN/STRCLOSE/STRPLUMB is set), EINTR (wait interrupted),
 * EIO (existing stream has STRDERR or STWRERR set), or whatever
 * qreopen(), qattach() or push_mod() returned.
 */
int
stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int s;
	dev_t dummydev;
	struct autopush *ap;
	int error = 0;
	ssize_t	rmin, rmax;
	int cloneopen;
	queue_t *brq;
	major_t major;

#ifdef C2_AUDIT
	if (audit_active)
		audit_stropen(vp, devp, flag, crp);
#endif

	/*
	 * If the stream already exists, wait for any open in progress
	 * to complete, then call the open function of each module and
	 * driver in the stream.  Otherwise create the stream.
	 */
	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
retry:
	/* v_lock guards the vnode <-> stream head association. */
	mutex_enter(&vp->v_lock);
	if ((stp = vp->v_stream) != NULL) {

		/*
		 * Waiting for stream to be created to device
		 * due to another open.
		 */
	    mutex_exit(&vp->v_lock);

	    if (STRMATED(stp)) {
		struct stdata *strmatep = stp->sd_mate;

		/*
		 * Both this stream's and its mate's sd_lock are held
		 * from here on; each is dropped individually below.
		 * Every jump to ckreturn must arrive holding only
		 * stp->sd_lock, since ckreturn broadcasts on and then
		 * releases that lock.
		 */
		STRLOCKMATES(stp);
		if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
			/* The mate is busy opening/closing/plumbing. */
			if (flag & (FNDELAY|FNONBLOCK)) {
				error = EAGAIN;
				mutex_exit(&strmatep->sd_lock);
				goto ckreturn;
			}
			/* Sleep on the mate's monitor holding only its lock. */
			mutex_exit(&stp->sd_lock);
			if (!cv_wait_sig(&strmatep->sd_monitor,
			    &strmatep->sd_lock)) {
				/* Zero return is reported as EINTR. */
				error = EINTR;
				mutex_exit(&strmatep->sd_lock);
				mutex_enter(&stp->sd_lock);
				goto ckreturn;
			}
			mutex_exit(&strmatep->sd_lock);
			goto retry;
		}
		if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
			/* This end is busy opening/closing/plumbing. */
			if (flag & (FNDELAY|FNONBLOCK)) {
				error = EAGAIN;
				mutex_exit(&strmatep->sd_lock);
				goto ckreturn;
			}
			mutex_exit(&strmatep->sd_lock);
			if (!cv_wait_sig(&stp->sd_monitor, &stp->sd_lock)) {
				error = EINTR;
				goto ckreturn;
			}
			mutex_exit(&stp->sd_lock);
			goto retry;
		}

		if (stp->sd_flag & (STRDERR|STWRERR)) {
			error = EIO;
			mutex_exit(&strmatep->sd_lock);
			goto ckreturn;
		}

		/* Claim the open monitor for this stream. */
		stp->sd_flag |= STWOPEN;
		STRUNLOCKMATES(stp);
	    } else {
		/* Unmated stream: same protocol with a single lock. */
		mutex_enter(&stp->sd_lock);
		if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
			if (flag & (FNDELAY|FNONBLOCK)) {
				error = EAGAIN;
				goto ckreturn;
			}
			if (!cv_wait_sig(&stp->sd_monitor, &stp->sd_lock)) {
				error = EINTR;
				goto ckreturn;
			}
			mutex_exit(&stp->sd_lock);
			goto retry;  /* could be clone! */
		}

		if (stp->sd_flag & (STRDERR|STWRERR)) {
			error = EIO;
			goto ckreturn;
		}

		stp->sd_flag |= STWOPEN;
		mutex_exit(&stp->sd_lock);
	    }

		/*
		 * Open all modules and devices down stream to notify
		 * that another user is streaming.  For modules, set the
		 * last argument to MODOPEN and do not pass any open flags.
		 * Ignore dummydev since this is not the first open.
		 */
	    claimstr(stp->sd_wrq);
	    qp = stp->sd_wrq;
	    /* Walk downstream; stop at the first qreopen() failure. */
	    while (_SAMESTR(qp)) {
		qp = qp->q_next;
		if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
			break;
	    }
	    releasestr(stp->sd_wrq);
	    /*
	     * Leave the open monitor.  Note that the hangup and error
	     * state is cleared here whether or not qreopen() succeeded.
	     */
	    mutex_enter(&stp->sd_lock);
	    stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
	    stp->sd_rerror = 0;
	    stp->sd_werror = 0;
ckreturn:
	    /* Entered with stp->sd_lock held; wake other waiters. */
	    cv_broadcast(&stp->sd_monitor);
	    mutex_exit(&stp->sd_lock);
	    return (error);
	}

	/*
	 * This vnode isn't streaming.  SPECFS already
	 * checked for multiple vnodes pointing to the
	 * same stream, so create a stream to the driver.
	 */
	qp = allocq();
	stp = shalloc(qp);

	/*
	 * Initialize stream head.  shalloc() has given us
	 * exclusive access, and we have the vnode locked;
	 * we can do whatever we want with stp.
	 */
	stp->sd_flag = STWOPEN;
	stp->sd_siglist = NULL;
	stp->sd_pollist.ph_list = NULL;
	stp->sd_sigflags = 0;
	stp->sd_mark = NULL;
	stp->sd_closetime = STRTIMOUT;
	stp->sd_sidp = NULL;
	stp->sd_pgidp = NULL;
	stp->sd_vnode = vp;
	stp->sd_rerror = 0;
	stp->sd_werror = 0;
	stp->sd_wroff = 0;
	stp->sd_iocblk = NULL;
	stp->sd_pushcnt = 0;
	stp->sd_qn_minpsz = 0;
	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
	stp->sd_maxblk = INFPSZ;
	qp->q_ptr = _WR(qp)->q_ptr = stp;
	STREAM(qp) = STREAM(_WR(qp)) = stp;
	/*
	 * Publish the stream head.  STWOPEN is already set, so any
	 * concurrent open that now finds v_stream waits or gets EAGAIN.
	 */
	vp->v_stream = stp;
	mutex_exit(&vp->v_lock);
	if (vp->v_type == VFIFO) {
		stp->sd_flag |= OLDNDELAY;
		/*
		 * This means, both for pipes and fifos
		 * strwrite will send SIGPIPE if the other
		 * end is closed. For putmsg it depends
		 * on whether it is a XPG4_2 application
		 * or not
		 */
		stp->sd_wput_opt = SW_SIGPIPE;

		/* setq might sleep in kmem_alloc - avoid holding locks. */
		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
		    SQ_CI|SQ_CO, B_FALSE);

		set_qend(qp);
		stp->sd_strtab = fifo_getinfo();
		/* No driver is attached: each queue is its own q_nfsrv. */
		_WR(qp)->q_nfsrv = _WR(qp);
		qp->q_nfsrv = qp;
		/*
		 * Wake up others that are waiting for stream to be created.
		 */
		mutex_enter(&stp->sd_lock);
		/*
		 * Nothing has been pushed on the stream yet, so the
		 * optimized stream head packet sizes are just those
		 * of the read queue.
		 */
		stp->sd_qn_minpsz = qp->q_minpsz;
		stp->sd_qn_maxpsz = qp->q_maxpsz;
		stp->sd_flag &= ~STWOPEN;
		/* Jumps with sd_lock held; fifo_opendone releases it. */
		goto fifo_opendone;
	}
	/* setq might sleep in kmem_alloc - avoid holding locks. */
	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);

	set_qend(qp);

	/*
	 * Open driver and create stream to it (via qattach).
	 * cloneopen is sampled first because *devp is re-read after
	 * qattach() below (NOTE(review): presumably qattach() may rewrite
	 * *devp for clone opens -- confirm).
	 */
	cloneopen = (getmajor(*devp) == clone_major);
	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
		/*
		 * Driver open failed: detach the stream head from the
		 * vnode, wake anyone waiting on it, and free the queue
		 * pair and stream head.
		 */
		mutex_enter(&vp->v_lock);
		vp->v_stream = NULL;
		mutex_exit(&vp->v_lock);
		mutex_enter(&stp->sd_lock);
		cv_broadcast(&stp->sd_monitor);
		mutex_exit(&stp->sd_lock);
		freeq(_RD(qp));
		shfree(stp);
		return (error);
	}
	/*
	 * Set sd_strtab after open in order to handle clonable drivers
	 */
	stp->sd_strtab = STREAMSTAB(getmajor(*devp));

	/*
	 * Historical note: dummydev used to be set prior to the initial
	 * open (via qattach above), which made the value seen
	 * inconsistent between an I_PUSH and an autopush of a module.
	 */
	dummydev = *devp;

	/*
	 * For clone open of old style (Q not associated) network driver,
	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
	 */
	brq = _RD(_WR(qp)->q_next);
	major = getmajor(*devp);
	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
	    ((brq->q_flag & _QASSOCIATED) == 0)) {
		/* A failed push is only warned about; the open continues. */
		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp) != 0)
			cmn_err(CE_WARN, "cannot push " DRMODNAME
			    " streams module");
	}

	/*
	 * check for autopush
	 */
	mutex_enter(&sad_lock);
	ap = strphash(getemajor(*devp));
#define	DEVT(ap)	makedevice(ap->ap_major, ap->ap_minor)
#define	DEVLT(ap)	makedevice(ap->ap_major, ap->ap_lastminor)

	/*
	 * Search the hash chain for an entry with this major that
	 * matches every minor (SAP_ALL), exactly this minor (SAP_ONE),
	 * or an inclusive minor range containing it (SAP_RANGE).
	 */
	while (ap) {
		if (ap->ap_major == (getemajor(*devp))) {
			if (ap->ap_type == SAP_ALL)
				break;
			else if ((ap->ap_type == SAP_ONE) &&
			    (getminor(DEVT(ap)) == getminor(*devp)))
				break;
			else if (ap->ap_type == SAP_RANGE &&
			    getminor(*devp) >= getminor(DEVT(ap)) &&
			    getminor(*devp) <= getminor(DEVLT(ap)))
				break;
		}
		ap = ap->ap_nextp;
	}
	if (ap == NULL) {
		mutex_exit(&sad_lock);
		goto opendone;
	}
	/*
	 * Take a count on the entry so that it is still valid while
	 * sad_lock is dropped for the pushes below.
	 */
	ap->ap_cnt++;
	mutex_exit(&sad_lock);
	/* Push the configured modules in order; stop at the first failure. */
	for (s = 0; s < ap->ap_npush; s++) {
		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
		    ap->ap_anchor, crp);
		if (error != 0)
			break;
	}
	/* Drop our count; free the entry if it has reached zero. */
	mutex_enter(&sad_lock);
	if (--(ap->ap_cnt) <= 0)
		ap_free(ap);
	mutex_exit(&sad_lock);

	/*
	 * let specfs know that open failed part way through
	 */

	if (error) {
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STREOPENFAIL;
		mutex_exit(&stp->sd_lock);
	}

opendone:

	/*
	 * Wake up others that are waiting for stream to be created.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STWOPEN;

	/*
	 * As a performance concern we are caching the values of
	 * q_minpsz and q_maxpsz of the module below the stream
	 * head in the stream head.
	 */
	mutex_enter(QLOCK(stp->sd_wrq->q_next));
	rmin = stp->sd_wrq->q_next->q_minpsz;
	rmax = stp->sd_wrq->q_next->q_maxpsz;
	mutex_exit(QLOCK(stp->sd_wrq->q_next));

	/*
	 * do this processing here as a performance concern:
	 * when strmsgsz is non-zero it caps the cached maximum, and
	 * replaces an INFPSZ maximum outright.
	 */
	if (strmsgsz != 0) {
		if (rmax == INFPSZ)
			rmax = strmsgsz;
		else
			rmax = MIN(strmsgsz, rmax);
	}

	mutex_enter(QLOCK(stp->sd_wrq));
	stp->sd_qn_minpsz = rmin;
	stp->sd_qn_maxpsz = rmax;
	mutex_exit(QLOCK(stp->sd_wrq));

fifo_opendone:
	/* Reached with sd_lock held on both the FIFO and driver paths. */
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);
	return (error);
}
545 
/*
 * qinit structures installed on a stream head's queues by strclose()
 * when one end of a pipe closes while the other is still alive.
 * The dead read side uses strsink() as its put procedure; the dead
 * write side has no procedures at all.
 */
static int strsink(queue_t *, mblk_t *);
static struct qinit deadrend = {
	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
};
static struct qinit deadwend = {
	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
};
553 
/*
 * Close a stream.
 * This is called from closef() on the last close of an open stream.
 * Strclean() will already have removed the siglist and pollist
 * information, so all that remains is to remove all multiplexor links
 * for the stream, pop all the modules (and the driver), and free the
 * stream structure.
 *
 * vp	vnode whose stream (vp->v_stream, must be non-NULL) is closed.
 * flag	file flags; FNDELAY/FNONBLOCK skip the wait for queued
 *	write-side data to drain before each module is popped.
 * crp	credentials passed to munlinkall() and qdetach().
 *
 * Always returns 0.
 */

int
strclose(struct vnode *vp, int flag, cred_t *crp)
{
	struct stdata *stp;
	queue_t *qp;
	int rval;
	int freestp = 1;	/* cleared when the stream head must survive */
	queue_t *rmq;

#ifdef C2_AUDIT
	if (audit_active)
		audit_strclose(vp, flag, crp);
#endif

	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRCLOSE, "strclose:%p", vp);
	ASSERT(vp->v_stream);

	stp = vp->v_stream;
	ASSERT(!(stp->sd_flag & STPLEX));
	qp = stp->sd_wrq;

	/*
	 * Needed so that strpoll will return non-zero for this fd.
	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
	 */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag |= STRHUP;
	mutex_exit(&stp->sd_lock);

	/*
	 * Since we call pollwakeup in close() now, the poll list should
	 * be empty in most cases. The only exception is the layered devices
	 * (e.g. the console drivers with redirection modules pushed on top
	 * of it).
	 */
	if (stp->sd_pollist.ph_list != NULL) {
		pollwakeup(&stp->sd_pollist, POLLERR);
		pollhead_clean(&stp->sd_pollist);
	}
	ASSERT(stp->sd_pollist.ph_list == NULL);
	ASSERT(stp->sd_sidp == NULL);
	ASSERT(stp->sd_pgidp == NULL);

	/*
	 * If the registered process or process group did not have an
	 * open instance of this stream then strclean would not be
	 * called. Thus at the time of closing all remaining siglist entries
	 * are removed.
	 */
	if (stp->sd_siglist != NULL)
		strcleanall(vp);

	ASSERT(stp->sd_siglist == NULL);
	ASSERT(stp->sd_sigflags == 0);

	if (STRMATED(stp)) {
		struct stdata *strmatep = stp->sd_mate;
		int waited = 1;

		/*
		 * Wait until neither this stream nor its mate is being
		 * opened, closed or plumbed.  Each wait drops the other
		 * stream's lock, sleeps with plain cv_wait() (no signal
		 * handling), then re-takes both locks.  The outer loop
		 * repeats until one full pass completes without sleeping,
		 * since either stream's state may change while we sleep
		 * on the other.
		 */
		STRLOCKMATES(stp);
		while (waited) {
			waited = 0;
			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&strmatep->sd_lock);
				cv_wait(&stp->sd_monitor, &stp->sd_lock);
				mutex_exit(&stp->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
			while (strmatep->sd_flag &
			    (STWOPEN|STRCLOSE|STRPLUMB)) {
				mutex_exit(&stp->sd_lock);
				cv_wait(&strmatep->sd_monitor,
				    &strmatep->sd_lock);
				mutex_exit(&strmatep->sd_lock);
				STRLOCKMATES(stp);
				waited = 1;
			}
		}
		stp->sd_flag |= STRCLOSE;
		STRUNLOCKMATES(stp);
	} else {
		/* Unmated stream: mark it closing without waiting. */
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STRCLOSE;
		mutex_exit(&stp->sd_lock);
	}

	ASSERT(qp->q_first == NULL);	/* No more delayed write */

	/* Check if an I_LINK was ever done on this stream */
	if (stp->sd_flag & STRHASLINKS) {
		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval);
	}

	/* Pop everything below the stream head, one queue pair at a time. */
	while (_SAMESTR(qp)) {
		/*
		 * Holding sd_lock prevents q_next from changing in
		 * this stream.
		 */
		mutex_enter(&stp->sd_lock);
		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {

			/*
			 * sleep until awakened by strwsrv() or timeout
			 */
			for (;;) {
				/* Done once the queue below holds no data. */
				mutex_enter(QLOCK(qp->q_next));
				if (!(qp->q_next->q_mblkcnt)) {
					mutex_exit(QLOCK(qp->q_next));
					break;
				}
				stp->sd_flag |= WSLEEP;

				/* ensure strwsrv gets enabled */
				qp->q_next->q_flag |= QWANTW;
				mutex_exit(QLOCK(qp->q_next));
				/* get out if we timed out or recv'd a signal */
				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
				    stp->sd_closetime, 0) <= 0) {
					break;
				}
			}
			stp->sd_flag &= ~WSLEEP;
		}
		mutex_exit(&stp->sd_lock);

		rmq = qp->q_next;
		if (rmq->q_flag & QISDRV) {
			/* The driver is the last queue in this stream. */
			ASSERT(!_SAMESTR(rmq));
			wait_sq_svc(_RD(qp)->q_syncq);
		}

		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
	}

	/* Prevent qenable from re-enabling the stream head queue */
	disable_svc(_RD(qp));

	/*
	 * Wait until service procedure of each queue is
	 * run, if QINSERVICE is set.
	 */
	wait_svc(_RD(qp));

	/*
	 * Now, flush both queues.
	 */
	flushq(_RD(qp), FLUSHALL);
	flushq(qp, FLUSHALL);

	/*
	 * If the write queue of the stream head is pointing to a
	 * read queue, we have a twisted stream.  If the read queue
	 * is alive, convert the stream head queues into a dead end.
	 * If the read queue is dead, free the dead pair.
	 */
	if (qp->q_next && !_SAMESTR(qp)) {
		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
			/*
			 * The other end was closed earlier and left as a
			 * dead end: free its stream head and queues as
			 * well as our own queues.
			 */
			flushq(qp->q_next, FLUSHALL); /* ensure no message */
			shfree(qp->q_next->q_stream);
			freeq(qp->q_next);
			freeq(_RD(qp));
		} else if (qp->q_next == _RD(qp)) {	/* fifo */
			freeq(_RD(qp));
		} else {				/* pipe */
			/*
			 * The other end is still alive: keep our stream
			 * head and queues, but turn them into a dead end.
			 */
			freestp = 0;
			/*
			 * The q_info pointers are never accessed when
			 * SQLOCK is held.
			 */
			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
			mutex_enter(SQLOCK(qp->q_syncq));
			qp->q_qinfo = &deadwend;
			_RD(qp)->q_qinfo = &deadrend;
			mutex_exit(SQLOCK(qp->q_syncq));
		}
	} else {
		freeq(_RD(qp)); /* free stream head queue pair */
	}

	/* Break the vnode <-> stream head association under v_lock. */
	mutex_enter(&vp->v_lock);
	if (stp->sd_iocblk) {
		/*
		 * NOTE(review): (mblk_t *)-1 is evidently a marker value
		 * rather than a real message, so it is not freed.
		 */
		if (stp->sd_iocblk != (mblk_t *)-1) {
			freemsg(stp->sd_iocblk);
		}
		stp->sd_iocblk = NULL;
	}
	stp->sd_vnode = NULL;
	vp->v_stream = NULL;
	mutex_exit(&vp->v_lock);
	/* Leave the close monitor and wake anyone waiting on it. */
	mutex_enter(&stp->sd_lock);
	stp->sd_flag &= ~STRCLOSE;
	cv_broadcast(&stp->sd_monitor);
	mutex_exit(&stp->sd_lock);

	if (freestp)
		shfree(stp);
	return (0);
}
763 
764 static int
765 strsink(queue_t *q, mblk_t *bp)
766 {
767 	struct copyresp *resp;
768 
769 	switch (bp->b_datap->db_type) {
770 	case M_FLUSH:
771 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
772 			*bp->b_rptr &= ~FLUSHR;
773 			bp->b_flag |= MSGNOLOOP;
774 			/*
775 			 * Protect against the driver passing up
776 			 * messages after it has done a qprocsoff.
777 			 */
778 			if (_OTHERQ(q)->q_next == NULL)
779 				freemsg(bp);
780 			else
781 				qreply(q, bp);
782 		} else {
783 			freemsg(bp);
784 		}
785 		break;
786 
787 	case M_COPYIN:
788 	case M_COPYOUT:
789 		if (bp->b_cont) {
790 			freemsg(bp->b_cont);
791 			bp->b_cont = NULL;
792 		}
793 		bp->b_datap->db_type = M_IOCDATA;
794 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
795 		resp = (struct copyresp *)bp->b_rptr;
796 		resp->cp_rval = (caddr_t)1;	/* failure */
797 		/*
798 		 * Protect against the driver passing up
799 		 * messages after it has done a qprocsoff.
800 		 */
801 		if (_OTHERQ(q)->q_next == NULL)
802 			freemsg(bp);
803 		else
804 			qreply(q, bp);
805 		break;
806 
807 	case M_IOCTL:
808 		if (bp->b_cont) {
809 			freemsg(bp->b_cont);
810 			bp->b_cont = NULL;
811 		}
812 		bp->b_datap->db_type = M_IOCNAK;
813 		/*
814 		 * Protect against the driver passing up
815 		 * messages after it has done a qprocsoff.
816 		 */
817 		if (_OTHERQ(q)->q_next == NULL)
818 			freemsg(bp);
819 		else
820 			qreply(q, bp);
821 		break;
822 
823 	default:
824 		freemsg(bp);
825 		break;
826 	}
827 
828 	return (0);
829 }
830 
831 /*
832  * Clean up after a process when it closes a stream.  This is called
833  * from closef for all closes, whereas strclose is called only for the
834  * last close on a stream.  The siglist is scanned for entries for the
835  * current process, and these are removed.
836  */
837 void
838 strclean(struct vnode *vp)
839 {
840 	strsig_t *ssp, *pssp, *tssp;
841 	stdata_t *stp;
842 	int update = 0;
843 
844 	TRACE_1(TR_FAC_STREAMS_FR,
845 		TR_STRCLEAN, "strclean:%p", vp);
846 	stp = vp->v_stream;
847 	pssp = NULL;
848 	mutex_enter(&stp->sd_lock);
849 	ssp = stp->sd_siglist;
850 	while (ssp) {
851 		if (ssp->ss_pidp == curproc->p_pidp) {
852 			tssp = ssp->ss_next;
853 			if (pssp)
854 				pssp->ss_next = tssp;
855 			else
856 				stp->sd_siglist = tssp;
857 			mutex_enter(&pidlock);
858 			PID_RELE(ssp->ss_pidp);
859 			mutex_exit(&pidlock);
860 			kmem_free(ssp, sizeof (strsig_t));
861 			update = 1;
862 			ssp = tssp;
863 		} else {
864 			pssp = ssp;
865 			ssp = ssp->ss_next;
866 		}
867 	}
868 	if (update) {
869 		stp->sd_sigflags = 0;
870 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
871 			stp->sd_sigflags |= ssp->ss_events;
872 	}
873 	mutex_exit(&stp->sd_lock);
874 }
875 
876 /*
877  * Used on the last close to remove any remaining items on the siglist.
878  * These could be present on the siglist due to I_ESETSIG calls that
879  * use process groups or processed that do not have an open file descriptor
880  * for this stream (Such entries would not be removed by strclean).
881  */
882 static void
883 strcleanall(struct vnode *vp)
884 {
885 	strsig_t *ssp, *nssp;
886 	stdata_t *stp;
887 
888 	stp = vp->v_stream;
889 	mutex_enter(&stp->sd_lock);
890 	ssp = stp->sd_siglist;
891 	stp->sd_siglist = NULL;
892 	while (ssp) {
893 		nssp = ssp->ss_next;
894 		mutex_enter(&pidlock);
895 		PID_RELE(ssp->ss_pidp);
896 		mutex_exit(&pidlock);
897 		kmem_free(ssp, sizeof (strsig_t));
898 		ssp = nssp;
899 	}
900 	stp->sd_sigflags = 0;
901 	mutex_exit(&stp->sd_lock);
902 }
903 
904 /*
905  * Retrieve the next message from the logical stream head read queue
906  * using either rwnext (if sync stream) or getq_noenab.
907  * It is the callers responsibility to call qbackenable after
908  * it is finished with the message. The caller should not call
909  * qbackenable until after any putback calls to avoid spurious backenabling.
910  */
911 mblk_t *
912 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
913     int *errorp)
914 {
915 	mblk_t *bp;
916 	int error;
917 
918 	ASSERT(MUTEX_HELD(&stp->sd_lock));
919 	/* Holding sd_lock prevents the read queue from changing  */
920 
921 	if (uiop != NULL && stp->sd_struiordq != NULL &&
922 	    q->q_first == NULL &&
923 	    (!first || (stp->sd_wakeq & RSLEEP))) {
924 		/*
925 		 * Stream supports rwnext() for the read side.
926 		 * If this is the first time we're called by e.g. strread
927 		 * only do the downcall if there is a deferred wakeup
928 		 * (registered in sd_wakeq).
929 		 */
930 		struiod_t uiod;
931 
932 		if (first)
933 			stp->sd_wakeq &= ~RSLEEP;
934 
935 		(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
936 			sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
937 		uiod.d_mp = 0;
938 		/*
939 		 * Mark that a thread is in rwnext on the read side
940 		 * to prevent strrput from nacking ioctls immediately.
941 		 * When the last concurrent rwnext returns
942 		 * the ioctls are nack'ed.
943 		 */
944 		ASSERT(MUTEX_HELD(&stp->sd_lock));
945 		stp->sd_struiodnak++;
946 		/*
947 		 * Note: rwnext will drop sd_lock.
948 		 */
949 		error = rwnext(q, &uiod);
950 		ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
951 		mutex_enter(&stp->sd_lock);
952 		stp->sd_struiodnak--;
953 		while (stp->sd_struiodnak == 0 &&
954 		    ((bp = stp->sd_struionak) != NULL)) {
955 			stp->sd_struionak = bp->b_next;
956 			bp->b_next = NULL;
957 			bp->b_datap->db_type = M_IOCNAK;
958 			/*
959 			 * Protect against the driver passing up
960 			 * messages after it has done a qprocsoff.
961 			 */
962 			if (_OTHERQ(q)->q_next == NULL)
963 				freemsg(bp);
964 			else {
965 				mutex_exit(&stp->sd_lock);
966 				qreply(q, bp);
967 				mutex_enter(&stp->sd_lock);
968 			}
969 		}
970 		ASSERT(MUTEX_HELD(&stp->sd_lock));
971 		if (error == 0 || error == EWOULDBLOCK) {
972 			if ((bp = uiod.d_mp) != NULL) {
973 				*errorp = 0;
974 				ASSERT(MUTEX_HELD(&stp->sd_lock));
975 				return (bp);
976 			}
977 			error = 0;
978 		} else if (error == EINVAL) {
979 			/*
980 			 * The stream plumbing must have
981 			 * changed while we were away, so
982 			 * just turn off rwnext()s.
983 			 */
984 			error = 0;
985 		} else if (error == EBUSY) {
986 			/*
987 			 * The module might have data in transit using putnext
988 			 * Fall back on waiting + getq.
989 			 */
990 			error = 0;
991 		} else {
992 			*errorp = error;
993 			ASSERT(MUTEX_HELD(&stp->sd_lock));
994 			return (NULL);
995 		}
996 		/*
997 		 * Try a getq in case a rwnext() generated mblk
998 		 * has bubbled up via strrput().
999 		 */
1000 	}
1001 	*errorp = 0;
1002 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1003 	return (getq_noenab(q));
1004 }
1005 
1006 /*
1007  * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1008  * If the message does not fit in the uio the remainder of it is returned;
1009  * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
1010  * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
1011  * the error code, the message is consumed, and NULL is returned.
1012  */
1013 static mblk_t *
1014 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1015 {
1016 	int error;
1017 	ptrdiff_t n;
1018 	mblk_t *nbp;
1019 
1020 	ASSERT(bp->b_wptr >= bp->b_rptr);
1021 
1022 	do {
1023 		if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1024 			ASSERT(n > 0);
1025 
1026 			error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1027 			if (error != 0) {
1028 				freemsg(bp);
1029 				*errorp = error;
1030 				return (NULL);
1031 			}
1032 		}
1033 
1034 		bp->b_rptr += n;
1035 		while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1036 			nbp = bp;
1037 			bp = bp->b_cont;
1038 			freeb(nbp);
1039 		}
1040 	} while (bp != NULL && uiop->uio_resid > 0);
1041 
1042 	*errorp = 0;
1043 	return (bp);
1044 }
1045 
1046 /*
1047  * Read a stream according to the mode flags in sd_flag:
1048  *
1049  * (default mode)		- Byte stream, msg boundaries are ignored
1050  * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
1051  *				any data remaining in msg
1052  * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
1053  *				any remaining data on head of read queue
1054  *
1055  * Consume readable messages on the front of the queue until
1056  * ttolwp(curthread)->lwp_count
1057  * is satisfied, the readable messages are exhausted, or a message
1058  * boundary is reached in a message mode.  If no data was read and
1059  * the stream was not opened with the NDELAY flag, block until data arrives.
1060  * Otherwise return the data read and update the count.
1061  *
1062  * In default mode a 0 length message signifies end-of-file and terminates
1063  * a read in progress.  The 0 length message is removed from the queue
1064  * only if it is the only message read (no data is read).
1065  *
1066  * An attempt to read an M_PROTO or M_PCPROTO message results in an
1067  * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
1068  * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
1069  * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
1070  * are unlinked from and M_DATA blocks in the message, the protos are
1071  * thrown away, and the data is read.
1072  */
1073 /* ARGSUSED */
1074 int
1075 strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
1076 {
1077 	struct stdata *stp;
1078 	mblk_t *bp, *nbp;
1079 	queue_t *q;
1080 	int error = 0;
1081 	uint_t old_sd_flag;
1082 	int first;
1083 	char rflg;
1084 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
1085 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
1086 	short delim;
1087 	unsigned char pri = 0;
1088 	char waitflag;
1089 	unsigned char type;
1090 
1091 	TRACE_1(TR_FAC_STREAMS_FR,
1092 		TR_STRREAD_ENTER, "strread:%p", vp);
1093 	ASSERT(vp->v_stream);
1094 	stp = vp->v_stream;
1095 
1096 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
1097 		if (error = straccess(stp, JCREAD))
1098 			return (error);
1099 
1100 	mutex_enter(&stp->sd_lock);
1101 	if (stp->sd_flag & (STRDERR|STPLEX)) {
1102 		error = strgeterr(stp, STRDERR|STPLEX, 0);
1103 		if (error != 0) {
1104 			mutex_exit(&stp->sd_lock);
1105 			return (error);
1106 		}
1107 	}
1108 
1109 	/*
1110 	 * Loop terminates when uiop->uio_resid == 0.
1111 	 */
1112 	rflg = 0;
1113 	waitflag = READWAIT;
1114 	q = _RD(stp->sd_wrq);
1115 	for (;;) {
1116 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1117 		old_sd_flag = stp->sd_flag;
1118 		mark = 0;
1119 		delim = 0;
1120 		first = 1;
1121 		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
1122 			int done = 0;
1123 
1124 			ASSERT(MUTEX_HELD(&stp->sd_lock));
1125 
1126 			if (error != 0)
1127 				goto oops;
1128 
1129 			if (stp->sd_flag & (STRHUP|STREOF)) {
1130 				goto oops;
1131 			}
1132 			if (rflg && !(stp->sd_flag & STRDELIM)) {
1133 				goto oops;
1134 			}
1135 			/*
1136 			 * If a read(fd,buf,0) has been done, there is no
1137 			 * need to sleep. We always have zero bytes to
1138 			 * return.
1139 			 */
1140 			if (uiop->uio_resid == 0) {
1141 				goto oops;
1142 			}
1143 
1144 			qbackenable(q, 0);
1145 
1146 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
1147 				"strread calls strwaitq:%p, %p, %p",
1148 				vp, uiop, crp);
1149 			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
1150 			    uiop->uio_fmode, -1, &done)) != 0 || done) {
1151 				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
1152 					"strread error or done:%p, %p, %p",
1153 					vp, uiop, crp);
1154 				if ((uiop->uio_fmode & FNDELAY) &&
1155 				    (stp->sd_flag & OLDNDELAY) &&
1156 				    (error == EAGAIN))
1157 					error = 0;
1158 				goto oops;
1159 			}
1160 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
1161 				"strread awakes:%p, %p, %p", vp, uiop, crp);
1162 			if (stp->sd_sidp != NULL &&
1163 			    stp->sd_vnode->v_type != VFIFO) {
1164 				mutex_exit(&stp->sd_lock);
1165 				if (error = straccess(stp, JCREAD))
1166 					goto oops1;
1167 				mutex_enter(&stp->sd_lock);
1168 			}
1169 			first = 0;
1170 		}
1171 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1172 		ASSERT(bp);
1173 		pri = bp->b_band;
1174 		/*
1175 		 * Extract any mark information. If the message is not
1176 		 * completely consumed this information will be put in the mblk
1177 		 * that is putback.
1178 		 * If MSGMARKNEXT is set and the message is completely consumed
1179 		 * the STRATMARK flag will be set below. Likewise, if
1180 		 * MSGNOTMARKNEXT is set and the message is
1181 		 * completely consumed STRNOTATMARK will be set.
1182 		 *
1183 		 * For some unknown reason strread only breaks the read at the
1184 		 * last mark.
1185 		 */
1186 		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
1187 		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
1188 			(MSGMARKNEXT|MSGNOTMARKNEXT));
1189 		if (mark != 0 && bp == stp->sd_mark) {
1190 			if (rflg) {
1191 				putback(stp, q, bp, pri);
1192 				goto oops;
1193 			}
1194 			mark |= _LASTMARK;
1195 			stp->sd_mark = NULL;
1196 		}
1197 		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
1198 			delim = 1;
1199 		mutex_exit(&stp->sd_lock);
1200 
1201 		if (STREAM_NEEDSERVICE(stp))
1202 			stream_runservice(stp);
1203 
1204 		type = bp->b_datap->db_type;
1205 
1206 		switch (type) {
1207 
1208 		case M_DATA:
1209 ismdata:
1210 			if (msgnodata(bp)) {
1211 				if (mark || delim) {
1212 					freemsg(bp);
1213 				} else if (rflg) {
1214 
1215 					/*
1216 					 * If already read data put zero
1217 					 * length message back on queue else
1218 					 * free msg and return 0.
1219 					 */
1220 					bp->b_band = pri;
1221 					mutex_enter(&stp->sd_lock);
1222 					putback(stp, q, bp, pri);
1223 					mutex_exit(&stp->sd_lock);
1224 				} else {
1225 					freemsg(bp);
1226 				}
1227 				error =  0;
1228 				goto oops1;
1229 			}
1230 
1231 			rflg = 1;
1232 			waitflag |= NOINTR;
1233 			bp = struiocopyout(bp, uiop, &error);
1234 			if (error != 0)
1235 				goto oops1;
1236 
1237 			mutex_enter(&stp->sd_lock);
1238 			if (bp) {
1239 				/*
1240 				 * Have remaining data in message.
1241 				 * Free msg if in discard mode.
1242 				 */
1243 				if (stp->sd_read_opt & RD_MSGDIS) {
1244 					freemsg(bp);
1245 				} else {
1246 					bp->b_band = pri;
1247 					if ((mark & _LASTMARK) &&
1248 					    (stp->sd_mark == NULL))
1249 						stp->sd_mark = bp;
1250 					bp->b_flag |= mark & ~_LASTMARK;
1251 					if (delim)
1252 						bp->b_flag |= MSGDELIM;
1253 					if (msgnodata(bp))
1254 						freemsg(bp);
1255 					else
1256 						putback(stp, q, bp, pri);
1257 				}
1258 			} else {
1259 				/*
1260 				 * Consumed the complete message.
1261 				 * Move the MSG*MARKNEXT information
1262 				 * to the stream head just in case
1263 				 * the read queue becomes empty.
1264 				 *
1265 				 * If the stream head was at the mark
1266 				 * (STRATMARK) before we dropped sd_lock above
1267 				 * and some data was consumed then we have
1268 				 * moved past the mark thus STRATMARK is
1269 				 * cleared. However, if a message arrived in
1270 				 * strrput during the copyout above causing
1271 				 * STRATMARK to be set we can not clear that
1272 				 * flag.
1273 				 */
1274 				if (mark &
1275 				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
1276 					if (mark & MSGMARKNEXT) {
1277 						stp->sd_flag &= ~STRNOTATMARK;
1278 						stp->sd_flag |= STRATMARK;
1279 					} else if (mark & MSGNOTMARKNEXT) {
1280 						stp->sd_flag &= ~STRATMARK;
1281 						stp->sd_flag |= STRNOTATMARK;
1282 					} else {
1283 						stp->sd_flag &=
1284 						    ~(STRATMARK|STRNOTATMARK);
1285 					}
1286 				} else if (rflg && (old_sd_flag & STRATMARK)) {
1287 					stp->sd_flag &= ~STRATMARK;
1288 				}
1289 			}
1290 
1291 			/*
1292 			 * Check for signal messages at the front of the read
1293 			 * queue and generate the signal(s) if appropriate.
1294 			 * The only signal that can be on queue is M_SIG at
1295 			 * this point.
1296 			 */
1297 			while ((((bp = q->q_first)) != NULL) &&
1298 				(bp->b_datap->db_type == M_SIG)) {
1299 				bp = getq_noenab(q);
1300 				/*
1301 				 * sd_lock is held so the content of the
1302 				 * read queue can not change.
1303 				 */
1304 				ASSERT(bp != NULL &&
1305 					bp->b_datap->db_type == M_SIG);
1306 				strsignal_nolock(stp, *bp->b_rptr,
1307 					(int32_t)bp->b_band);
1308 				mutex_exit(&stp->sd_lock);
1309 				freemsg(bp);
1310 				if (STREAM_NEEDSERVICE(stp))
1311 					stream_runservice(stp);
1312 				mutex_enter(&stp->sd_lock);
1313 			}
1314 
1315 			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
1316 			    delim ||
1317 			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
1318 				goto oops;
1319 			}
1320 			continue;
1321 
1322 		case M_SIG:
1323 			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
1324 			freemsg(bp);
1325 			mutex_enter(&stp->sd_lock);
1326 			continue;
1327 
1328 		case M_PROTO:
1329 		case M_PCPROTO:
1330 			/*
1331 			 * Only data messages are readable.
1332 			 * Any others generate an error, unless
1333 			 * RD_PROTDIS or RD_PROTDAT is set.
1334 			 */
1335 			if (stp->sd_read_opt & RD_PROTDAT) {
1336 				for (nbp = bp; nbp; nbp = nbp->b_next) {
1337 				    if ((nbp->b_datap->db_type == M_PROTO) ||
1338 					(nbp->b_datap->db_type == M_PCPROTO))
1339 					nbp->b_datap->db_type = M_DATA;
1340 				    else
1341 					break;
1342 				}
1343 				/*
1344 				 * clear stream head hi pri flag based on
1345 				 * first message
1346 				 */
1347 				if (type == M_PCPROTO) {
1348 					mutex_enter(&stp->sd_lock);
1349 					stp->sd_flag &= ~STRPRI;
1350 					mutex_exit(&stp->sd_lock);
1351 				}
1352 				goto ismdata;
1353 			} else if (stp->sd_read_opt & RD_PROTDIS) {
1354 				/*
1355 				 * discard non-data messages
1356 				 */
1357 				while (bp &&
1358 				    ((bp->b_datap->db_type == M_PROTO) ||
1359 				    (bp->b_datap->db_type == M_PCPROTO))) {
1360 					nbp = unlinkb(bp);
1361 					freeb(bp);
1362 					bp = nbp;
1363 				}
1364 				/*
1365 				 * clear stream head hi pri flag based on
1366 				 * first message
1367 				 */
1368 				if (type == M_PCPROTO) {
1369 					mutex_enter(&stp->sd_lock);
1370 					stp->sd_flag &= ~STRPRI;
1371 					mutex_exit(&stp->sd_lock);
1372 				}
1373 				if (bp) {
1374 					bp->b_band = pri;
1375 					goto ismdata;
1376 				} else {
1377 					break;
1378 				}
1379 			}
1380 			/* FALLTHRU */
1381 		case M_PASSFP:
1382 			if ((bp->b_datap->db_type == M_PASSFP) &&
1383 			    (stp->sd_read_opt & RD_PROTDIS)) {
1384 				freemsg(bp);
1385 				break;
1386 			}
1387 			mutex_enter(&stp->sd_lock);
1388 			putback(stp, q, bp, pri);
1389 			mutex_exit(&stp->sd_lock);
1390 			if (rflg == 0)
1391 				error = EBADMSG;
1392 			goto oops1;
1393 
1394 		default:
1395 			/*
1396 			 * Garbage on stream head read queue.
1397 			 */
1398 			cmn_err(CE_WARN, "bad %x found at stream head\n",
1399 				bp->b_datap->db_type);
1400 			freemsg(bp);
1401 			goto oops1;
1402 		}
1403 		mutex_enter(&stp->sd_lock);
1404 	}
1405 oops:
1406 	mutex_exit(&stp->sd_lock);
1407 oops1:
1408 	qbackenable(q, pri);
1409 	return (error);
1410 #undef	_LASTMARK
1411 }
1412 
1413 /*
1414  * Default processing of M_PROTO/M_PCPROTO messages.
1415  * Determine which wakeups and signals are needed.
1416  * This can be replaced by a user-specified procedure for kernel users
1417  * of STREAMS.
1418  */
1419 /* ARGSUSED */
1420 mblk_t *
1421 strrput_proto(vnode_t *vp, mblk_t *mp,
1422     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1423     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1424 {
1425 	*wakeups = RSLEEP;
1426 	*allmsgsigs = 0;
1427 
1428 	switch (mp->b_datap->db_type) {
1429 	case M_PROTO:
1430 		if (mp->b_band == 0) {
1431 			*firstmsgsigs = S_INPUT | S_RDNORM;
1432 			*pollwakeups = POLLIN | POLLRDNORM;
1433 		} else {
1434 			*firstmsgsigs = S_INPUT | S_RDBAND;
1435 			*pollwakeups = POLLIN | POLLRDBAND;
1436 		}
1437 		break;
1438 	case M_PCPROTO:
1439 		*firstmsgsigs = S_HIPRI;
1440 		*pollwakeups = POLLPRI;
1441 		break;
1442 	}
1443 	return (mp);
1444 }
1445 
1446 /*
1447  * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1448  * M_PASSFP messages.
1449  * Determine which wakeups and signals are needed.
1450  * This can be replaced by a user-specified procedure for kernel users
1451  * of STREAMS.
1452  */
1453 /* ARGSUSED */
1454 mblk_t *
1455 strrput_misc(vnode_t *vp, mblk_t *mp,
1456     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1457     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1458 {
1459 	*wakeups = 0;
1460 	*firstmsgsigs = 0;
1461 	*allmsgsigs = 0;
1462 	*pollwakeups = 0;
1463 	return (mp);
1464 }
1465 
1466 /*
1467  * Stream read put procedure.  Called from downstream driver/module
1468  * with messages for the stream head.  Data, protocol, and in-stream
1469  * signal messages are placed on the queue, others are handled directly.
1470  */
1471 int
1472 strrput(queue_t *q, mblk_t *bp)
1473 {
1474 	struct stdata	*stp;
1475 	ulong_t		rput_opt;
1476 	strwakeup_t	wakeups;
1477 	strsigset_t	firstmsgsigs;	/* Signals if first message on queue */
1478 	strsigset_t	allmsgsigs;	/* Signals for all messages */
1479 	strsigset_t	signals;	/* Signals events to generate */
1480 	strpollset_t	pollwakeups;
1481 	mblk_t		*nextbp;
1482 	uchar_t		band = 0;
1483 	int		hipri_sig;
1484 
1485 	stp = (struct stdata *)q->q_ptr;
1486 	/*
1487 	 * Use rput_opt for optimized access to the SR_ flags except
1488 	 * SR_POLLIN. That flag has to be checked under sd_lock since it
1489 	 * is modified by strpoll().
1490 	 */
1491 	rput_opt = stp->sd_rput_opt;
1492 
1493 	ASSERT(qclaimed(q));
1494 	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
1495 		"strrput called with message type:q %p bp %p", q, bp);
1496 
1497 	/*
1498 	 * Perform initial processing and pass to the parameterized functions.
1499 	 */
1500 	ASSERT(bp->b_next == NULL);
1501 
1502 	switch (bp->b_datap->db_type) {
1503 	case M_DATA:
1504 		/*
1505 		 * sockfs is the only consumer of STREOF and when it is set,
1506 		 * it implies that the receiver is not interested in receiving
1507 		 * any more data, hence the mblk is freed to prevent unnecessary
1508 		 * message queueing at the stream head.
1509 		 */
1510 		if (stp->sd_flag == STREOF) {
1511 			freemsg(bp);
1512 			return (0);
1513 		}
1514 		if ((rput_opt & SR_IGN_ZEROLEN) &&
1515 		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
1516 			/*
1517 			 * Ignore zero-length M_DATA messages. These might be
1518 			 * generated by some transports.
1519 			 * The zero-length M_DATA messages, even if they
1520 			 * are ignored, should effect the atmark tracking and
1521 			 * should wake up a thread sleeping in strwaitmark.
1522 			 */
1523 			mutex_enter(&stp->sd_lock);
1524 			if (bp->b_flag & MSGMARKNEXT) {
1525 				/*
1526 				 * Record the position of the mark either
1527 				 * in q_last or in STRATMARK.
1528 				 */
1529 				if (q->q_last != NULL) {
1530 					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
1531 					q->q_last->b_flag |= MSGMARKNEXT;
1532 				} else {
1533 					stp->sd_flag &= ~STRNOTATMARK;
1534 					stp->sd_flag |= STRATMARK;
1535 				}
1536 			} else if (bp->b_flag & MSGNOTMARKNEXT) {
1537 				/*
1538 				 * Record that this is not the position of
1539 				 * the mark either in q_last or in
1540 				 * STRNOTATMARK.
1541 				 */
1542 				if (q->q_last != NULL) {
1543 					q->q_last->b_flag &= ~MSGMARKNEXT;
1544 					q->q_last->b_flag |= MSGNOTMARKNEXT;
1545 				} else {
1546 					stp->sd_flag &= ~STRATMARK;
1547 					stp->sd_flag |= STRNOTATMARK;
1548 				}
1549 			}
1550 			if (stp->sd_flag & RSLEEP) {
1551 				stp->sd_flag &= ~RSLEEP;
1552 				cv_broadcast(&q->q_wait);
1553 			}
1554 			mutex_exit(&stp->sd_lock);
1555 			freemsg(bp);
1556 			return (0);
1557 		}
1558 		wakeups = RSLEEP;
1559 		if (bp->b_band == 0) {
1560 			firstmsgsigs = S_INPUT | S_RDNORM;
1561 			pollwakeups = POLLIN | POLLRDNORM;
1562 		} else {
1563 			firstmsgsigs = S_INPUT | S_RDBAND;
1564 			pollwakeups = POLLIN | POLLRDBAND;
1565 		}
1566 		if (rput_opt & SR_SIGALLDATA)
1567 			allmsgsigs = firstmsgsigs;
1568 		else
1569 			allmsgsigs = 0;
1570 
1571 		mutex_enter(&stp->sd_lock);
1572 		if ((rput_opt & SR_CONSOL_DATA) &&
1573 		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
1574 			/*
1575 			 * Consolidate on M_DATA message onto an M_DATA,
1576 			 * M_PROTO, or M_PCPROTO by merging it with q_last.
1577 			 * The consolidation does not take place if
1578 			 * the old message is marked with either of the
1579 			 * marks or the delim flag or if the new
1580 			 * message is marked with MSGMARK. The MSGMARK
1581 			 * check is needed to handle the odd semantics of
1582 			 * MSGMARK where essentially the whole message
1583 			 * is to be treated as marked.
1584 			 * Carry any MSGMARKNEXT  and MSGNOTMARKNEXT from the
1585 			 * new message to the front of the b_cont chain.
1586 			 */
1587 			mblk_t *lbp;
1588 
1589 			lbp = q->q_last;
1590 			if (lbp != NULL &&
1591 			    (lbp->b_datap->db_type == M_DATA ||
1592 			    lbp->b_datap->db_type == M_PROTO ||
1593 			    lbp->b_datap->db_type == M_PCPROTO) &&
1594 			    !(lbp->b_flag & (MSGDELIM|MSGMARK|
1595 			    MSGMARKNEXT))) {
1596 				rmvq_noenab(q, lbp);
1597 				/*
1598 				 * The first message in the b_cont list
1599 				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
1600 				 * We need to handle the case where we
1601 				 * are appending
1602 				 *
1603 				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
1604 				 * 2) a MSGMARKNEXT to a plain message.
1605 				 * 3) a MSGNOTMARKNEXT to a plain message
1606 				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
1607 				 *    message.
1608 				 *
1609 				 * Thus we never append a MSGMARKNEXT or
1610 				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
1611 				 */
1612 				if (bp->b_flag & MSGMARKNEXT) {
1613 					lbp->b_flag |= MSGMARKNEXT;
1614 					lbp->b_flag &= ~MSGNOTMARKNEXT;
1615 					bp->b_flag &= ~MSGMARKNEXT;
1616 				} else if (bp->b_flag & MSGNOTMARKNEXT) {
1617 					lbp->b_flag |= MSGNOTMARKNEXT;
1618 					bp->b_flag &= ~MSGNOTMARKNEXT;
1619 				}
1620 
1621 				linkb(lbp, bp);
1622 				bp = lbp;
1623 				/*
1624 				 * The new message logically isn't the first
1625 				 * even though the q_first check below thinks
1626 				 * it is. Clear the firstmsgsigs to make it
1627 				 * not appear to be first.
1628 				 */
1629 				firstmsgsigs = 0;
1630 			}
1631 		}
1632 		break;
1633 
1634 	case M_PASSFP:
1635 		wakeups = RSLEEP;
1636 		allmsgsigs = 0;
1637 		if (bp->b_band == 0) {
1638 			firstmsgsigs = S_INPUT | S_RDNORM;
1639 			pollwakeups = POLLIN | POLLRDNORM;
1640 		} else {
1641 			firstmsgsigs = S_INPUT | S_RDBAND;
1642 			pollwakeups = POLLIN | POLLRDBAND;
1643 		}
1644 		mutex_enter(&stp->sd_lock);
1645 		break;
1646 
1647 	case M_PROTO:
1648 	case M_PCPROTO:
1649 		ASSERT(stp->sd_rprotofunc != NULL);
1650 		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
1651 			&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1652 #define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
1653 		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
1654 #define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
1655 		POLLWRBAND)
1656 
1657 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1658 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1659 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
1660 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
1661 
1662 		mutex_enter(&stp->sd_lock);
1663 		break;
1664 
1665 	default:
1666 		ASSERT(stp->sd_rmiscfunc != NULL);
1667 		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
1668 			&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1669 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1670 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1671 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
1672 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
1673 #undef	ALLSIG
1674 #undef	ALLPOLL
1675 		mutex_enter(&stp->sd_lock);
1676 		break;
1677 	}
1678 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1679 
1680 	/* By default generate superset of signals */
1681 	signals = (firstmsgsigs | allmsgsigs);
1682 
1683 	/*
1684 	 * The  proto and misc functions can return multiple messages
1685 	 * as a b_next chain. Such messages are processed separately.
1686 	 */
1687 one_more:
1688 	hipri_sig = 0;
1689 	if (bp == NULL) {
1690 		nextbp = NULL;
1691 	} else {
1692 		nextbp = bp->b_next;
1693 		bp->b_next = NULL;
1694 
1695 		switch (bp->b_datap->db_type) {
1696 		case M_PCPROTO:
1697 			/*
1698 			 * Only one priority protocol message is allowed at the
1699 			 * stream head at a time.
1700 			 */
1701 			if (stp->sd_flag & STRPRI) {
1702 				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
1703 				    "M_PCPROTO already at head");
1704 				freemsg(bp);
1705 				mutex_exit(&stp->sd_lock);
1706 				goto done;
1707 			}
1708 			stp->sd_flag |= STRPRI;
1709 			hipri_sig = 1;
1710 			/* FALLTHRU */
1711 		case M_DATA:
1712 		case M_PROTO:
1713 		case M_PASSFP:
1714 			band = bp->b_band;
1715 			/*
1716 			 * Marking doesn't work well when messages
1717 			 * are marked in more than one band.  We only
1718 			 * remember the last message received, even if
1719 			 * it is placed on the queue ahead of other
1720 			 * marked messages.
1721 			 */
1722 			if (bp->b_flag & MSGMARK)
1723 				stp->sd_mark = bp;
1724 			(void) putq(q, bp);
1725 
1726 			/*
1727 			 * If message is a PCPROTO message, always use
1728 			 * firstmsgsigs to determine if a signal should be
1729 			 * sent as strrput is the only place to send
1730 			 * signals for PCPROTO. Other messages are based on
1731 			 * the STRGETINPROG flag. The flag determines if
1732 			 * strrput or (k)strgetmsg will be responsible for
1733 			 * sending the signals, in the firstmsgsigs case.
1734 			 */
1735 			if ((hipri_sig == 1) ||
1736 			    (((stp->sd_flag & STRGETINPROG) == 0) &&
1737 			    (q->q_first == bp)))
1738 				signals = (firstmsgsigs | allmsgsigs);
1739 			else
1740 				signals = allmsgsigs;
1741 			break;
1742 
1743 		default:
1744 			mutex_exit(&stp->sd_lock);
1745 			(void) strrput_nondata(q, bp);
1746 			mutex_enter(&stp->sd_lock);
1747 			break;
1748 		}
1749 	}
1750 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1751 	/*
1752 	 * Wake sleeping read/getmsg and cancel deferred wakeup
1753 	 */
1754 	if (wakeups & RSLEEP)
1755 		stp->sd_wakeq &= ~RSLEEP;
1756 
1757 	wakeups &= stp->sd_flag;
1758 	if (wakeups & RSLEEP) {
1759 		stp->sd_flag &= ~RSLEEP;
1760 		cv_broadcast(&q->q_wait);
1761 	}
1762 	if (wakeups & WSLEEP) {
1763 		stp->sd_flag &= ~WSLEEP;
1764 		cv_broadcast(&_WR(q)->q_wait);
1765 	}
1766 
1767 	if (pollwakeups != 0) {
1768 		if (pollwakeups == (POLLIN | POLLRDNORM)) {
1769 			/*
1770 			 * Can't use rput_opt since it was not
1771 			 * read when sd_lock was held and SR_POLLIN is changed
1772 			 * by strpoll() under sd_lock.
1773 			 */
1774 			if (!(stp->sd_rput_opt & SR_POLLIN))
1775 				goto no_pollwake;
1776 			stp->sd_rput_opt &= ~SR_POLLIN;
1777 		}
1778 		mutex_exit(&stp->sd_lock);
1779 		pollwakeup(&stp->sd_pollist, pollwakeups);
1780 		mutex_enter(&stp->sd_lock);
1781 	}
1782 no_pollwake:
1783 
1784 	/*
1785 	 * strsendsig can handle multiple signals with a
1786 	 * single call.
1787 	 */
1788 	if (stp->sd_sigflags & signals)
1789 		strsendsig(stp->sd_siglist, signals, band, 0);
1790 	mutex_exit(&stp->sd_lock);
1791 
1792 
1793 done:
1794 	if (nextbp == NULL)
1795 		return (0);
1796 
1797 	/*
1798 	 * Any signals were handled the first time.
1799 	 * Wakeups and pollwakeups are redone to avoid any race
1800 	 * conditions - all the messages are not queued until the
1801 	 * last message has been processed by strrput.
1802 	 */
1803 	bp = nextbp;
1804 	signals = firstmsgsigs = allmsgsigs = 0;
1805 	mutex_enter(&stp->sd_lock);
1806 	goto one_more;
1807 }
1808 
1809 static void
1810 log_dupioc(queue_t *rq, mblk_t *bp)
1811 {
1812 	queue_t *wq, *qp;
1813 	char *modnames, *mnp, *dname;
1814 	size_t maxmodstr;
1815 	boolean_t islast;
1816 
1817 	/*
1818 	 * Allocate a buffer large enough to hold the names of nstrpush modules
1819 	 * and one driver, with spaces between and NUL terminator.  If we can't
1820 	 * get memory, then we'll just log the driver name.
1821 	 */
1822 	maxmodstr = nstrpush * (FMNAMESZ + 1);
1823 	mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1824 
1825 	/* march down write side to print log message down to the driver */
1826 	wq = WR(rq);
1827 
1828 	/* make sure q_next doesn't shift around while we're grabbing data */
1829 	claimstr(wq);
1830 	qp = wq->q_next;
1831 	do {
1832 		if ((dname = qp->q_qinfo->qi_minfo->mi_idname) == NULL)
1833 			dname = "?";
1834 		islast = !SAMESTR(qp) || qp->q_next == NULL;
1835 		if (modnames == NULL) {
1836 			/*
1837 			 * If we don't have memory, then get the driver name in
1838 			 * the log where we can see it.  Note that memory
1839 			 * pressure is a possible cause of these sorts of bugs.
1840 			 */
1841 			if (islast) {
1842 				modnames = dname;
1843 				maxmodstr = 0;
1844 			}
1845 		} else {
1846 			mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1847 			if (!islast)
1848 				*mnp++ = ' ';
1849 		}
1850 		qp = qp->q_next;
1851 	} while (!islast);
1852 	releasestr(wq);
1853 	/* Cannot happen unless stream head is corrupt. */
1854 	ASSERT(modnames != NULL);
1855 	(void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1856 	    SL_CONSOLE|SL_TRACE|SL_ERROR,
1857 	    "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1858 	    rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1859 	    (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1860 	if (maxmodstr != 0)
1861 		kmem_free(modnames, maxmodstr);
1862 }
1863 
1864 int
1865 strrput_nondata(queue_t *q, mblk_t *bp)
1866 {
1867 	struct stdata *stp;
1868 	struct iocblk *iocbp;
1869 	struct stroptions *sop;
1870 	struct copyreq *reqp;
1871 	struct copyresp *resp;
1872 	unsigned char bpri;
1873 	unsigned char  flushed_already = 0;
1874 
1875 	stp = (struct stdata *)q->q_ptr;
1876 
1877 	ASSERT(!(stp->sd_flag & STPLEX));
1878 	ASSERT(qclaimed(q));
1879 
1880 	switch (bp->b_datap->db_type) {
1881 	case M_ERROR:
1882 		/*
1883 		 * An error has occurred downstream, the errno is in the first
1884 		 * bytes of the message.
1885 		 */
1886 		if ((bp->b_wptr - bp->b_rptr) == 2) {	/* New flavor */
1887 			unsigned char rw = 0;
1888 
1889 			mutex_enter(&stp->sd_lock);
1890 			if (*bp->b_rptr != NOERROR) {	/* read error */
1891 				if (*bp->b_rptr != 0) {
1892 					if (stp->sd_flag & STRDERR)
1893 						flushed_already |= FLUSHR;
1894 					stp->sd_flag |= STRDERR;
1895 					rw |= FLUSHR;
1896 				} else {
1897 					stp->sd_flag &= ~STRDERR;
1898 				}
1899 				stp->sd_rerror = *bp->b_rptr;
1900 			}
1901 			bp->b_rptr++;
1902 			if (*bp->b_rptr != NOERROR) {	/* write error */
1903 				if (*bp->b_rptr != 0) {
1904 					if (stp->sd_flag & STWRERR)
1905 						flushed_already |= FLUSHW;
1906 					stp->sd_flag |= STWRERR;
1907 					rw |= FLUSHW;
1908 				} else {
1909 					stp->sd_flag &= ~STWRERR;
1910 				}
1911 				stp->sd_werror = *bp->b_rptr;
1912 			}
1913 			if (rw) {
1914 				TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
1915 					"strrput cv_broadcast:q %p, bp %p",
1916 					q, bp);
1917 				cv_broadcast(&q->q_wait); /* readers */
1918 				cv_broadcast(&_WR(q)->q_wait); /* writers */
1919 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
1920 
1921 				mutex_exit(&stp->sd_lock);
1922 				pollwakeup(&stp->sd_pollist, POLLERR);
1923 				mutex_enter(&stp->sd_lock);
1924 
1925 				if (stp->sd_sigflags & S_ERROR)
1926 					strsendsig(stp->sd_siglist, S_ERROR, 0,
1927 					    ((rw & FLUSHR) ? stp->sd_rerror :
1928 					    stp->sd_werror));
1929 				mutex_exit(&stp->sd_lock);
1930 				/*
1931 				 * Send the M_FLUSH only
1932 				 * for the first M_ERROR
1933 				 * message on the stream
1934 				 */
1935 				if (flushed_already == rw) {
1936 					freemsg(bp);
1937 					return (0);
1938 				}
1939 
1940 				bp->b_datap->db_type = M_FLUSH;
1941 				*bp->b_rptr = rw;
1942 				bp->b_wptr = bp->b_rptr + 1;
1943 				/*
1944 				 * Protect against the driver
1945 				 * passing up messages after
1946 				 * it has done a qprocsoff
1947 				 */
1948 				if (_OTHERQ(q)->q_next == NULL)
1949 					freemsg(bp);
1950 				else
1951 					qreply(q, bp);
1952 				return (0);
1953 			} else
1954 				mutex_exit(&stp->sd_lock);
1955 		} else if (*bp->b_rptr != 0) {		/* Old flavor */
1956 				if (stp->sd_flag & (STRDERR|STWRERR))
1957 					flushed_already = FLUSHRW;
1958 				mutex_enter(&stp->sd_lock);
1959 				stp->sd_flag |= (STRDERR|STWRERR);
1960 				stp->sd_rerror = *bp->b_rptr;
1961 				stp->sd_werror = *bp->b_rptr;
1962 				TRACE_2(TR_FAC_STREAMS_FR,
1963 					TR_STRRPUT_WAKE2,
1964 					"strrput wakeup #2:q %p, bp %p", q, bp);
1965 				cv_broadcast(&q->q_wait); /* the readers */
1966 				cv_broadcast(&_WR(q)->q_wait); /* the writers */
1967 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
1968 
1969 				mutex_exit(&stp->sd_lock);
1970 				pollwakeup(&stp->sd_pollist, POLLERR);
1971 				mutex_enter(&stp->sd_lock);
1972 
1973 				if (stp->sd_sigflags & S_ERROR)
1974 					strsendsig(stp->sd_siglist, S_ERROR, 0,
1975 					    (stp->sd_werror ? stp->sd_werror :
1976 					    stp->sd_rerror));
1977 				mutex_exit(&stp->sd_lock);
1978 
1979 				/*
1980 				 * Send the M_FLUSH only
1981 				 * for the first M_ERROR
1982 				 * message on the stream
1983 				 */
1984 				if (flushed_already != FLUSHRW) {
1985 					bp->b_datap->db_type = M_FLUSH;
1986 					*bp->b_rptr = FLUSHRW;
1987 					/*
1988 					 * Protect against the driver passing up
1989 					 * messages after it has done a
1990 					 * qprocsoff.
1991 					 */
1992 				if (_OTHERQ(q)->q_next == NULL)
1993 					freemsg(bp);
1994 				else
1995 					qreply(q, bp);
1996 				return (0);
1997 				}
1998 		}
1999 		freemsg(bp);
2000 		return (0);
2001 
2002 	case M_HANGUP:
2003 
2004 		freemsg(bp);
2005 		mutex_enter(&stp->sd_lock);
2006 		stp->sd_werror = ENXIO;
2007 		stp->sd_flag |= STRHUP;
2008 		stp->sd_flag &= ~(WSLEEP|RSLEEP);
2009 
2010 		/*
2011 		 * send signal if controlling tty
2012 		 */
2013 
2014 		if (stp->sd_sidp) {
2015 			prsignal(stp->sd_sidp, SIGHUP);
2016 			if (stp->sd_sidp != stp->sd_pgidp)
2017 				pgsignal(stp->sd_pgidp, SIGTSTP);
2018 		}
2019 
2020 		/*
2021 		 * wake up read, write, and exception pollers and
2022 		 * reset wakeup mechanism.
2023 		 */
2024 		cv_broadcast(&q->q_wait);	/* the readers */
2025 		cv_broadcast(&_WR(q)->q_wait);	/* the writers */
2026 		cv_broadcast(&stp->sd_monitor);	/* the ioctllers */
2027 		mutex_exit(&stp->sd_lock);
2028 		strhup(stp);
2029 		return (0);
2030 
2031 	case M_UNHANGUP:
2032 		freemsg(bp);
2033 		mutex_enter(&stp->sd_lock);
2034 		stp->sd_werror = 0;
2035 		stp->sd_flag &= ~STRHUP;
2036 		mutex_exit(&stp->sd_lock);
2037 		return (0);
2038 
2039 	case M_SIG:
2040 		/*
2041 		 * Someone downstream wants to post a signal.  The
2042 		 * signal to post is contained in the first byte of the
2043 		 * message.  If the message would go on the front of
2044 		 * the queue, send a signal to the process group
2045 		 * (if not SIGPOLL) or to the siglist processes
2046 		 * (SIGPOLL).  If something is already on the queue,
2047 		 * OR if we are delivering a delayed suspend (*sigh*
2048 		 * another "tty" hack) and there's no one sleeping already,
2049 		 * just enqueue the message.
2050 		 */
2051 		mutex_enter(&stp->sd_lock);
2052 		if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2053 		    !(stp->sd_flag & RSLEEP))) {
2054 			(void) putq(q, bp);
2055 			mutex_exit(&stp->sd_lock);
2056 			return (0);
2057 		}
2058 		mutex_exit(&stp->sd_lock);
2059 		/* FALLTHRU */
2060 
2061 	case M_PCSIG:
2062 		/*
2063 		 * Don't enqueue, just post the signal.
2064 		 */
2065 		strsignal(stp, *bp->b_rptr, 0L);
2066 		freemsg(bp);
2067 		return (0);
2068 
2069 	case M_FLUSH:
2070 		/*
2071 		 * Flush queues.  The indication of which queues to flush
2072 		 * is in the first byte of the message.  If the read queue
2073 		 * is specified, then flush it.  If FLUSHBAND is set, just
2074 		 * flush the band specified by the second byte of the message.
2075 		 *
2076 		 * If a module has issued a M_SETOPT to not flush hi
2077 		 * priority messages off of the stream head, then pass this
2078 		 * flag into the flushq code to preserve such messages.
2079 		 */
2080 
2081 		if (*bp->b_rptr & FLUSHR) {
2082 			mutex_enter(&stp->sd_lock);
2083 			if (*bp->b_rptr & FLUSHBAND) {
2084 				ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2085 				flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2086 			} else
2087 				flushq_common(q, FLUSHALL,
2088 				    stp->sd_read_opt & RFLUSHPCPROT);
2089 			if ((q->q_first == NULL) ||
2090 			    (q->q_first->b_datap->db_type < QPCTL))
2091 				stp->sd_flag &= ~STRPRI;
2092 			else {
2093 				ASSERT(stp->sd_flag & STRPRI);
2094 			}
2095 			mutex_exit(&stp->sd_lock);
2096 		}
2097 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2098 			*bp->b_rptr &= ~FLUSHR;
2099 			bp->b_flag |= MSGNOLOOP;
2100 			/*
2101 			 * Protect against the driver passing up
2102 			 * messages after it has done a qprocsoff.
2103 			 */
2104 			if (_OTHERQ(q)->q_next == NULL)
2105 				freemsg(bp);
2106 			else
2107 				qreply(q, bp);
2108 			return (0);
2109 		}
2110 		freemsg(bp);
2111 		return (0);
2112 
2113 	case M_IOCACK:
2114 	case M_IOCNAK:
2115 		iocbp = (struct iocblk *)bp->b_rptr;
2116 		/*
2117 		 * If not waiting for ACK or NAK then just free msg.
2118 		 * If incorrect id sequence number then just free msg.
2119 		 * If already have ACK or NAK for user then this is a
2120 		 *    duplicate, display a warning and free the msg.
2121 		 */
2122 		mutex_enter(&stp->sd_lock);
2123 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2124 		    (stp->sd_iocid != iocbp->ioc_id)) {
2125 			/*
2126 			 * If the ACK/NAK is a dup, display a message
2127 			 * Dup is when sd_iocid == ioc_id, and
2128 			 * sd_iocblk == <valid ptr> or -1 (the former
2129 			 * is when an ioctl has been put on the stream
2130 			 * head, but has not yet been consumed, the
2131 			 * latter is when it has been consumed).
2132 			 */
2133 			if ((stp->sd_iocid == iocbp->ioc_id) &&
2134 			    (stp->sd_iocblk != NULL)) {
2135 				log_dupioc(q, bp);
2136 			}
2137 			freemsg(bp);
2138 			mutex_exit(&stp->sd_lock);
2139 			return (0);
2140 		}
2141 
2142 		/*
2143 		 * Assign ACK or NAK to user and wake up.
2144 		 */
2145 		stp->sd_iocblk = bp;
2146 		cv_broadcast(&stp->sd_monitor);
2147 		mutex_exit(&stp->sd_lock);
2148 		return (0);
2149 
2150 	case M_COPYIN:
2151 	case M_COPYOUT:
2152 		reqp = (struct copyreq *)bp->b_rptr;
2153 
2154 		/*
2155 		 * If not waiting for ACK or NAK then just fail request.
2156 		 * If already have ACK, NAK, or copy request, then just
2157 		 * fail request.
2158 		 * If incorrect id sequence number then just fail request.
2159 		 */
2160 		mutex_enter(&stp->sd_lock);
2161 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2162 		    (stp->sd_iocid != reqp->cq_id)) {
2163 			if (bp->b_cont) {
2164 				freemsg(bp->b_cont);
2165 				bp->b_cont = NULL;
2166 			}
2167 			bp->b_datap->db_type = M_IOCDATA;
2168 			bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2169 			resp = (struct copyresp *)bp->b_rptr;
2170 			resp->cp_rval = (caddr_t)1;	/* failure */
2171 			mutex_exit(&stp->sd_lock);
2172 			putnext(stp->sd_wrq, bp);
2173 			return (0);
2174 		}
2175 
2176 		/*
2177 		 * Assign copy request to user and wake up.
2178 		 */
2179 		stp->sd_iocblk = bp;
2180 		cv_broadcast(&stp->sd_monitor);
2181 		mutex_exit(&stp->sd_lock);
2182 		return (0);
2183 
2184 	case M_SETOPTS:
2185 		/*
2186 		 * Set stream head options (read option, write offset,
2187 		 * min/max packet size, and/or high/low water marks for
2188 		 * the read side only).
2189 		 */
2190 
2191 		bpri = 0;
2192 		sop = (struct stroptions *)bp->b_rptr;
2193 		mutex_enter(&stp->sd_lock);
2194 		if (sop->so_flags & SO_READOPT) {
2195 			switch (sop->so_readopt & RMODEMASK) {
2196 			case RNORM:
2197 				stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2198 				break;
2199 
2200 			case RMSGD:
2201 				stp->sd_read_opt =
2202 				    ((stp->sd_read_opt & ~RD_MSGNODIS) |
2203 				    RD_MSGDIS);
2204 				break;
2205 
2206 			case RMSGN:
2207 				stp->sd_read_opt =
2208 				    ((stp->sd_read_opt & ~RD_MSGDIS) |
2209 				    RD_MSGNODIS);
2210 				break;
2211 			}
2212 			switch (sop->so_readopt & RPROTMASK) {
2213 			case RPROTNORM:
2214 				stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2215 				break;
2216 
2217 			case RPROTDAT:
2218 				stp->sd_read_opt =
2219 				    ((stp->sd_read_opt & ~RD_PROTDIS) |
2220 				    RD_PROTDAT);
2221 				break;
2222 
2223 			case RPROTDIS:
2224 				stp->sd_read_opt =
2225 				    ((stp->sd_read_opt & ~RD_PROTDAT) |
2226 				    RD_PROTDIS);
2227 				break;
2228 			}
2229 			switch (sop->so_readopt & RFLUSHMASK) {
2230 			case RFLUSHPCPROT:
2231 				/*
2232 				 * This sets the stream head to NOT flush
2233 				 * M_PCPROTO messages.
2234 				 */
2235 				stp->sd_read_opt |= RFLUSHPCPROT;
2236 				break;
2237 			}
2238 		}
2239 		if (sop->so_flags & SO_ERROPT) {
2240 			switch (sop->so_erropt & RERRMASK) {
2241 			case RERRNORM:
2242 				stp->sd_flag &= ~STRDERRNONPERSIST;
2243 				break;
2244 			case RERRNONPERSIST:
2245 				stp->sd_flag |= STRDERRNONPERSIST;
2246 				break;
2247 			}
2248 			switch (sop->so_erropt & WERRMASK) {
2249 			case WERRNORM:
2250 				stp->sd_flag &= ~STWRERRNONPERSIST;
2251 				break;
2252 			case WERRNONPERSIST:
2253 				stp->sd_flag |= STWRERRNONPERSIST;
2254 				break;
2255 			}
2256 		}
2257 		if (sop->so_flags & SO_COPYOPT) {
2258 			if (sop->so_copyopt & ZCVMSAFE) {
2259 				stp->sd_copyflag |= STZCVMSAFE;
2260 				stp->sd_copyflag &= ~STZCVMUNSAFE;
2261 			} else if (sop->so_copyopt & ZCVMUNSAFE) {
2262 				stp->sd_copyflag |= STZCVMUNSAFE;
2263 				stp->sd_copyflag &= ~STZCVMSAFE;
2264 			}
2265 
2266 			if (sop->so_copyopt & COPYCACHED) {
2267 				stp->sd_copyflag |= STRCOPYCACHED;
2268 			}
2269 		}
2270 		if (sop->so_flags & SO_WROFF)
2271 			stp->sd_wroff = sop->so_wroff;
2272 		if (sop->so_flags & SO_MINPSZ)
2273 			q->q_minpsz = sop->so_minpsz;
2274 		if (sop->so_flags & SO_MAXPSZ)
2275 			q->q_maxpsz = sop->so_maxpsz;
2276 		if (sop->so_flags & SO_MAXBLK)
2277 			stp->sd_maxblk = sop->so_maxblk;
2278 		if (sop->so_flags & SO_HIWAT) {
2279 		    if (sop->so_flags & SO_BAND) {
2280 			if (strqset(q, QHIWAT, sop->so_band, sop->so_hiwat))
2281 				cmn_err(CE_WARN,
2282 				    "strrput: could not allocate qband\n");
2283 			else
2284 				bpri = sop->so_band;
2285 		    } else {
2286 			q->q_hiwat = sop->so_hiwat;
2287 		    }
2288 		}
2289 		if (sop->so_flags & SO_LOWAT) {
2290 		    if (sop->so_flags & SO_BAND) {
2291 			if (strqset(q, QLOWAT, sop->so_band, sop->so_lowat))
2292 				cmn_err(CE_WARN,
2293 				    "strrput: could not allocate qband\n");
2294 			else
2295 				bpri = sop->so_band;
2296 		    } else {
2297 			q->q_lowat = sop->so_lowat;
2298 		    }
2299 		}
2300 		if (sop->so_flags & SO_MREADON)
2301 			stp->sd_flag |= SNDMREAD;
2302 		if (sop->so_flags & SO_MREADOFF)
2303 			stp->sd_flag &= ~SNDMREAD;
2304 		if (sop->so_flags & SO_NDELON)
2305 			stp->sd_flag |= OLDNDELAY;
2306 		if (sop->so_flags & SO_NDELOFF)
2307 			stp->sd_flag &= ~OLDNDELAY;
2308 		if (sop->so_flags & SO_ISTTY)
2309 			stp->sd_flag |= STRISTTY;
2310 		if (sop->so_flags & SO_ISNTTY)
2311 			stp->sd_flag &= ~STRISTTY;
2312 		if (sop->so_flags & SO_TOSTOP)
2313 			stp->sd_flag |= STRTOSTOP;
2314 		if (sop->so_flags & SO_TONSTOP)
2315 			stp->sd_flag &= ~STRTOSTOP;
2316 		if (sop->so_flags & SO_DELIM)
2317 			stp->sd_flag |= STRDELIM;
2318 		if (sop->so_flags & SO_NODELIM)
2319 			stp->sd_flag &= ~STRDELIM;
2320 
2321 		mutex_exit(&stp->sd_lock);
2322 		freemsg(bp);
2323 
2324 		/* Check backenable in case the water marks changed */
2325 		qbackenable(q, bpri);
2326 		return (0);
2327 
2328 	/*
2329 	 * The following set of cases deal with situations where two stream
2330 	 * heads are connected to each other (twisted streams).  These messages
2331 	 * have no meaning at the stream head.
2332 	 */
2333 	case M_BREAK:
2334 	case M_CTL:
2335 	case M_DELAY:
2336 	case M_START:
2337 	case M_STOP:
2338 	case M_IOCDATA:
2339 	case M_STARTI:
2340 	case M_STOPI:
2341 		freemsg(bp);
2342 		return (0);
2343 
2344 	case M_IOCTL:
2345 		/*
2346 		 * Always NAK this condition
2347 		 * (makes no sense)
2348 		 * If there is one or more threads in the read side
2349 		 * rwnext we have to defer the nacking until that thread
2350 		 * returns (in strget).
2351 		 */
2352 		mutex_enter(&stp->sd_lock);
2353 		if (stp->sd_struiodnak != 0) {
2354 			/*
2355 			 * Defer NAK to the streamhead. Queue at the end
2356 			 * Defer NAK to the streamhead. Queue at the end
2357 			 * of the list.
2358 			mblk_t *mp = stp->sd_struionak;
2359 
2360 			while (mp && mp->b_next)
2361 				mp = mp->b_next;
2362 			if (mp)
2363 				mp->b_next = bp;
2364 			else
2365 				stp->sd_struionak = bp;
2366 			bp->b_next = NULL;
2367 			mutex_exit(&stp->sd_lock);
2368 			return (0);
2369 		}
2370 		mutex_exit(&stp->sd_lock);
2371 
2372 		bp->b_datap->db_type = M_IOCNAK;
2373 		/*
2374 		 * Protect against the driver passing up
2375 		 * messages after it has done a qprocsoff.
2376 		 */
2377 		if (_OTHERQ(q)->q_next == NULL)
2378 			freemsg(bp);
2379 		else
2380 			qreply(q, bp);
2381 		return (0);
2382 
2383 	default:
2384 #ifdef DEBUG
2385 		cmn_err(CE_WARN,
2386 			"bad message type %x received at stream head\n",
2387 			bp->b_datap->db_type);
2388 #endif
2389 		freemsg(bp);
2390 		return (0);
2391 	}
2392 
2393 	/* NOTREACHED */
2394 }
2395 
2396 /*
2397  * Check if the stream pointed to by `stp' can be written to, and return an
2398  * error code if not.  If `eiohup' is set, then return EIO if STRHUP is set.
2399  * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
2400  * then always return EPIPE and send a SIGPIPE to the invoking thread.
2401  */
2402 static int
2403 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2404 {
2405 	int error;
2406 
2407 	ASSERT(MUTEX_HELD(&stp->sd_lock));
2408 
2409 	/*
2410 	 * For modem support, POSIX states that on writes, EIO should
2411 	 * be returned if the stream has been hung up.
2412 	 */
2413 	if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
2414 		error = EIO;
2415 	else
2416 		error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2417 
2418 	if (error != 0) {
2419 		if (!(stp->sd_flag & STPLEX) &&
2420 		    (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
2421 			tsignal(curthread, SIGPIPE);
2422 			error = EPIPE;
2423 		}
2424 	}
2425 
2426 	return (error);
2427 }
2428 
2429 /*
2430  * Copyin and send data down a stream.
2431  * The caller will allocate and copyin any control part that precedes the
2432  * message and pass than in as mctl.
2433  *
2434  * Caller should *not* hold sd_lock.
2435  * When EWOULDBLOCK is returned the caller has to redo the canputnext
2436  * under sd_lock in order to avoid missing a backenabling wakeup.
2437  *
2438  * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2439  *
2440  * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2441  * For sync streams we can only ignore flow control by reverting to using
2442  * putnext.
2443  *
2444  * If sd_maxblk is less than *iosize this routine might return without
2445  * transferring all of *iosize. In all cases, on return *iosize will contain
2446  * the amount of data that was transferred.
2447  */
2448 static int
2449 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2450     int b_flag, int pri, int flags)
2451 {
2452 	struiod_t uiod;
2453 	mblk_t *mp;
2454 	queue_t *wqp = stp->sd_wrq;
2455 	int error = 0;
2456 	ssize_t count = *iosize;
2457 	cred_t *cr;
2458 
2459 	ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2460 
2461 	if (uiop != NULL && count >= 0)
2462 		flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2463 
2464 	if (!(flags & STRUIO_POSTPONE)) {
2465 		/*
2466 		 * Use regular canputnext, strmakedata, putnext sequence.
2467 		 */
2468 		if (pri == 0) {
2469 			if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2470 				freemsg(mctl);
2471 				return (EWOULDBLOCK);
2472 			}
2473 		} else {
2474 			if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2475 				freemsg(mctl);
2476 				return (EWOULDBLOCK);
2477 			}
2478 		}
2479 
2480 		if ((error = strmakedata(iosize, uiop, stp, flags,
2481 					&mp)) != 0) {
2482 			freemsg(mctl);
2483 			/*
2484 			 * need to change return code to ENOMEM
2485 			 * so that this is not confused with
2486 			 * flow control, EAGAIN.
2487 			 */
2488 
2489 			if (error == EAGAIN)
2490 				return (ENOMEM);
2491 			else
2492 				return (error);
2493 		}
2494 		if (mctl != NULL) {
2495 			if (mctl->b_cont == NULL)
2496 				mctl->b_cont = mp;
2497 			else if (mp != NULL)
2498 				linkb(mctl, mp);
2499 			mp = mctl;
2500 			/*
2501 			 * Note that for interrupt thread, the CRED() is
2502 			 * NULL. Don't bother with the pid either.
2503 			 */
2504 			if ((cr = CRED()) != NULL) {
2505 				mblk_setcred(mp, cr);
2506 				DB_CPID(mp) = curproc->p_pid;
2507 			}
2508 		} else if (mp == NULL)
2509 			return (0);
2510 
2511 		mp->b_flag |= b_flag;
2512 		mp->b_band = (uchar_t)pri;
2513 
2514 		if (flags & MSG_IGNFLOW) {
2515 			/*
2516 			 * XXX Hack: Don't get stuck running service
2517 			 * procedures. This is needed for sockfs when
2518 			 * sending the unbind message out of the rput
2519 			 * procedure - we don't want a put procedure
2520 			 * to run service procedures.
2521 			 */
2522 			putnext(wqp, mp);
2523 		} else {
2524 			stream_willservice(stp);
2525 			putnext(wqp, mp);
2526 			stream_runservice(stp);
2527 		}
2528 		return (0);
2529 	}
2530 	/*
2531 	 * Stream supports rwnext() for the write side.
2532 	 */
2533 	if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2534 		freemsg(mctl);
2535 		/*
2536 		 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2537 		 */
2538 		return (error == EAGAIN ? ENOMEM : error);
2539 	}
2540 	if (mctl != NULL) {
2541 		if (mctl->b_cont == NULL)
2542 			mctl->b_cont = mp;
2543 		else if (mp != NULL)
2544 			linkb(mctl, mp);
2545 		mp = mctl;
2546 		/*
2547 		 * Note that for interrupt thread, the CRED() is
2548 		 * NULL.  Don't bother with the pid either.
2549 		 */
2550 		if ((cr = CRED()) != NULL) {
2551 			mblk_setcred(mp, cr);
2552 			DB_CPID(mp) = curproc->p_pid;
2553 		}
2554 	} else if (mp == NULL) {
2555 		return (0);
2556 	}
2557 
2558 	mp->b_flag |= b_flag;
2559 	mp->b_band = (uchar_t)pri;
2560 
2561 	(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
2562 		sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
2563 	uiod.d_uio.uio_offset = 0;
2564 	uiod.d_mp = mp;
2565 	error = rwnext(wqp, &uiod);
2566 	if (! uiod.d_mp) {
2567 		uioskip(uiop, *iosize);
2568 		return (error);
2569 	}
2570 	ASSERT(mp == uiod.d_mp);
2571 	if (error == EINVAL) {
2572 		/*
2573 		 * The stream plumbing must have changed while
2574 		 * we were away, so just turn off rwnext()s.
2575 		 */
2576 		error = 0;
2577 	} else if (error == EBUSY || error == EWOULDBLOCK) {
2578 		/*
2579 		 * Couldn't enter a perimeter or took a page fault,
2580 		 * so fall-back to putnext().
2581 		 */
2582 		error = 0;
2583 	} else {
2584 		freemsg(mp);
2585 		return (error);
2586 	}
2587 	/* Have to check canput before consuming data from the uio */
2588 	if (pri == 0) {
2589 		if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2590 			freemsg(mp);
2591 			return (EWOULDBLOCK);
2592 		}
2593 	} else {
2594 		if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2595 			freemsg(mp);
2596 			return (EWOULDBLOCK);
2597 		}
2598 	}
2599 	ASSERT(mp == uiod.d_mp);
2600 	/* Copyin data from the uio */
2601 	if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2602 		freemsg(mp);
2603 		return (error);
2604 	}
2605 	uioskip(uiop, *iosize);
2606 	if (flags & MSG_IGNFLOW) {
2607 		/*
2608 		 * XXX Hack: Don't get stuck running service procedures.
2609 		 * This is needed for sockfs when sending the unbind message
2610 		 * out of the rput procedure - we don't want a put procedure
2611 		 * to run service procedures.
2612 		 */
2613 		putnext(wqp, mp);
2614 	} else {
2615 		stream_willservice(stp);
2616 		putnext(wqp, mp);
2617 		stream_runservice(stp);
2618 	}
2619 	return (0);
2620 }
2621 
2622 /*
2623  * Write attempts to break the write request into messages conforming
2624  * with the minimum and maximum packet sizes set downstream.
2625  *
2626  * Write will not block if downstream queue is full and
2627  * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2628  *
2629  * A write of zero bytes gets packaged into a zero length message and sent
2630  * downstream like any other message.
2631  *
2632  * If buffers of the requested sizes are not available, the write will
2633  * sleep until the buffers become available.
2634  *
2635  * Write (if specified) will supply a write offset in a message if it
2636  * makes sense. This can be specified by downstream modules as part of
2637  * a M_SETOPTS message.  Write will not supply the write offset if it
2638  * cannot supply any data in a buffer.  In other words, write will never
2639  * send down an empty packet due to a write offset.
2640  */
2641 /* ARGSUSED2 */
2642 int
2643 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2644 {
2645 	struct stdata *stp;
2646 	struct queue *wqp;
2647 	ssize_t rmin, rmax;
2648 	ssize_t iosize;
2649 	char waitflag;
2650 	int tempmode;
2651 	int error = 0;
2652 	int b_flag;
2653 
2654 	ASSERT(vp->v_stream);
2655 	stp = vp->v_stream;
2656 
2657 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
2658 		if ((error = straccess(stp, JCWRITE)) != 0)
2659 			return (error);
2660 
2661 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2662 		mutex_enter(&stp->sd_lock);
2663 		error = strwriteable(stp, B_TRUE, B_TRUE);
2664 		mutex_exit(&stp->sd_lock);
2665 		if (error != 0)
2666 			return (error);
2667 	}
2668 
2669 	wqp = stp->sd_wrq;
2670 
2671 	/* get these values from them cached in the stream head */
2672 	rmin = stp->sd_qn_minpsz;
2673 	rmax = stp->sd_qn_maxpsz;
2674 
2675 	/*
2676 	 * Check the min/max packet size constraints.  If min packet size
2677 	 * is non-zero, the write cannot be split into multiple messages
2678 	 * and still guarantee the size constraints.
2679 	 */
2680 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2681 
2682 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
2683 	if (rmax == 0) {
2684 		return (0);
2685 	}
2686 	if (rmin > 0) {
2687 		if (uiop->uio_resid < rmin) {
2688 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2689 				"strwrite out:q %p out %d error %d",
2690 				wqp, 0, ERANGE);
2691 			return (ERANGE);
2692 		}
2693 		if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2694 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2695 				"strwrite out:q %p out %d error %d",
2696 				wqp, 1, ERANGE);
2697 			return (ERANGE);
2698 		}
2699 	}
2700 
2701 	/*
2702 	 * Do until count satisfied or error.
2703 	 */
2704 	waitflag = WRITEWAIT;
2705 	if (stp->sd_flag & OLDNDELAY)
2706 		tempmode = uiop->uio_fmode & ~FNDELAY;
2707 	else
2708 		tempmode = uiop->uio_fmode;
2709 
2710 	if (rmax == INFPSZ)
2711 		rmax = uiop->uio_resid;
2712 
2713 	/*
2714 	 * Note that tempmode does not get used in strput/strmakedata
2715 	 * but only in strwaitq. The other routines use uio_fmode
2716 	 * unmodified.
2717 	 */
2718 
2719 	/* LINTED: constant in conditional context */
2720 	while (1) {	/* breaks when uio_resid reaches zero */
2721 		/*
2722 		 * Determine the size of the next message to be
2723 		 * packaged.  May have to break write into several
2724 		 * messages based on max packet size.
2725 		 */
2726 		iosize = MIN(uiop->uio_resid, rmax);
2727 
2728 		/*
2729 		 * Put block downstream when flow control allows it.
2730 		 */
2731 		if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2732 			b_flag = MSGDELIM;
2733 		else
2734 			b_flag = 0;
2735 
2736 		for (;;) {
2737 			int done = 0;
2738 
2739 			error = strput(stp, NULL, uiop, &iosize, b_flag,
2740 				0, 0);
2741 			if (error == 0)
2742 				break;
2743 			if (error != EWOULDBLOCK)
2744 				goto out;
2745 
2746 			mutex_enter(&stp->sd_lock);
2747 			/*
2748 			 * Check for a missed wakeup.
2749 			 * Needed since strput did not hold sd_lock across
2750 			 * the canputnext.
2751 			 */
2752 			if (canputnext(wqp)) {
2753 				/* Try again */
2754 				mutex_exit(&stp->sd_lock);
2755 				continue;
2756 			}
2757 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2758 				"strwrite wait:q %p wait", wqp);
2759 			if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2760 			    tempmode, -1, &done)) != 0 || done) {
2761 				mutex_exit(&stp->sd_lock);
2762 				if ((vp->v_type == VFIFO) &&
2763 				    (uiop->uio_fmode & FNDELAY) &&
2764 				    (error == EAGAIN))
2765 					error = 0;
2766 				goto out;
2767 			}
2768 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2769 				"strwrite wake:q %p awakes", wqp);
2770 			mutex_exit(&stp->sd_lock);
2771 			if (stp->sd_sidp != NULL &&
2772 			    stp->sd_vnode->v_type != VFIFO)
2773 				if (error = straccess(stp, JCWRITE))
2774 					goto out;
2775 		}
2776 		waitflag |= NOINTR;
2777 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2778 			"strwrite resid:q %p uiop %p", wqp, uiop);
2779 		if (uiop->uio_resid) {
2780 			/* Recheck for errors - needed for sockets */
2781 			if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2782 			    (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2783 				mutex_enter(&stp->sd_lock);
2784 				error = strwriteable(stp, B_FALSE, B_TRUE);
2785 				mutex_exit(&stp->sd_lock);
2786 				if (error != 0)
2787 					return (error);
2788 			}
2789 			continue;
2790 		}
2791 		break;
2792 	}
2793 out:
2794 	/*
2795 	 * For historical reasons, applications expect EAGAIN when a data
2796 	 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2797 	 */
2798 	if (error == ENOMEM)
2799 		error = EAGAIN;
2800 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2801 		"strwrite out:q %p out %d error %d", wqp, 2, error);
2802 	return (error);
2803 }
2804 
2805 /*
2806  * kstrwritemp() has very similar semantics as that of strwrite().
2807  * The main difference is it obtains mblks from the caller and also
2808  * does not do any copy as done in strwrite() from user buffers to
2809  * kernel buffers.
2810  *
2811  *
2812  * Currently, this routine is used by sendfile to send data allocated
2813  * within the kernel without any copying. This interface does not use the
2814  * synchronous stream interface as synch. stream interface implies
2815  * copying.
2816  */
2817 int
2818 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
2819 {
2820 	struct stdata *stp;
2821 	struct queue *wqp;
2822 	char waitflag;
2823 	int tempmode;
2824 	int error;
2825 	int done = 0;
2826 
2827 	ASSERT(vp->v_stream);
2828 	stp = vp->v_stream;
2829 
2830 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2831 		mutex_enter(&stp->sd_lock);
2832 		error = strwriteable(stp, B_FALSE, B_TRUE);
2833 		mutex_exit(&stp->sd_lock);
2834 		if (error != 0)
2835 			return (error);
2836 	}
2837 
2838 	/*
2839 	 * First, check for flow control without grabbing the sd_lock.
2840 	 * If we would block, re-check with the lock. This is similar
2841 	 * to the logic used by strwrite().
2842 	 */
2843 	wqp = stp->sd_wrq;
2844 	if (canputnext(wqp)) {
2845 		putnext(wqp, mp);
2846 		return (0);
2847 	}
2848 
2849 	waitflag = WRITEWAIT;
2850 	if (stp->sd_flag & OLDNDELAY)
2851 		tempmode = fmode & ~FNDELAY;
2852 	else
2853 		tempmode = fmode;
2854 
2855 	mutex_enter(&stp->sd_lock);
2856 	do {
2857 		if (canputnext(wqp)) {
2858 			mutex_exit(&stp->sd_lock);
2859 			putnext(wqp, mp);
2860 			return (0);
2861 		}
2862 		error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
2863 		    &done);
2864 	} while (error == 0 && !done);
2865 
2866 	mutex_exit(&stp->sd_lock);
2867 	/*
2868 	 * EAGAIN tells the application to try again. ENOMEM
2869 	 * is returned only if the memory allocation size
2870 	 * exceeds the physical limits of the system. ENOMEM
2871 	 * can't be true here.
2872 	 */
2873 	if (error == ENOMEM)
2874 		error = EAGAIN;
2875 	return (error);
2876 }
2877 
2878 /*
2879  * Stream head write service routine.
2880  * Its job is to wake up any sleeping writers when a queue
2881  * downstream needs data (part of the flow control in putq and getq).
2882  * It also must wake anyone sleeping on a poll().
2883  * For stream head right below mux module, it must also invoke put procedure
2884  * of next downstream module.
2885  */
2886 int
2887 strwsrv(queue_t *q)
2888 {
2889 	struct stdata *stp;
2890 	queue_t *tq;
2891 	qband_t *qbp;
2892 	int i;
2893 	qband_t *myqbp;
2894 	int isevent;
2895 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
2896 
2897 	TRACE_1(TR_FAC_STREAMS_FR,
2898 		TR_STRWSRV, "strwsrv:q %p", q);
2899 	stp = (struct stdata *)q->q_ptr;
2900 	ASSERT(qclaimed(q));
2901 	mutex_enter(&stp->sd_lock);
2902 	ASSERT(!(stp->sd_flag & STPLEX));
2903 
2904 	if (stp->sd_flag & WSLEEP) {
2905 		stp->sd_flag &= ~WSLEEP;
2906 		cv_broadcast(&q->q_wait);
2907 	}
2908 	mutex_exit(&stp->sd_lock);
2909 
2910 	/* The other end of a stream pipe went away. */
2911 	if ((tq = q->q_next) == NULL) {
2912 		return (0);
2913 	}
2914 
2915 	/* Find the next module forward that has a service procedure */
2916 	claimstr(q);
2917 	tq = q->q_nfsrv;
2918 	ASSERT(tq != NULL);
2919 
2920 	if ((q->q_flag & QBACK)) {
2921 		if ((tq->q_flag & QFULL)) {
2922 			mutex_enter(QLOCK(tq));
2923 			if (!(tq->q_flag & QFULL)) {
2924 				mutex_exit(QLOCK(tq));
2925 				goto wakeup;
2926 			}
2927 			/*
2928 			 * The queue must have become full again. Set QWANTW
2929 			 * again so strwsrv will be back enabled when
2930 			 * the queue becomes non-full next time.
2931 			 */
2932 			tq->q_flag |= QWANTW;
2933 			mutex_exit(QLOCK(tq));
2934 		} else {
2935 		wakeup:
2936 			pollwakeup(&stp->sd_pollist, POLLWRNORM);
2937 			mutex_enter(&stp->sd_lock);
2938 			if (stp->sd_sigflags & S_WRNORM)
2939 				strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
2940 			mutex_exit(&stp->sd_lock);
2941 		}
2942 	}
2943 
2944 	isevent = 0;
2945 	i = 1;
2946 	bzero((caddr_t)qbf, NBAND);
2947 	mutex_enter(QLOCK(tq));
2948 	if ((myqbp = q->q_bandp) != NULL)
2949 		for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
2950 			ASSERT(myqbp);
2951 			if ((myqbp->qb_flag & QB_BACK)) {
2952 				if (qbp->qb_flag & QB_FULL) {
2953 					/*
2954 					 * The band must have become full again.
2955 					 * Set QB_WANTW again so strwsrv will
2956 					 * be back enabled when the band becomes
2957 					 * non-full next time.
2958 					 */
2959 					qbp->qb_flag |= QB_WANTW;
2960 				} else {
2961 					isevent = 1;
2962 					qbf[i] = 1;
2963 				}
2964 			}
2965 			myqbp = myqbp->qb_next;
2966 			i++;
2967 		}
2968 	mutex_exit(QLOCK(tq));
2969 
2970 	if (isevent) {
2971 	    for (i = tq->q_nband; i; i--) {
2972 		if (qbf[i]) {
2973 			pollwakeup(&stp->sd_pollist, POLLWRBAND);
2974 			mutex_enter(&stp->sd_lock);
2975 			if (stp->sd_sigflags & S_WRBAND)
2976 				strsendsig(stp->sd_siglist, S_WRBAND,
2977 					(uchar_t)i, 0);
2978 			mutex_exit(&stp->sd_lock);
2979 		}
2980 	    }
2981 	}
2982 
2983 	releasestr(q);
2984 	return (0);
2985 }
2986 
2987 /*
2988  * Special case of strcopyin/strcopyout for copying
2989  * struct strioctl that can deal with both data
2990  * models.
2991  */
2992 
2993 #ifdef	_LP64
2994 
2995 static int
2996 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
2997 {
2998 	struct	strioctl32 strioc32;
2999 	struct	strioctl *striocp;
3000 
3001 	if (copyflag & U_TO_K) {
3002 		ASSERT((copyflag & K_TO_K) == 0);
3003 
3004 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
3005 			if (copyin(from, &strioc32, sizeof (strioc32)))
3006 				return (EFAULT);
3007 
3008 			striocp = (struct strioctl *)to;
3009 			striocp->ic_cmd	= strioc32.ic_cmd;
3010 			striocp->ic_timout = strioc32.ic_timout;
3011 			striocp->ic_len	= strioc32.ic_len;
3012 			striocp->ic_dp	= (char *)(uintptr_t)strioc32.ic_dp;
3013 
3014 		} else { /* NATIVE data model */
3015 			if (copyin(from, to, sizeof (struct strioctl))) {
3016 				return (EFAULT);
3017 			} else {
3018 				return (0);
3019 			}
3020 		}
3021 	} else {
3022 		ASSERT(copyflag & K_TO_K);
3023 		bcopy(from, to, sizeof (struct strioctl));
3024 	}
3025 	return (0);
3026 }
3027 
3028 static int
3029 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3030 {
3031 	struct	strioctl32 strioc32;
3032 	struct	strioctl *striocp;
3033 
3034 	if (copyflag & U_TO_K) {
3035 		ASSERT((copyflag & K_TO_K) == 0);
3036 
3037 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
3038 			striocp = (struct strioctl *)from;
3039 			strioc32.ic_cmd	= striocp->ic_cmd;
3040 			strioc32.ic_timout = striocp->ic_timout;
3041 			strioc32.ic_len	= striocp->ic_len;
3042 			strioc32.ic_dp	= (caddr32_t)(uintptr_t)striocp->ic_dp;
3043 			ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
3044 			    striocp->ic_dp);
3045 
3046 			if (copyout(&strioc32, to, sizeof (strioc32)))
3047 				return (EFAULT);
3048 
3049 		} else { /* NATIVE data model */
3050 			if (copyout(from, to, sizeof (struct strioctl))) {
3051 				return (EFAULT);
3052 			} else {
3053 				return (0);
3054 			}
3055 		}
3056 	} else {
3057 		ASSERT(copyflag & K_TO_K);
3058 		bcopy(from, to, sizeof (struct strioctl));
3059 	}
3060 	return (0);
3061 }
3062 
3063 #else	/* ! _LP64 */
3064 
3065 /* ARGSUSED2 */
3066 static int
3067 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3068 {
3069 	return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3070 }
3071 
3072 /* ARGSUSED2 */
3073 static int
3074 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3075 {
3076 	return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3077 }
3078 
3079 #endif	/* _LP64 */
3080 
3081 /*
3082  * Determine type of job control semantics expected by user.  The
3083  * possibilities are:
3084  *	JCREAD	- Behaves like read() on fd; send SIGTTIN
3085  *	JCWRITE	- Behaves like write() on fd; send SIGTTOU if TOSTOP set
3086  *	JCSETP	- Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3087  *	JCGETP	- Gets a value in the stream; no signals.
3088  * See straccess in strsubr.c for usage of these values.
3089  *
3090  * This routine also returns -1 for I_STR as a special case; the
3091  * caller must call again with the real ioctl number for
3092  * classification.
3093  */
3094 static int
3095 job_control_type(int cmd)
3096 {
3097 	switch (cmd) {
3098 	case I_STR:
3099 		return (-1);
3100 
3101 	case I_RECVFD:
3102 	case I_E_RECVFD:
3103 		return (JCREAD);
3104 
3105 	case I_FDINSERT:
3106 	case I_SENDFD:
3107 		return (JCWRITE);
3108 
3109 	case TCSETA:
3110 	case TCSETAW:
3111 	case TCSETAF:
3112 	case TCSBRK:
3113 	case TCXONC:
3114 	case TCFLSH:
3115 	case TCDSET:	/* Obsolete */
3116 	case TIOCSWINSZ:
3117 	case TCSETS:
3118 	case TCSETSW:
3119 	case TCSETSF:
3120 	case TIOCSETD:
3121 	case TIOCHPCL:
3122 	case TIOCSETP:
3123 	case TIOCSETN:
3124 	case TIOCEXCL:
3125 	case TIOCNXCL:
3126 	case TIOCFLUSH:
3127 	case TIOCSETC:
3128 	case TIOCLBIS:
3129 	case TIOCLBIC:
3130 	case TIOCLSET:
3131 	case TIOCSBRK:
3132 	case TIOCCBRK:
3133 	case TIOCSDTR:
3134 	case TIOCCDTR:
3135 	case TIOCSLTC:
3136 	case TIOCSTOP:
3137 	case TIOCSTART:
3138 	case TIOCSTI:
3139 	case TIOCSPGRP:
3140 	case TIOCMSET:
3141 	case TIOCMBIS:
3142 	case TIOCMBIC:
3143 	case TIOCREMOTE:
3144 	case TIOCSIGNAL:
3145 	case LDSETT:
3146 	case LDSMAP:	/* Obsolete */
3147 	case DIOCSETP:
3148 	case I_FLUSH:
3149 	case I_SRDOPT:
3150 	case I_SETSIG:
3151 	case I_SWROPT:
3152 	case I_FLUSHBAND:
3153 	case I_SETCLTIME:
3154 	case I_SERROPT:
3155 	case I_ESETSIG:
3156 	case FIONBIO:
3157 	case FIOASYNC:
3158 	case FIOSETOWN:
3159 	case JBOOT:	/* Obsolete */
3160 	case JTERM:	/* Obsolete */
3161 	case JTIMOM:	/* Obsolete */
3162 	case JZOMBOOT:	/* Obsolete */
3163 	case JAGENT:	/* Obsolete */
3164 	case JTRUN:	/* Obsolete */
3165 	case JXTPROTO:	/* Obsolete */
3166 		return (JCSETP);
3167 	}
3168 
3169 	return (JCGETP);
3170 }
3171 
3172 /*
3173  * ioctl for streams
3174  */
3175 int
3176 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3177     cred_t *crp, int *rvalp)
3178 {
3179 	struct stdata *stp;
3180 	struct strioctl strioc;
3181 	struct uio uio;
3182 	struct iovec iov;
3183 	int access;
3184 	mblk_t *mp;
3185 	int error = 0;
3186 	int done = 0;
3187 	ssize_t	rmin, rmax;
3188 	queue_t *wrq;
3189 	queue_t *rdq;
3190 	boolean_t kioctl = B_FALSE;
3191 
3192 	if (flag & FKIOCTL) {
3193 		copyflag = K_TO_K;
3194 		kioctl = B_TRUE;
3195 	}
3196 	ASSERT(vp->v_stream);
3197 	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3198 	stp = vp->v_stream;
3199 
3200 	TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3201 		"strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3202 
3203 #ifdef C2_AUDIT
3204 	if (audit_active)
3205 		audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp);
3206 #endif
3207 
3208 	/*
3209 	 * If the copy is kernel to kernel, make sure that the FNATIVE
3210 	 * flag is set.  After this it would be a serious error to have
3211 	 * no model flag.
3212 	 */
3213 	if (copyflag == K_TO_K)
3214 		flag = (flag & ~FMODELS) | FNATIVE;
3215 
3216 	ASSERT((flag & FMODELS) != 0);
3217 
3218 	wrq = stp->sd_wrq;
3219 	rdq = _RD(wrq);
3220 
3221 	access = job_control_type(cmd);
3222 
3223 	/* We should never see these here, should be handled by iwscn */
3224 	if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3225 		return (EINVAL);
3226 
3227 	if (access != -1 && stp->sd_sidp != NULL &&
3228 	    stp->sd_vnode->v_type != VFIFO)
3229 		if (error = straccess(stp, access))
3230 			return (error);
3231 
3232 	/*
3233 	 * Check for sgttyb-related ioctls first, and complain as
3234 	 * necessary.
3235 	 */
3236 	switch (cmd) {
3237 	case TIOCGETP:
3238 	case TIOCSETP:
3239 	case TIOCSETN:
3240 		if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3241 			sgttyb_complaint = B_TRUE;
3242 			cmn_err(CE_NOTE,
3243 			    "application used obsolete TIOC[GS]ET");
3244 		}
3245 		if (sgttyb_handling >= 3) {
3246 			tsignal(curthread, SIGSYS);
3247 			return (EIO);
3248 		}
3249 		break;
3250 	}
3251 
3252 	mutex_enter(&stp->sd_lock);
3253 
3254 	switch (cmd) {
3255 	case I_RECVFD:
3256 	case I_E_RECVFD:
3257 	case I_PEEK:
3258 	case I_NREAD:
3259 	case FIONREAD:
3260 	case FIORDCHK:
3261 	case I_ATMARK:
3262 	case FIONBIO:
3263 	case FIOASYNC:
3264 		if (stp->sd_flag & (STRDERR|STPLEX)) {
3265 			error = strgeterr(stp, STRDERR|STPLEX, 0);
3266 			if (error != 0) {
3267 				mutex_exit(&stp->sd_lock);
3268 				return (error);
3269 			}
3270 		}
3271 		break;
3272 
3273 	default:
3274 		if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3275 			error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3276 			if (error != 0) {
3277 				mutex_exit(&stp->sd_lock);
3278 				return (error);
3279 			}
3280 		}
3281 	}
3282 
3283 	mutex_exit(&stp->sd_lock);
3284 
3285 	switch (cmd) {
3286 	default:
3287 		/*
3288 		 * The stream head has hardcoded knowledge of a
3289 		 * miscellaneous collection of terminal-, keyboard- and
3290 		 * mouse-related ioctls, enumerated below.  This hardcoded
3291 		 * knowledge allows the stream head to automatically
3292 		 * convert transparent ioctl requests made by userland
3293 		 * programs into I_STR ioctls which many old STREAMS
3294 		 * modules and drivers require.
3295 		 *
3296 		 * No new ioctls should ever be added to this list.
3297 		 * Instead, the STREAMS module or driver should be written
3298 		 * to either handle transparent ioctls or require any
3299 		 * userland programs to use I_STR ioctls (by returning
3300 		 * EINVAL to any transparent ioctl requests).
3301 		 *
3302 		 * More importantly, removing ioctls from this list should
3303 		 * be done with the utmost care, since our STREAMS modules
3304 		 * and drivers *count* on the stream head performing this
3305 		 * conversion, and thus may panic while processing
3306 		 * transparent ioctl request for one of these ioctls (keep
3307 		 * in mind that third party modules and drivers may have
3308 		 * similar problems).
3309 		 */
3310 		if (((cmd & IOCTYPE) == LDIOC) ||
3311 		    ((cmd & IOCTYPE) == tIOC) ||
3312 		    ((cmd & IOCTYPE) == TIOC) ||
3313 		    ((cmd & IOCTYPE) == KIOC) ||
3314 		    ((cmd & IOCTYPE) == MSIOC) ||
3315 		    ((cmd & IOCTYPE) == VUIOC)) {
3316 			/*
3317 			 * The ioctl is a tty ioctl - set up strioc buffer
3318 			 * and call strdoioctl() to do the work.
3319 			 */
3320 			if (stp->sd_flag & STRHUP)
3321 				return (ENXIO);
3322 			strioc.ic_cmd = cmd;
3323 			strioc.ic_timout = INFTIM;
3324 
3325 			switch (cmd) {
3326 
3327 			case TCXONC:
3328 			case TCSBRK:
3329 			case TCFLSH:
3330 			case TCDSET:
3331 				{
3332 				int native_arg = (int)arg;
3333 				strioc.ic_len = sizeof (int);
3334 				strioc.ic_dp = (char *)&native_arg;
3335 				return (strdoioctl(stp, &strioc, flag,
3336 				    K_TO_K, crp, rvalp));
3337 				}
3338 
3339 			case TCSETA:
3340 			case TCSETAW:
3341 			case TCSETAF:
3342 				strioc.ic_len = sizeof (struct termio);
3343 				strioc.ic_dp = (char *)arg;
3344 				return (strdoioctl(stp, &strioc, flag,
3345 					copyflag, crp, rvalp));
3346 
3347 			case TCSETS:
3348 			case TCSETSW:
3349 			case TCSETSF:
3350 				strioc.ic_len = sizeof (struct termios);
3351 				strioc.ic_dp = (char *)arg;
3352 				return (strdoioctl(stp, &strioc, flag,
3353 					copyflag, crp, rvalp));
3354 
3355 			case LDSETT:
3356 				strioc.ic_len = sizeof (struct termcb);
3357 				strioc.ic_dp = (char *)arg;
3358 				return (strdoioctl(stp, &strioc, flag,
3359 					copyflag, crp, rvalp));
3360 
3361 			case TIOCSETP:
3362 				strioc.ic_len = sizeof (struct sgttyb);
3363 				strioc.ic_dp = (char *)arg;
3364 				return (strdoioctl(stp, &strioc, flag,
3365 					copyflag, crp, rvalp));
3366 
3367 			case TIOCSTI:
3368 				if ((flag & FREAD) == 0 &&
3369 				    secpolicy_sti(crp) != 0) {
3370 					return (EPERM);
3371 				}
3372 				if (stp->sd_sidp !=
3373 				    ttoproc(curthread)->p_sessp->s_sidp &&
3374 				    secpolicy_sti(crp) != 0) {
3375 					return (EACCES);
3376 				}
3377 
3378 				strioc.ic_len = sizeof (char);
3379 				strioc.ic_dp = (char *)arg;
3380 				return (strdoioctl(stp, &strioc, flag,
3381 					copyflag, crp, rvalp));
3382 
3383 			case TIOCSWINSZ:
3384 				strioc.ic_len = sizeof (struct winsize);
3385 				strioc.ic_dp = (char *)arg;
3386 				return (strdoioctl(stp, &strioc, flag,
3387 					copyflag, crp, rvalp));
3388 
3389 			case TIOCSSIZE:
3390 				strioc.ic_len = sizeof (struct ttysize);
3391 				strioc.ic_dp = (char *)arg;
3392 				return (strdoioctl(stp, &strioc, flag,
3393 					copyflag, crp, rvalp));
3394 
3395 			case TIOCSSOFTCAR:
3396 			case KIOCTRANS:
3397 			case KIOCTRANSABLE:
3398 			case KIOCCMD:
3399 			case KIOCSDIRECT:
3400 			case KIOCSCOMPAT:
3401 			case KIOCSKABORTEN:
3402 			case KIOCSRPTDELAY:
3403 			case KIOCSRPTRATE:
3404 			case VUIDSFORMAT:
3405 			case TIOCSPPS:
3406 				strioc.ic_len = sizeof (int);
3407 				strioc.ic_dp = (char *)arg;
3408 				return (strdoioctl(stp, &strioc, flag,
3409 					copyflag, crp, rvalp));
3410 
3411 			case KIOCSETKEY:
3412 			case KIOCGETKEY:
3413 				strioc.ic_len = sizeof (struct kiockey);
3414 				strioc.ic_dp = (char *)arg;
3415 				return (strdoioctl(stp, &strioc, flag,
3416 					copyflag, crp, rvalp));
3417 
3418 			case KIOCSKEY:
3419 			case KIOCGKEY:
3420 				strioc.ic_len = sizeof (struct kiockeymap);
3421 				strioc.ic_dp = (char *)arg;
3422 				return (strdoioctl(stp, &strioc, flag,
3423 					copyflag, crp, rvalp));
3424 
3425 			case KIOCSLED:
3426 				/* arg is a pointer to char */
3427 				strioc.ic_len = sizeof (char);
3428 				strioc.ic_dp = (char *)arg;
3429 				return (strdoioctl(stp, &strioc, flag,
3430 					copyflag, crp, rvalp));
3431 
3432 			case MSIOSETPARMS:
3433 				strioc.ic_len = sizeof (Ms_parms);
3434 				strioc.ic_dp = (char *)arg;
3435 				return (strdoioctl(stp, &strioc, flag,
3436 					copyflag, crp, rvalp));
3437 
3438 			case VUIDSADDR:
3439 			case VUIDGADDR:
3440 				strioc.ic_len = sizeof (struct vuid_addr_probe);
3441 				strioc.ic_dp = (char *)arg;
3442 				return (strdoioctl(stp, &strioc, flag,
3443 					copyflag, crp, rvalp));
3444 
3445 			/*
3446 			 * These M_IOCTL's don't require any data to be sent
3447 			 * downstream, and the driver will allocate and link
3448 			 * on its own mblk_t upon M_IOCACK -- thus we set
3449 			 * ic_len to zero and set ic_dp to arg so we know
3450 			 * where to copyout to later.
3451 			 */
3452 			case TIOCGSOFTCAR:
3453 			case TIOCGWINSZ:
3454 			case TIOCGSIZE:
3455 			case KIOCGTRANS:
3456 			case KIOCGTRANSABLE:
3457 			case KIOCTYPE:
3458 			case KIOCGDIRECT:
3459 			case KIOCGCOMPAT:
3460 			case KIOCLAYOUT:
3461 			case KIOCGLED:
3462 			case MSIOGETPARMS:
3463 			case MSIOBUTTONS:
3464 			case VUIDGFORMAT:
3465 			case TIOCGPPS:
3466 			case TIOCGPPSEV:
3467 			case TCGETA:
3468 			case TCGETS:
3469 			case LDGETT:
3470 			case TIOCGETP:
3471 			case KIOCGRPTDELAY:
3472 			case KIOCGRPTRATE:
3473 				strioc.ic_len = 0;
3474 				strioc.ic_dp = (char *)arg;
3475 				return (strdoioctl(stp, &strioc, flag,
3476 					copyflag, crp, rvalp));
3477 			}
3478 		}
3479 
3480 		/*
3481 		 * Unknown cmd - send it down as a transparent ioctl.
3482 		 */
3483 		strioc.ic_cmd = cmd;
3484 		strioc.ic_timout = INFTIM;
3485 		strioc.ic_len = TRANSPARENT;
3486 		strioc.ic_dp = (char *)&arg;
3487 
3488 		return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
3489 
3490 	case I_STR:
3491 		/*
3492 		 * Stream ioctl.  Read in an strioctl buffer from the user
3493 		 * along with any data specified and send it downstream.
3494 		 * strdoioctl() allows only one ioctl message at
3495 		 * a time, and waits for the acknowledgement.
3496 		 */
3497 
3498 		if (stp->sd_flag & STRHUP)
3499 			return (ENXIO);
3500 
3501 		error = strcopyin_strioctl((void *)arg, &strioc, flag,
3502 		    copyflag);
3503 		if (error != 0)
3504 			return (error);
3505 
3506 		if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
3507 			return (EINVAL);
3508 
3509 		access = job_control_type(strioc.ic_cmd);
3510 		if (access != -1 && stp->sd_sidp != NULL &&
3511 		    stp->sd_vnode->v_type != VFIFO &&
3512 		    (error = straccess(stp, access)) != 0)
3513 			return (error);
3514 
3515 		/*
3516 		 * The I_STR facility provides a trap door for malicious
3517 		 * code to send down bogus streamio(7I) ioctl commands to
3518 		 * unsuspecting STREAMS modules and drivers which expect to
3519 		 * only get these messages from the stream head.
3520 		 * Explicitly prohibit any streamio ioctls which can be
3521 		 * passed downstream by the stream head.  Note that we do
3522 		 * not block all streamio ioctls because the ioctl
3523 		 * numberspace is not well managed and thus it's possible
3524 		 * that a module or driver's ioctl numbers may accidentally
3525 		 * collide with them.
3526 		 */
3527 		switch (strioc.ic_cmd) {
3528 		case I_LINK:
3529 		case I_PLINK:
3530 		case I_UNLINK:
3531 		case I_PUNLINK:
3532 		case _I_GETPEERCRED:
3533 		case _I_PLINK_LH:
3534 			return (EINVAL);
3535 		}
3536 
3537 		error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
3538 		if (error == 0) {
3539 			error = strcopyout_strioctl(&strioc, (void *)arg,
3540 			    flag, copyflag);
3541 		}
3542 		return (error);
3543 
3544 	case I_NREAD:
3545 		/*
3546 		 * Return number of bytes of data in first message
3547 		 * in queue in "arg" and return the number of messages
3548 		 * in queue in return value.
3549 		 */
3550 	    {
3551 		size_t	size;
3552 		int	retval;
3553 		int	count = 0;
3554 
3555 		mutex_enter(QLOCK(rdq));
3556 
3557 		size = msgdsize(rdq->q_first);
3558 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3559 			count++;
3560 
3561 		mutex_exit(QLOCK(rdq));
3562 		if (stp->sd_struiordq) {
3563 			infod_t infod;
3564 
3565 			infod.d_cmd = INFOD_COUNT;
3566 			infod.d_count = 0;
3567 			if (count == 0) {
3568 				infod.d_cmd |= INFOD_FIRSTBYTES;
3569 				infod.d_bytes = 0;
3570 			}
3571 			infod.d_res = 0;
3572 			(void) infonext(rdq, &infod);
3573 			count += infod.d_count;
3574 			if (infod.d_res & INFOD_FIRSTBYTES)
3575 				size = infod.d_bytes;
3576 		}
3577 
3578 		/*
3579 		 * Drop down from size_t to the "int" required by the
3580 		 * interface.  Cap at INT_MAX.
3581 		 */
3582 		retval = MIN(size, INT_MAX);
3583 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
3584 		    copyflag);
3585 		if (!error)
3586 			*rvalp = count;
3587 		return (error);
3588 	    }
3589 
3590 	case FIONREAD:
3591 		/*
3592 		 * Return number of bytes of data in all data messages
3593 		 * in queue in "arg".
3594 		 */
3595 	    {
3596 		size_t	size = 0;
3597 		int	retval;
3598 
3599 		mutex_enter(QLOCK(rdq));
3600 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3601 			size += msgdsize(mp);
3602 		mutex_exit(QLOCK(rdq));
3603 
3604 		if (stp->sd_struiordq) {
3605 			infod_t infod;
3606 
3607 			infod.d_cmd = INFOD_BYTES;
3608 			infod.d_res = 0;
3609 			infod.d_bytes = 0;
3610 			(void) infonext(rdq, &infod);
3611 			size += infod.d_bytes;
3612 		}
3613 
3614 		/*
3615 		 * Drop down from size_t to the "int" required by the
3616 		 * interface.  Cap at INT_MAX.
3617 		 */
3618 		retval = MIN(size, INT_MAX);
3619 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
3620 		    copyflag);
3621 
3622 		*rvalp = 0;
3623 		return (error);
3624 	    }
3625 	case FIORDCHK:
3626 		/*
3627 		 * FIORDCHK does not use arg value (like FIONREAD),
3628 		 * instead a count is returned. I_NREAD value may
3629 		 * not be accurate but safe. The real thing to do is
3630 		 * to add the msgdsizes of all data  messages until
3631 		 * a non-data message.
3632 		 */
3633 	    {
3634 		size_t size = 0;
3635 
3636 		mutex_enter(QLOCK(rdq));
3637 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3638 			size += msgdsize(mp);
3639 		mutex_exit(QLOCK(rdq));
3640 
3641 		if (stp->sd_struiordq) {
3642 			infod_t infod;
3643 
3644 			infod.d_cmd = INFOD_BYTES;
3645 			infod.d_res = 0;
3646 			infod.d_bytes = 0;
3647 			(void) infonext(rdq, &infod);
3648 			size += infod.d_bytes;
3649 		}
3650 
3651 		/*
3652 		 * Since ioctl returns an int, and memory sizes under
3653 		 * LP64 may not fit, we return INT_MAX if the count was
3654 		 * actually greater.
3655 		 */
3656 		*rvalp = MIN(size, INT_MAX);
3657 		return (0);
3658 	    }
3659 
3660 	case I_FIND:
3661 		/*
3662 		 * Get module name.
3663 		 */
3664 	    {
3665 		char mname[FMNAMESZ + 1];
3666 		queue_t *q;
3667 
3668 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3669 		    mname, FMNAMESZ + 1, NULL);
3670 		if (error)
3671 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3672 
3673 		/*
3674 		 * Return EINVAL if we're handed a bogus module name.
3675 		 */
3676 		if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
3677 			TRACE_0(TR_FAC_STREAMS_FR,
3678 				TR_I_CANT_FIND, "couldn't I_FIND");
3679 			return (EINVAL);
3680 		}
3681 
3682 		*rvalp = 0;
3683 
3684 		/* Look downstream to see if module is there. */
3685 		claimstr(stp->sd_wrq);
3686 		for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3687 			if (q->q_flag&QREADR) {
3688 				q = NULL;
3689 				break;
3690 			}
3691 			if (strcmp(mname, q->q_qinfo->qi_minfo->mi_idname) == 0)
3692 				break;
3693 		}
3694 		releasestr(stp->sd_wrq);
3695 
3696 		*rvalp = (q ? 1 : 0);
3697 		return (error);
3698 	    }
3699 
3700 	case I_PUSH:
3701 	case __I_PUSH_NOCTTY:
3702 		/*
3703 		 * Push a module.
3704 		 * For the case __I_PUSH_NOCTTY push a module but
3705 		 * do not allocate controlling tty. See bugid 4025044
3706 		 */
3707 
3708 	    {
3709 		char mname[FMNAMESZ + 1];
3710 		fmodsw_impl_t *fp;
3711 		dev_t dummydev;
3712 
3713 		if (stp->sd_flag & STRHUP)
3714 			return (ENXIO);
3715 
3716 		/*
3717 		 * Get module name and look up in fmodsw.
3718 		 */
3719 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3720 		    mname, FMNAMESZ + 1, NULL);
3721 		if (error)
3722 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3723 
3724 		if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
3725 		    NULL)
3726 			return (EINVAL);
3727 
3728 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
3729 		    "I_PUSH:fp %p stp %p", fp, stp);
3730 
3731 		if (error = strstartplumb(stp, flag, cmd)) {
3732 			fmodsw_rele(fp);
3733 			return (error);
3734 		}
3735 
3736 		/*
3737 		 * See if any more modules can be pushed on this stream.
3738 		 * Note that this check must be done after strstartplumb()
3739 		 * since otherwise multiple threads issuing I_PUSHes on
3740 		 * the same stream will be able to exceed nstrpush.
3741 		 */
3742 		mutex_enter(&stp->sd_lock);
3743 		if (stp->sd_pushcnt >= nstrpush) {
3744 			fmodsw_rele(fp);
3745 			strendplumb(stp);
3746 			mutex_exit(&stp->sd_lock);
3747 			return (EINVAL);
3748 		}
3749 		mutex_exit(&stp->sd_lock);
3750 
3751 		/*
3752 		 * Push new module and call its open routine
3753 		 * via qattach().  Modules don't change device
3754 		 * numbers, so just ignore dummydev here.
3755 		 */
3756 		dummydev = vp->v_rdev;
3757 		if ((error = qattach(rdq, &dummydev, 0, crp, fp,
3758 		    B_FALSE)) == 0) {
3759 			if (vp->v_type == VCHR && /* sorry, no pipes allowed */
3760 			    (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
3761 				/*
3762 				 * try to allocate it as a controlling terminal
3763 				 */
3764 				stralloctty(stp);
3765 			}
3766 		}
3767 
3768 		mutex_enter(&stp->sd_lock);
3769 
3770 		/*
3771 		 * As a performance concern we are caching the values of
3772 		 * q_minpsz and q_maxpsz of the module below the stream
3773 		 * head in the stream head.
3774 		 */
3775 		mutex_enter(QLOCK(stp->sd_wrq->q_next));
3776 		rmin = stp->sd_wrq->q_next->q_minpsz;
3777 		rmax = stp->sd_wrq->q_next->q_maxpsz;
3778 		mutex_exit(QLOCK(stp->sd_wrq->q_next));
3779 
3780 		/* Do this processing here as a performance concern */
3781 		if (strmsgsz != 0) {
3782 			if (rmax == INFPSZ)
3783 				rmax = strmsgsz;
3784 			else  {
3785 				if (vp->v_type == VFIFO)
3786 					rmax = MIN(PIPE_BUF, rmax);
3787 				else	rmax = MIN(strmsgsz, rmax);
3788 			}
3789 		}
3790 
3791 		mutex_enter(QLOCK(wrq));
3792 		stp->sd_qn_minpsz = rmin;
3793 		stp->sd_qn_maxpsz = rmax;
3794 		mutex_exit(QLOCK(wrq));
3795 
3796 		strendplumb(stp);
3797 		mutex_exit(&stp->sd_lock);
3798 		return (error);
3799 	    }
3800 
3801 	case I_POP:
3802 	    {
3803 		queue_t	*q;
3804 
3805 		if (stp->sd_flag & STRHUP)
3806 			return (ENXIO);
3807 		if (!wrq->q_next)	/* for broken pipes */
3808 			return (EINVAL);
3809 
3810 		if (error = strstartplumb(stp, flag, cmd))
3811 			return (error);
3812 
3813 		/*
3814 		 * If there is an anchor on this stream and popping
3815 		 * the current module would attempt to pop through the
3816 		 * anchor, then disallow the pop unless we have sufficient
3817 		 * privileges; take the cheapest (non-locking) check
3818 		 * first.
3819 		 */
3820 		if (secpolicy_net_config(crp, B_TRUE) != 0) {
3821 			mutex_enter(&stp->sd_lock);
3822 			/*
3823 			 * Anchors only apply if there's at least one
3824 			 * module on the stream (sd_pushcnt > 0).
3825 			 */
3826 			if (stp->sd_pushcnt > 0 &&
3827 			    stp->sd_pushcnt == stp->sd_anchor &&
3828 			    stp->sd_vnode->v_type != VFIFO) {
3829 				strendplumb(stp);
3830 				mutex_exit(&stp->sd_lock);
3831 				/* Audit and report error */
3832 				return (secpolicy_net_config(crp, B_FALSE));
3833 			}
3834 			mutex_exit(&stp->sd_lock);
3835 		}
3836 
3837 		q = wrq->q_next;
3838 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
3839 			"I_POP:%p from %p", q, stp);
3840 		if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
3841 			error = EINVAL;
3842 		} else {
3843 			qdetach(_RD(q), 1, flag, crp, B_FALSE);
3844 			error = 0;
3845 		}
3846 		mutex_enter(&stp->sd_lock);
3847 
3848 		/*
3849 		 * As a performance concern we are caching the values of
3850 		 * q_minpsz and q_maxpsz of the module below the stream
3851 		 * head in the stream head.
3852 		 */
3853 		mutex_enter(QLOCK(wrq->q_next));
3854 		rmin = wrq->q_next->q_minpsz;
3855 		rmax = wrq->q_next->q_maxpsz;
3856 		mutex_exit(QLOCK(wrq->q_next));
3857 
3858 		/* Do this processing here as a performance concern */
3859 		if (strmsgsz != 0) {
3860 			if (rmax == INFPSZ)
3861 				rmax = strmsgsz;
3862 			else  {
3863 				if (vp->v_type == VFIFO)
3864 					rmax = MIN(PIPE_BUF, rmax);
3865 				else	rmax = MIN(strmsgsz, rmax);
3866 			}
3867 		}
3868 
3869 		mutex_enter(QLOCK(wrq));
3870 		stp->sd_qn_minpsz = rmin;
3871 		stp->sd_qn_maxpsz = rmax;
3872 		mutex_exit(QLOCK(wrq));
3873 
3874 		/* If we popped through the anchor, then reset the anchor. */
3875 		if (stp->sd_pushcnt < stp->sd_anchor)
3876 			stp->sd_anchor = 0;
3877 
3878 		strendplumb(stp);
3879 		mutex_exit(&stp->sd_lock);
3880 		return (error);
3881 	    }
3882 
3883 	case _I_MUXID2FD:
3884 	{
3885 		/*
3886 		 * Create a fd for a I_PLINK'ed lower stream with a given
3887 		 * muxid.  With the fd, application can send down ioctls,
3888 		 * like I_LIST, to the previously I_PLINK'ed stream.  Note
3889 		 * that after getting the fd, the application has to do an
3890 		 * I_PUNLINK on the muxid before it can do any operation
3891 		 * on the lower stream.  This is required by spec1170.
3892 		 *
3893 		 * The fd used to do this ioctl should point to the same
3894 		 * controlling device used to do the I_PLINK.  If it uses
3895 		 * a different stream or an invalid muxid, I_MUXID2FD will
3896 		 * fail.  The error code is set to EINVAL.
3897 		 *
3898 		 * The intended use of this interface is the following.
3899 		 * An application I_PLINK'ed a stream and exits.  The fd
3900 		 * to the lower stream is gone.  Another application
3901 		 * wants to get a fd to the lower stream, it uses I_MUXID2FD.
3902 		 */
3903 		int muxid = (int)arg;
3904 		int fd;
3905 		linkinfo_t *linkp;
3906 		struct file *fp;
3907 
3908 		/*
3909 		 * Do not allow the wildcard muxid.  This ioctl is not
3910 		 * intended to find arbitrary link.
3911 		 */
3912 		if (muxid == 0) {
3913 			return (EINVAL);
3914 		}
3915 
3916 		mutex_enter(&muxifier);
3917 		linkp = findlinks(vp->v_stream, muxid, LINKPERSIST);
3918 		if (linkp == NULL) {
3919 			mutex_exit(&muxifier);
3920 			return (EINVAL);
3921 		}
3922 
3923 		if ((fd = ufalloc(0)) == -1) {
3924 			mutex_exit(&muxifier);
3925 			return (EMFILE);
3926 		}
3927 		fp = linkp->li_fpdown;
3928 		mutex_enter(&fp->f_tlock);
3929 		fp->f_count++;
3930 		mutex_exit(&fp->f_tlock);
3931 		mutex_exit(&muxifier);
3932 		setf(fd, fp);
3933 		*rvalp = fd;
3934 		return (0);
3935 	}
3936 
3937 	case _I_INSERT:
3938 	{
3939 		/*
3940 		 * To insert a module to a given position in a stream.
3941 		 * In the first release, only allow privileged user
3942 		 * to use this ioctl.
3943 		 *
3944 		 * Note that we do not plan to support this ioctl
3945 		 * on pipes in the first release.  We want to learn more
3946 		 * about the implications of these ioctls before extending
3947 		 * their support.  And we do not think these features are
3948 		 * valuable for pipes.
3949 		 *
3950 		 * Neither do we support O/C hot stream.  Note that only
3951 		 * the upper streams of TCP/IP stack are O/C hot streams.
3952 		 * The lower IP stream is not.
3953 		 * When there is a O/C cold barrier, we only allow inserts
3954 		 * above the barrier.
3955 		 */
3956 		STRUCT_DECL(strmodconf, strmodinsert);
3957 		char mod_name[FMNAMESZ + 1];
3958 		fmodsw_impl_t *fp;
3959 		dev_t dummydev;
3960 		queue_t *tmp_wrq;
3961 		int pos;
3962 		boolean_t is_insert;
3963 
3964 		STRUCT_INIT(strmodinsert, flag);
3965 		if (stp->sd_flag & STRHUP)
3966 			return (ENXIO);
3967 		if (STRMATED(stp))
3968 			return (EINVAL);
3969 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
3970 			return (error);
3971 
3972 		error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
3973 		    STRUCT_SIZE(strmodinsert), copyflag);
3974 		if (error)
3975 			return (error);
3976 
3977 		/*
3978 		 * Get module name and look up in fmodsw.
3979 		 */
3980 		error = (copyflag & U_TO_K ? copyinstr :
3981 		    copystr)(STRUCT_FGETP(strmodinsert, mod_name),
3982 		    mod_name, FMNAMESZ + 1, NULL);
3983 		if (error)
3984 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3985 
3986 		if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
3987 		    NULL)
3988 			return (EINVAL);
3989 
3990 		if (error = strstartplumb(stp, flag, cmd)) {
3991 			fmodsw_rele(fp);
3992 			return (error);
3993 		}
3994 
3995 		/*
3996 		 * Is this _I_INSERT just like an I_PUSH?  We need to know
3997 		 * this because we do some optimizations if this is a
3998 		 * module being pushed.
3999 		 */
4000 		pos = STRUCT_FGET(strmodinsert, pos);
4001 		is_insert = (pos != 0);
4002 
4003 		/*
4004 		 * Make sure pos is valid.  Even though it is not an I_PUSH,
4005 		 * we impose the same limit on the number of modules in a
4006 		 * stream.
4007 		 */
4008 		mutex_enter(&stp->sd_lock);
4009 		if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
4010 		    pos > stp->sd_pushcnt) {
4011 			fmodsw_rele(fp);
4012 			strendplumb(stp);
4013 			mutex_exit(&stp->sd_lock);
4014 			return (EINVAL);
4015 		}
4016 		mutex_exit(&stp->sd_lock);
4017 
4018 		/*
4019 		 * First find the correct position at which this module is
4020 		 * to be inserted.  We don't need to call claimstr()
4021 		 * as the stream should not be changing at this point.
4022 		 *
4023 		 * Insert new module and call its open routine
4024 		 * via qattach().  Modules don't change device
4025 		 * numbers, so just ignore dummydev here.
4026 		 */
4027 		for (tmp_wrq = stp->sd_wrq; pos > 0;
4028 		    tmp_wrq = tmp_wrq->q_next, pos--) {
4029 			ASSERT(SAMESTR(tmp_wrq));
4030 		}
4031 		dummydev = vp->v_rdev;
4032 		if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
4033 		    fp, is_insert)) != 0) {
4034 			mutex_enter(&stp->sd_lock);
4035 			strendplumb(stp);
4036 			mutex_exit(&stp->sd_lock);
4037 			return (error);
4038 		}
4039 
4040 		mutex_enter(&stp->sd_lock);
4041 
4042 		/*
4043 		 * As a performance concern we are caching the values of
4044 		 * q_minpsz and q_maxpsz of the module below the stream
4045 		 * head in the stream head.
4046 		 */
4047 		if (!is_insert) {
4048 			mutex_enter(QLOCK(stp->sd_wrq->q_next));
4049 			rmin = stp->sd_wrq->q_next->q_minpsz;
4050 			rmax = stp->sd_wrq->q_next->q_maxpsz;
4051 			mutex_exit(QLOCK(stp->sd_wrq->q_next));
4052 
4053 			/* Do this processing here as a performance concern */
4054 			if (strmsgsz != 0) {
4055 				if (rmax == INFPSZ) {
4056 					rmax = strmsgsz;
4057 				} else  {
4058 					rmax = MIN(strmsgsz, rmax);
4059 				}
4060 			}
4061 
4062 			mutex_enter(QLOCK(wrq));
4063 			stp->sd_qn_minpsz = rmin;
4064 			stp->sd_qn_maxpsz = rmax;
4065 			mutex_exit(QLOCK(wrq));
4066 		}
4067 
4068 		/*
4069 		 * Need to update the anchor value if this module is
4070 		 * inserted below the anchor point.
4071 		 */
4072 		if (stp->sd_anchor != 0) {
4073 			pos = STRUCT_FGET(strmodinsert, pos);
4074 			if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
4075 				stp->sd_anchor++;
4076 		}
4077 
4078 		strendplumb(stp);
4079 		mutex_exit(&stp->sd_lock);
4080 		return (0);
4081 	}
4082 
4083 	case _I_REMOVE:
4084 	{
4085 		/*
4086 		 * To remove a module with a given name in a stream.  The
4087 		 * caller of this ioctl needs to provide both the name and
4088 		 * the position of the module to be removed.  This eliminates
4089 		 * the ambiguity of removal if a module is inserted/pushed
4090 		 * multiple times in a stream.  In the first release, only
4091 		 * allow privileged user to use this ioctl.
4092 		 *
4093 		 * Note that we do not plan to support this ioctl
4094 		 * on pipes in the first release.  We want to learn more
4095 		 * about the implications of these ioctls before extending
4096 		 * their support.  And we do not think these features are
4097 		 * valuable for pipes.
4098 		 *
4099 		 * Neither do we support O/C hot stream.  Note that only
4100 		 * the upper streams of TCP/IP stack are O/C hot streams.
4101 		 * The lower IP stream is not.
4102 		 * When there is a O/C cold barrier we do not allow removal
4103 		 * below the barrier.
4104 		 *
4105 		 * Also note that _I_REMOVE cannot be used to remove a
4106 		 * driver or the stream head.
4107 		 */
4108 		STRUCT_DECL(strmodconf, strmodremove);
4109 		queue_t	*q;
4110 		int pos;
4111 		char mod_name[FMNAMESZ + 1];
4112 		boolean_t is_remove;
4113 
4114 		STRUCT_INIT(strmodremove, flag);
4115 		if (stp->sd_flag & STRHUP)
4116 			return (ENXIO);
4117 		if (STRMATED(stp))
4118 			return (EINVAL);
4119 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4120 			return (error);
4121 
4122 		error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
4123 		    STRUCT_SIZE(strmodremove), copyflag);
4124 		if (error)
4125 			return (error);
4126 
4127 		error = (copyflag & U_TO_K ? copyinstr :
4128 		    copystr)(STRUCT_FGETP(strmodremove, mod_name),
4129 		    mod_name, FMNAMESZ + 1, NULL);
4130 		if (error)
4131 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4132 
4133 		if ((error = strstartplumb(stp, flag, cmd)) != 0)
4134 			return (error);
4135 
4136 		/*
4137 		 * Match the name of given module to the name of module at
4138 		 * the given position.
4139 		 */
4140 		pos = STRUCT_FGET(strmodremove, pos);
4141 
4142 		is_remove = (pos != 0);
4143 		for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
4144 		    q = q->q_next, pos--)
4145 			;
4146 		if (pos > 0 || ! SAMESTR(q) ||
4147 		    strncmp(q->q_qinfo->qi_minfo->mi_idname, mod_name,
4148 		    strlen(q->q_qinfo->qi_minfo->mi_idname)) != 0) {
4149 			mutex_enter(&stp->sd_lock);
4150 			strendplumb(stp);
4151 			mutex_exit(&stp->sd_lock);
4152 			return (EINVAL);
4153 		}
4154 
4155 		ASSERT(!(q->q_flag & QREADR));
4156 		qdetach(_RD(q), 1, flag, crp, is_remove);
4157 
4158 		mutex_enter(&stp->sd_lock);
4159 
4160 		/*
4161 		 * As a performance concern we are caching the values of
4162 		 * q_minpsz and q_maxpsz of the module below the stream
4163 		 * head in the stream head.
4164 		 */
4165 		if (!is_remove) {
4166 			mutex_enter(QLOCK(wrq->q_next));
4167 			rmin = wrq->q_next->q_minpsz;
4168 			rmax = wrq->q_next->q_maxpsz;
4169 			mutex_exit(QLOCK(wrq->q_next));
4170 
4171 			/* Do this processing here as a performance concern */
4172 			if (strmsgsz != 0) {
4173 				if (rmax == INFPSZ)
4174 					rmax = strmsgsz;
4175 				else  {
4176 					if (vp->v_type == VFIFO)
4177 						rmax = MIN(PIPE_BUF, rmax);
4178 					else	rmax = MIN(strmsgsz, rmax);
4179 				}
4180 			}
4181 
4182 			mutex_enter(QLOCK(wrq));
4183 			stp->sd_qn_minpsz = rmin;
4184 			stp->sd_qn_maxpsz = rmax;
4185 			mutex_exit(QLOCK(wrq));
4186 		}
4187 
4188 		/*
4189 		 * Need to update the anchor value if this module is removed
4190 		 * at or below the anchor point.  If the removed module is at
4191 		 * the anchor point, remove the anchor for this stream if
4192 		 * there is no module above the anchor point.  Otherwise, if
4193 		 * the removed module is below the anchor point, decrement the
4194 		 * anchor point by 1.
4195 		 */
4196 		if (stp->sd_anchor != 0) {
4197 			pos = STRUCT_FGET(strmodremove, pos);
4198 			if (pos == 0)
4199 				stp->sd_anchor = 0;
4200 			else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
4201 				stp->sd_anchor--;
4202 		}
4203 
4204 		strendplumb(stp);
4205 		mutex_exit(&stp->sd_lock);
4206 		return (0);
4207 	}
4208 
4209 	case I_ANCHOR:
4210 		/*
4211 		 * Set the anchor position on the stream to reside at
4212 		 * the top module (in other words, the top module
4213 		 * cannot be popped).  Anchors with a FIFO make no
4214 		 * obvious sense, so they're not allowed.
4215 		 */
4216 		mutex_enter(&stp->sd_lock);
4217 
4218 		if (stp->sd_vnode->v_type == VFIFO) {
4219 			mutex_exit(&stp->sd_lock);
4220 			return (EINVAL);
4221 		}
4222 
4223 		stp->sd_anchor = stp->sd_pushcnt;
4224 
4225 		mutex_exit(&stp->sd_lock);
4226 		return (0);
4227 
4228 	case I_LOOK:
4229 		/*
4230 		 * Get name of first module downstream.
4231 		 * If no module, return an error.
4232 		 */
4233 	    {
4234 		claimstr(wrq);
4235 		if (_SAMESTR(wrq) && wrq->q_next->q_next) {
4236 			char *name = wrq->q_next->q_qinfo->qi_minfo->mi_idname;
4237 			error = strcopyout(name, (void *)arg, strlen(name) + 1,
4238 			    copyflag);
4239 			releasestr(wrq);
4240 			return (error);
4241 		}
4242 		releasestr(wrq);
4243 		return (EINVAL);
4244 	    }
4245 
4246 	case I_LINK:
4247 	case I_PLINK:
4248 		/*
4249 		 * Link a multiplexor.
4250 		 */
4251 		return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
4252 
4253 	case _I_PLINK_LH:
4254 		/*
4255 		 * Link a multiplexor: Call must originate from kernel.
4256 		 */
4257 		if (kioctl)
4258 			return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
4259 
4260 		return (EINVAL);
4261 	case I_UNLINK:
4262 	case I_PUNLINK:
4263 		/*
4264 		 * Unlink a multiplexor.
4265 		 * If arg is -1, unlink all links for which this is the
4266 		 * controlling stream.  Otherwise, arg is an index number
4267 		 * for a link to be removed.
4268 		 */
4269 	    {
4270 		struct linkinfo *linkp;
4271 		int native_arg = (int)arg;
4272 		int type;
4273 
4274 		TRACE_1(TR_FAC_STREAMS_FR,
4275 			TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
4276 		if (vp->v_type == VFIFO) {
4277 			return (EINVAL);
4278 		}
4279 		if (cmd == I_UNLINK)
4280 			type = LINKNORMAL;
4281 		else	/* I_PUNLINK */
4282 			type = LINKPERSIST;
4283 		if (native_arg == 0) {
4284 			return (EINVAL);
4285 		}
4286 		if (native_arg == MUXID_ALL)
4287 			error = munlinkall(stp, type, crp, rvalp);
4288 		else {
4289 			mutex_enter(&muxifier);
4290 			if (!(linkp = findlinks(stp, (int)arg, type))) {
4291 				/* invalid user supplied index number */
4292 				mutex_exit(&muxifier);
4293 				return (EINVAL);
4294 			}
4295 			/* munlink drops the muxifier lock */
4296 			error = munlink(stp, linkp, type, crp, rvalp);
4297 		}
4298 		return (error);
4299 	    }
4300 
4301 	case I_FLUSH:
4302 		/*
4303 		 * send a flush message downstream
4304 		 * flush message can indicate
4305 		 * FLUSHR - flush read queue
4306 		 * FLUSHW - flush write queue
4307 		 * FLUSHRW - flush read/write queue
4308 		 */
4309 		if (stp->sd_flag & STRHUP)
4310 			return (ENXIO);
4311 		if (arg & ~FLUSHRW)
4312 			return (EINVAL);
4313 
4314 		for (;;) {
4315 			if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
4316 				break;
4317 			}
4318 			if (error = strwaitbuf(1, BPRI_HI)) {
4319 				return (error);
4320 			}
4321 		}
4322 
4323 		/*
4324 		 * Send down an unsupported ioctl and wait for the nack
4325 		 * in order to allow the M_FLUSH to propagate back
4326 		 * up to the stream head.
4327 		 * Replaces if (qready()) runqueues();
4328 		 */
4329 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
4330 		strioc.ic_timout = 0;
4331 		strioc.ic_len = 0;
4332 		strioc.ic_dp = NULL;
4333 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4334 		*rvalp = 0;
4335 		return (0);
4336 
4337 	case I_FLUSHBAND:
4338 	    {
4339 		struct bandinfo binfo;
4340 
4341 		error = strcopyin((void *)arg, &binfo, sizeof (binfo),
4342 		    copyflag);
4343 		if (error)
4344 			return (error);
4345 		if (stp->sd_flag & STRHUP)
4346 			return (ENXIO);
4347 		if (binfo.bi_flag & ~FLUSHRW)
4348 			return (EINVAL);
4349 		while (!(mp = allocb(2, BPRI_HI))) {
4350 			if (error = strwaitbuf(2, BPRI_HI))
4351 				return (error);
4352 		}
4353 		mp->b_datap->db_type = M_FLUSH;
4354 		*mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
4355 		*mp->b_wptr++ = binfo.bi_pri;
4356 		putnext(stp->sd_wrq, mp);
4357 		/*
4358 		 * Send down an unsupported ioctl and wait for the nack
4359 		 * in order to allow the M_FLUSH to propagate back
4360 		 * up to the stream head.
4361 		 * Replaces if (qready()) runqueues();
4362 		 */
4363 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
4364 		strioc.ic_timout = 0;
4365 		strioc.ic_len = 0;
4366 		strioc.ic_dp = NULL;
4367 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4368 		*rvalp = 0;
4369 		return (0);
4370 	    }
4371 
4372 	case I_SRDOPT:
4373 		/*
4374 		 * Set read options
4375 		 *
4376 		 * RNORM - default stream mode
4377 		 * RMSGN - message no discard
4378 		 * RMSGD - message discard
4379 		 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
4380 		 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
4381 		 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
4382 		 */
4383 		if (arg & ~(RMODEMASK | RPROTMASK))
4384 			return (EINVAL);
4385 
4386 		if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
4387 			return (EINVAL);
4388 
4389 		mutex_enter(&stp->sd_lock);
4390 		switch (arg & RMODEMASK) {
4391 		case RNORM:
4392 			stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
4393 			break;
4394 		case RMSGD:
4395 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
4396 			    RD_MSGDIS;
4397 			break;
4398 		case RMSGN:
4399 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
4400 			    RD_MSGNODIS;
4401 			break;
4402 		}
4403 
4404 		switch (arg & RPROTMASK) {
4405 		case RPROTNORM:
4406 			stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
4407 			break;
4408 
4409 		case RPROTDAT:
4410 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
4411 			    RD_PROTDAT);
4412 			break;
4413 
4414 		case RPROTDIS:
4415 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
4416 			    RD_PROTDIS);
4417 			break;
4418 		}
4419 		mutex_exit(&stp->sd_lock);
4420 		return (0);
4421 
4422 	case I_GRDOPT:
4423 		/*
4424 		 * Get read option and return the value
4425 		 * to spot pointed to by arg
4426 		 */
4427 	    {
4428 		int rdopt;
4429 
4430 		rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
4431 		    ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
4432 		rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
4433 		    ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
4434 
4435 		return (strcopyout(&rdopt, (void *)arg, sizeof (int),
4436 		    copyflag));
4437 	    }
4438 
4439 	case I_SERROPT:
4440 		/*
4441 		 * Set error options
4442 		 *
4443 		 * RERRNORM - persistent read errors
4444 		 * RERRNONPERSIST - non-persistent read errors
4445 		 * WERRNORM - persistent write errors
4446 		 * WERRNONPERSIST - non-persistent write errors
4447 		 */
4448 		if (arg & ~(RERRMASK | WERRMASK))
4449 			return (EINVAL);
4450 
4451 		mutex_enter(&stp->sd_lock);
4452 		switch (arg & RERRMASK) {
4453 		case RERRNORM:
4454 			stp->sd_flag &= ~STRDERRNONPERSIST;
4455 			break;
4456 		case RERRNONPERSIST:
4457 			stp->sd_flag |= STRDERRNONPERSIST;
4458 			break;
4459 		}
4460 		switch (arg & WERRMASK) {
4461 		case WERRNORM:
4462 			stp->sd_flag &= ~STWRERRNONPERSIST;
4463 			break;
4464 		case WERRNONPERSIST:
4465 			stp->sd_flag |= STWRERRNONPERSIST;
4466 			break;
4467 		}
4468 		mutex_exit(&stp->sd_lock);
4469 		return (0);
4470 
4471 	case I_GERROPT:
4472 		/*
4473 		 * Get error option and return the value
4474 		 * to spot pointed to by arg
4475 		 */
4476 	    {
4477 		int erropt = 0;
4478 
4479 		erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
4480 			RERRNORM;
4481 		erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
4482 			WERRNORM;
4483 		return (strcopyout(&erropt, (void *)arg, sizeof (int),
4484 		    copyflag));
4485 	    }
4486 
4487 	case I_SETSIG:
4488 		/*
4489 		 * Register the calling proc to receive the SIGPOLL
4490 		 * signal based on the events given in arg.  If
4491 		 * arg is zero, remove the proc from register list.
4492 		 */
4493 	    {
4494 		strsig_t *ssp, *pssp;
4495 		struct pid *pidp;
4496 
4497 		pssp = NULL;
4498 		pidp = curproc->p_pidp;
4499 		/*
4500 		 * Hold sd_lock to prevent traversal of sd_siglist while
4501 		 * it is modified.
4502 		 */
4503 		mutex_enter(&stp->sd_lock);
4504 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
4505 			pssp = ssp, ssp = ssp->ss_next)
4506 			;
4507 
4508 		if (arg) {
4509 			if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4510 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4511 				mutex_exit(&stp->sd_lock);
4512 				return (EINVAL);
4513 			}
4514 			if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
4515 				mutex_exit(&stp->sd_lock);
4516 				return (EINVAL);
4517 			}
4518 
4519 			/*
4520 			 * If proc not already registered, add it
4521 			 * to list.
4522 			 */
4523 			if (!ssp) {
4524 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4525 				ssp->ss_pidp = pidp;
4526 				ssp->ss_pid = pidp->pid_id;
4527 				ssp->ss_next = NULL;
4528 				if (pssp)
4529 					pssp->ss_next = ssp;
4530 				else
4531 					stp->sd_siglist = ssp;
4532 				mutex_enter(&pidlock);
4533 				PID_HOLD(pidp);
4534 				mutex_exit(&pidlock);
4535 			}
4536 
4537 			/*
4538 			 * Set events.
4539 			 */
4540 			ssp->ss_events = (int)arg;
4541 		} else {
4542 			/*
4543 			 * Remove proc from register list.
4544 			 */
4545 			if (ssp) {
4546 				mutex_enter(&pidlock);
4547 				PID_RELE(pidp);
4548 				mutex_exit(&pidlock);
4549 				if (pssp)
4550 					pssp->ss_next = ssp->ss_next;
4551 				else
4552 					stp->sd_siglist = ssp->ss_next;
4553 				kmem_free(ssp, sizeof (strsig_t));
4554 			} else {
4555 				mutex_exit(&stp->sd_lock);
4556 				return (EINVAL);
4557 			}
4558 		}
4559 
4560 		/*
4561 		 * Recalculate OR of sig events.
4562 		 */
4563 		stp->sd_sigflags = 0;
4564 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4565 			stp->sd_sigflags |= ssp->ss_events;
4566 		mutex_exit(&stp->sd_lock);
4567 		return (0);
4568 	    }
4569 
4570 	case I_GETSIG:
4571 		/*
4572 		 * Return (in arg) the current registration of events
4573 		 * for which the calling proc is to be signaled.
4574 		 */
4575 	    {
4576 		struct strsig *ssp;
4577 		struct pid  *pidp;
4578 
4579 		pidp = curproc->p_pidp;
4580 		mutex_enter(&stp->sd_lock);
4581 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4582 			if (ssp->ss_pidp == pidp) {
4583 				error = strcopyout(&ssp->ss_events, (void *)arg,
4584 				    sizeof (int), copyflag);
4585 				mutex_exit(&stp->sd_lock);
4586 				return (error);
4587 			}
4588 		mutex_exit(&stp->sd_lock);
4589 		return (EINVAL);
4590 	    }
4591 
4592 	case I_ESETSIG:
4593 		/*
4594 		 * Register the ss_pid to receive the SIGPOLL
4595 		 * signal based on the events in the ss_events arg.  If
4596 		 * ss_events is zero, remove the proc from register list.
4597 		 */
4598 	{
4599 		struct strsig *ssp, *pssp;
4600 		struct proc *proc;
4601 		struct pid  *pidp;
4602 		pid_t pid;
4603 		struct strsigset ss;
4604 
4605 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4606 		if (error)
4607 			return (error);
4608 
4609 		pid = ss.ss_pid;
4610 
4611 		if (ss.ss_events != 0) {
4612 			/*
4613 			 * Permissions check by sending signal 0.
4614 			 * Note that when kill fails it does a set_errno
4615 			 * causing the system call to fail.
4616 			 */
4617 			error = kill(pid, 0);
4618 			if (error) {
4619 				return (error);
4620 			}
4621 		}
4622 		mutex_enter(&pidlock);
4623 		if (pid == 0)
4624 			proc = curproc;
4625 		else if (pid < 0)
4626 			proc = pgfind(-pid);
4627 		else
4628 			proc = prfind(pid);
4629 		if (proc == NULL) {
4630 			mutex_exit(&pidlock);
4631 			return (ESRCH);
4632 		}
4633 		if (pid < 0)
4634 			pidp = proc->p_pgidp;
4635 		else
4636 			pidp = proc->p_pidp;
4637 		ASSERT(pidp);
4638 		/*
4639 		 * Get a hold on the pid structure while referencing it.
4640 		 * There is a separate PID_HOLD should it be inserted
4641 		 * in the list below.
4642 		 */
4643 		PID_HOLD(pidp);
4644 		mutex_exit(&pidlock);
4645 
4646 		pssp = NULL;
4647 		/*
4648 		 * Hold sd_lock to prevent traversal of sd_siglist while
4649 		 * it is modified.
4650 		 */
4651 		mutex_enter(&stp->sd_lock);
4652 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
4653 				pssp = ssp, ssp = ssp->ss_next)
4654 			;
4655 
4656 		if (ss.ss_events) {
4657 			if (ss.ss_events &
4658 			    ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4659 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4660 				mutex_exit(&stp->sd_lock);
4661 				mutex_enter(&pidlock);
4662 				PID_RELE(pidp);
4663 				mutex_exit(&pidlock);
4664 				return (EINVAL);
4665 			}
4666 			if ((ss.ss_events & S_BANDURG) &&
4667 			    !(ss.ss_events & S_RDBAND)) {
4668 				mutex_exit(&stp->sd_lock);
4669 				mutex_enter(&pidlock);
4670 				PID_RELE(pidp);
4671 				mutex_exit(&pidlock);
4672 				return (EINVAL);
4673 			}
4674 
4675 			/*
4676 			 * If proc not already registered, add it
4677 			 * to list.
4678 			 */
4679 			if (!ssp) {
4680 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4681 				ssp->ss_pidp = pidp;
4682 				ssp->ss_pid = pid;
4683 				ssp->ss_next = NULL;
4684 				if (pssp)
4685 					pssp->ss_next = ssp;
4686 				else
4687 					stp->sd_siglist = ssp;
4688 				mutex_enter(&pidlock);
4689 				PID_HOLD(pidp);
4690 				mutex_exit(&pidlock);
4691 			}
4692 
4693 			/*
4694 			 * Set events.
4695 			 */
4696 			ssp->ss_events = ss.ss_events;
4697 		} else {
4698 			/*
4699 			 * Remove proc from register list.
4700 			 */
4701 			if (ssp) {
4702 				mutex_enter(&pidlock);
4703 				PID_RELE(pidp);
4704 				mutex_exit(&pidlock);
4705 				if (pssp)
4706 					pssp->ss_next = ssp->ss_next;
4707 				else
4708 					stp->sd_siglist = ssp->ss_next;
4709 				kmem_free(ssp, sizeof (strsig_t));
4710 			} else {
4711 				mutex_exit(&stp->sd_lock);
4712 				mutex_enter(&pidlock);
4713 				PID_RELE(pidp);
4714 				mutex_exit(&pidlock);
4715 				return (EINVAL);
4716 			}
4717 		}
4718 
4719 		/*
4720 		 * Recalculate OR of sig events.
4721 		 */
4722 		stp->sd_sigflags = 0;
4723 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4724 			stp->sd_sigflags |= ssp->ss_events;
4725 		mutex_exit(&stp->sd_lock);
4726 		mutex_enter(&pidlock);
4727 		PID_RELE(pidp);
4728 		mutex_exit(&pidlock);
4729 		return (0);
4730 	    }
4731 
4732 	case I_EGETSIG:
4733 		/*
4734 		 * Return (in arg) the current registration of events
4735 		 * for which the process named by ss_pid is to be signaled.
4736 		 */
4737 	    {
4738 		struct strsig *ssp;
4739 		struct proc *proc;
4740 		pid_t pid;
4741 		struct pid  *pidp;
4742 		struct strsigset ss;
4743 
4744 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4745 		if (error)
4746 			return (error);
4747 
4748 		pid = ss.ss_pid;
4749 		mutex_enter(&pidlock);
4750 		if (pid == 0)
4751 			proc = curproc;
4752 		else if (pid < 0)
4753 			proc = pgfind(-pid);
4754 		else
4755 			proc = prfind(pid);
4756 		if (proc == NULL) {
4757 			mutex_exit(&pidlock);
4758 			return (ESRCH);
4759 		}
4760 		if (pid < 0)
4761 			pidp = proc->p_pgidp;
4762 		else
4763 			pidp = proc->p_pidp;
4764 
4765 		/* Prevent the pidp from being reassigned */
4766 		PID_HOLD(pidp);
4767 		mutex_exit(&pidlock);
4768 
4769 		mutex_enter(&stp->sd_lock);
4770 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4771 			if (ssp->ss_pid == pid) {
4772 				ss.ss_pid = ssp->ss_pid;
4773 				ss.ss_events = ssp->ss_events;
4774 				error = strcopyout(&ss, (void *)arg,
4775 				    sizeof (struct strsigset), copyflag);
4776 				mutex_exit(&stp->sd_lock);
4777 				mutex_enter(&pidlock);
4778 				PID_RELE(pidp);
4779 				mutex_exit(&pidlock);
4780 				return (error);
4781 			}
4782 		mutex_exit(&stp->sd_lock);
4783 		mutex_enter(&pidlock);
4784 		PID_RELE(pidp);
4785 		mutex_exit(&pidlock);
4786 		return (EINVAL);
4787 	    }
4788 
4789 	case I_PEEK:
4790 	    {
4791 		STRUCT_DECL(strpeek, strpeek);
4792 		size_t n;
4793 		mblk_t *fmp, *tmp_mp = NULL;
4794 
4795 		STRUCT_INIT(strpeek, flag);
4796 
4797 		error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
4798 		    STRUCT_SIZE(strpeek), copyflag);
4799 		if (error)
4800 			return (error);
4801 
4802 		mutex_enter(QLOCK(rdq));
4803 		/*
4804 		 * Skip the invalid messages
4805 		 */
4806 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
4807 			if (mp->b_datap->db_type != M_SIG)
4808 				break;
4809 
4810 		/*
4811 		 * If user has requested to peek at a high priority message
4812 		 * and first message is not, return 0
4813 		 */
4814 		if (mp != NULL) {
4815 			if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
4816 			    queclass(mp) == QNORM) {
4817 				*rvalp = 0;
4818 				mutex_exit(QLOCK(rdq));
4819 				return (0);
4820 			}
4821 		} else if (stp->sd_struiordq == NULL ||
4822 		    (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
4823 			/*
4824 			 * No mblks to look at at the streamhead and
4825 			 * 1). This isn't a synch stream or
4826 			 * 2). This is a synch stream but caller wants high
4827 			 *	priority messages which is not supported by
4828 			 *	the synch stream. (it only supports QNORM)
4829 			 */
4830 			*rvalp = 0;
4831 			mutex_exit(QLOCK(rdq));
4832 			return (0);
4833 		}
4834 
4835 		fmp = mp;
4836 
4837 		if (mp && mp->b_datap->db_type == M_PASSFP) {
4838 			mutex_exit(QLOCK(rdq));
4839 			return (EBADMSG);
4840 		}
4841 
4842 		ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
4843 		    mp->b_datap->db_type == M_PROTO ||
4844 		    mp->b_datap->db_type == M_DATA);
4845 
4846 		if (mp && mp->b_datap->db_type == M_PCPROTO) {
4847 			STRUCT_FSET(strpeek, flags, RS_HIPRI);
4848 		} else {
4849 			STRUCT_FSET(strpeek, flags, 0);
4850 		}
4851 
4852 
4853 		if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
4854 			mutex_exit(QLOCK(rdq));
4855 			return (ENOSR);
4856 		}
4857 		mutex_exit(QLOCK(rdq));
4858 
4859 		/*
4860 		 * set mp = tmp_mp, so that I_PEEK processing can continue.
4861 		 * tmp_mp is used to free the dup'd message.
4862 		 */
4863 		mp = tmp_mp;
4864 
4865 		uio.uio_fmode = 0;
4866 		uio.uio_extflg = UIO_COPY_CACHED;
4867 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
4868 		    UIO_SYSSPACE;
4869 		uio.uio_limit = 0;
4870 		/*
4871 		 * First process PROTO blocks, if any.
4872 		 * If user doesn't want to get ctl info by setting maxlen <= 0,
4873 		 * then set len to -1/0 and skip control blocks part.
4874 		 */
4875 		if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
4876 			STRUCT_FSET(strpeek, ctlbuf.len, -1);
4877 		else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
4878 			STRUCT_FSET(strpeek, ctlbuf.len, 0);
4879 		else {
4880 			int	ctl_part = 0;
4881 
4882 			iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
4883 			iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
4884 			uio.uio_iov = &iov;
4885 			uio.uio_resid = iov.iov_len;
4886 			uio.uio_loffset = 0;
4887 			uio.uio_iovcnt = 1;
4888 			while (mp && mp->b_datap->db_type != M_DATA &&
4889 			    uio.uio_resid >= 0) {
4890 				ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
4891 				    mp->b_datap->db_type == M_PROTO :
4892 				    mp->b_datap->db_type == M_PCPROTO);
4893 
4894 				if ((n = MIN(uio.uio_resid,
4895 				    mp->b_wptr - mp->b_rptr)) != 0 &&
4896 				    (error = uiomove((char *)mp->b_rptr, n,
4897 				    UIO_READ, &uio)) != 0) {
4898 					freemsg(tmp_mp);
4899 					return (error);
4900 				}
4901 				ctl_part = 1;
4902 				mp = mp->b_cont;
4903 			}
4904 			/* No ctl message */
4905 			if (ctl_part == 0)
4906 				STRUCT_FSET(strpeek, ctlbuf.len, -1);
4907 			else
4908 				STRUCT_FSET(strpeek, ctlbuf.len,
4909 				    STRUCT_FGET(strpeek, ctlbuf.maxlen) -
4910 				    uio.uio_resid);
4911 		}
4912 
4913 		/*
4914 		 * Now process DATA blocks, if any.
4915 		 * If user doesn't want to get data info by setting maxlen <= 0,
4916 		 * then set len to -1/0 and skip data blocks part.
4917 		 */
4918 		if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
4919 			STRUCT_FSET(strpeek, databuf.len, -1);
4920 		else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
4921 			STRUCT_FSET(strpeek, databuf.len, 0);
4922 		else {
4923 			int	data_part = 0;
4924 
4925 			iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
4926 			iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
4927 			uio.uio_iov = &iov;
4928 			uio.uio_resid = iov.iov_len;
4929 			uio.uio_loffset = 0;
4930 			uio.uio_iovcnt = 1;
4931 			while (mp && uio.uio_resid) {
4932 				if (mp->b_datap->db_type == M_DATA) {
4933 					if ((n = MIN(uio.uio_resid,
4934 					    mp->b_wptr - mp->b_rptr)) != 0 &&
4935 					    (error = uiomove((char *)mp->b_rptr,
4936 						n, UIO_READ, &uio)) != 0) {
4937 						freemsg(tmp_mp);
4938 						return (error);
4939 					}
4940 					data_part = 1;
4941 				}
4942 				ASSERT(data_part == 0 ||
4943 				    mp->b_datap->db_type == M_DATA);
4944 				mp = mp->b_cont;
4945 			}
4946 			/* No data message */
4947 			if (data_part == 0)
4948 				STRUCT_FSET(strpeek, databuf.len, -1);
4949 			else
4950 				STRUCT_FSET(strpeek, databuf.len,
4951 				    STRUCT_FGET(strpeek, databuf.maxlen) -
4952 				    uio.uio_resid);
4953 		}
4954 		freemsg(tmp_mp);
4955 
4956 		/*
4957 		 * It is a synch stream and user wants to get
4958 		 * data (maxlen > 0).
4959 		 * uio setup is done by the code that processes DATA
4960 		 * blocks above.
4961 		 */
4962 		if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
4963 			infod_t infod;
4964 
4965 			infod.d_cmd = INFOD_COPYOUT;
4966 			infod.d_res = 0;
4967 			infod.d_uiop = &uio;
4968 			error = infonext(rdq, &infod);
4969 			if (error == EINVAL || error == EBUSY)
4970 				error = 0;
4971 			if (error)
4972 				return (error);
4973 			STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
4974 			    databuf.maxlen) - uio.uio_resid);
4975 			if (STRUCT_FGET(strpeek, databuf.len) == 0) {
4976 				/*
4977 				 * No data found by the infonext().
4978 				 */
4979 				STRUCT_FSET(strpeek, databuf.len, -1);
4980 			}
4981 		}
4982 		error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
4983 		    STRUCT_SIZE(strpeek), copyflag);
4984 		if (error) {
4985 			return (error);
4986 		}
4987 		/*
4988 		 * If there is no message retrieved, set return code to 0
4989 		 * otherwise, set it to 1.
4990 		 */
4991 		if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
4992 		    STRUCT_FGET(strpeek, databuf.len) == -1)
4993 			*rvalp = 0;
4994 		else
4995 			*rvalp = 1;
4996 		return (0);
4997 	    }
4998 
4999 	case I_FDINSERT:
5000 	    {
5001 		STRUCT_DECL(strfdinsert, strfdinsert);
5002 		struct file *resftp;
5003 		struct stdata *resstp;
5004 		t_uscalar_t	ival;
5005 		ssize_t msgsize;
5006 		struct strbuf mctl;
5007 
5008 		STRUCT_INIT(strfdinsert, flag);
5009 		if (stp->sd_flag & STRHUP)
5010 			return (ENXIO);
5011 		/*
5012 		 * STRDERR, STWRERR and STPLEX tested above.
5013 		 */
5014 		error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
5015 		    STRUCT_SIZE(strfdinsert), copyflag);
5016 		if (error)
5017 			return (error);
5018 
5019 		if (STRUCT_FGET(strfdinsert, offset) < 0 ||
5020 		    (STRUCT_FGET(strfdinsert, offset) %
5021 		    sizeof (t_uscalar_t)) != 0)
5022 			return (EINVAL);
5023 		if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
5024 			if ((resstp = resftp->f_vnode->v_stream) == NULL) {
5025 				releasef(STRUCT_FGET(strfdinsert, fildes));
5026 				return (EINVAL);
5027 			}
5028 		} else
5029 			return (EINVAL);
5030 
5031 		mutex_enter(&resstp->sd_lock);
5032 		if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
5033 			error = strgeterr(resstp,
5034 					STRDERR|STWRERR|STRHUP|STPLEX, 0);
5035 			if (error != 0) {
5036 				mutex_exit(&resstp->sd_lock);
5037 				releasef(STRUCT_FGET(strfdinsert, fildes));
5038 				return (error);
5039 			}
5040 		}
5041 		mutex_exit(&resstp->sd_lock);
5042 
5043 #ifdef	_ILP32
5044 		{
5045 			queue_t	*q;
5046 			queue_t	*mate = NULL;
5047 
5048 			/* get read queue of stream terminus */
5049 			claimstr(resstp->sd_wrq);
5050 			for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
5051 			    q = q->q_next)
5052 				if (!STRMATED(resstp) && STREAM(q) != resstp &&
5053 				    mate == NULL) {
5054 					ASSERT(q->q_qinfo->qi_srvp);
5055 					ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
5056 					claimstr(q);
5057 					mate = q;
5058 				}
5059 			q = _RD(q);
5060 			if (mate)
5061 				releasestr(mate);
5062 			releasestr(resstp->sd_wrq);
5063 			ival = (t_uscalar_t)q;
5064 		}
5065 #else
5066 		ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
5067 #endif	/* _ILP32 */
5068 
5069 		if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
5070 		    STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
5071 			releasef(STRUCT_FGET(strfdinsert, fildes));
5072 			return (EINVAL);
5073 		}
5074 
5075 		/*
5076 		 * Check for legal flag value.
5077 		 */
5078 		if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
5079 			releasef(STRUCT_FGET(strfdinsert, fildes));
5080 			return (EINVAL);
5081 		}
5082 
5083 		/* get these values from those cached in the stream head */
5084 		mutex_enter(QLOCK(stp->sd_wrq));
5085 		rmin = stp->sd_qn_minpsz;
5086 		rmax = stp->sd_qn_maxpsz;
5087 		mutex_exit(QLOCK(stp->sd_wrq));
5088 
5089 		/*
5090 		 * Make sure ctl and data sizes together fall within
5091 		 * the limits of the max and min receive packet sizes
5092 		 * and do not exceed system limit.  A negative data
5093 		 * length means that no data part is to be sent.
5094 		 */
5095 		ASSERT((rmax >= 0) || (rmax == INFPSZ));
5096 		if (rmax == 0) {
5097 			releasef(STRUCT_FGET(strfdinsert, fildes));
5098 			return (ERANGE);
5099 		}
5100 		if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
5101 			msgsize = 0;
5102 		if ((msgsize < rmin) ||
5103 		    ((msgsize > rmax) && (rmax != INFPSZ)) ||
5104 		    (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
5105 			releasef(STRUCT_FGET(strfdinsert, fildes));
5106 			return (ERANGE);
5107 		}
5108 
5109 		mutex_enter(&stp->sd_lock);
5110 		while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
5111 		    !canputnext(stp->sd_wrq)) {
5112 			if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
5113 			    flag, -1, &done)) != 0 || done) {
5114 				mutex_exit(&stp->sd_lock);
5115 				releasef(STRUCT_FGET(strfdinsert, fildes));
5116 				return (error);
5117 			}
5118 			if (stp->sd_sidp != NULL &&
5119 			    stp->sd_vnode->v_type != VFIFO) {
5120 				mutex_exit(&stp->sd_lock);
5121 				if (error = straccess(stp, access)) {
5122 					releasef(
5123 					    STRUCT_FGET(strfdinsert, fildes));
5124 					return (error);
5125 				}
5126 				mutex_enter(&stp->sd_lock);
5127 			}
5128 		}
5129 		mutex_exit(&stp->sd_lock);
5130 
5131 		/*
5132 		 * Copy strfdinsert.ctlbuf into native form of
5133 		 * ctlbuf to pass down into strmakemsg().
5134 		 */
5135 		mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
5136 		mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
5137 		mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
5138 
5139 		iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
5140 		iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
5141 		uio.uio_iov = &iov;
5142 		uio.uio_iovcnt = 1;
5143 		uio.uio_loffset = 0;
5144 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5145 		    UIO_SYSSPACE;
5146 		uio.uio_fmode = 0;
5147 		uio.uio_extflg = UIO_COPY_CACHED;
5148 		uio.uio_resid = iov.iov_len;
5149 		if ((error = strmakemsg(&mctl,
5150 		    &msgsize, &uio, stp,
5151 		    STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
5152 			STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5153 			releasef(STRUCT_FGET(strfdinsert, fildes));
5154 			return (error);
5155 		}
5156 
5157 		STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5158 
5159 		/*
5160 		 * Place the possibly reencoded queue pointer 'offset' bytes
5161 		 * from the start of the control portion of the message.
5162 		 */
5163 		*((t_uscalar_t *)(mp->b_rptr +
5164 		    STRUCT_FGET(strfdinsert, offset))) = ival;
5165 
5166 		/*
5167 		 * Put message downstream.
5168 		 */
5169 		stream_willservice(stp);
5170 		putnext(stp->sd_wrq, mp);
5171 		stream_runservice(stp);
5172 		releasef(STRUCT_FGET(strfdinsert, fildes));
5173 		return (error);
5174 	    }
5175 
5176 	case I_SENDFD:
5177 	    {
5178 		struct file *fp;
5179 
5180 		if ((fp = getf((int)arg)) == NULL)
5181 			return (EBADF);
5182 		error = do_sendfp(stp, fp, crp);
5183 #ifdef C2_AUDIT
5184 		if (audit_active) {
5185 			audit_fdsend((int)arg, fp, error);
5186 		}
5187 #endif
5188 		releasef((int)arg);
5189 		return (error);
5190 	    }
5191 
5192 	case I_RECVFD:
5193 	case I_E_RECVFD:
5194 	    {
5195 		struct k_strrecvfd *srf;
5196 		int i, fd;
5197 
5198 		mutex_enter(&stp->sd_lock);
5199 		while (!(mp = getq(rdq))) {
5200 			if (stp->sd_flag & (STRHUP|STREOF)) {
5201 				mutex_exit(&stp->sd_lock);
5202 				return (ENXIO);
5203 			}
5204 			if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
5205 			    flag, -1, &done)) != 0 || done) {
5206 				mutex_exit(&stp->sd_lock);
5207 				return (error);
5208 			}
5209 			if (stp->sd_sidp != NULL &&
5210 			    stp->sd_vnode->v_type != VFIFO) {
5211 				mutex_exit(&stp->sd_lock);
5212 				if (error = straccess(stp, access))
5213 					return (error);
5214 				mutex_enter(&stp->sd_lock);
5215 			}
5216 		}
5217 		if (mp->b_datap->db_type != M_PASSFP) {
5218 			putback(stp, rdq, mp, mp->b_band);
5219 			mutex_exit(&stp->sd_lock);
5220 			return (EBADMSG);
5221 		}
5222 		mutex_exit(&stp->sd_lock);
5223 
5224 		srf = (struct k_strrecvfd *)mp->b_rptr;
5225 		if ((fd = ufalloc(0)) == -1) {
5226 			mutex_enter(&stp->sd_lock);
5227 			putback(stp, rdq, mp, mp->b_band);
5228 			mutex_exit(&stp->sd_lock);
5229 			return (EMFILE);
5230 		}
5231 		if (cmd == I_RECVFD) {
5232 			struct o_strrecvfd	ostrfd;
5233 
5234 			/* check to see if uid/gid values are too large. */
5235 
5236 			if (srf->uid > (o_uid_t)USHRT_MAX ||
5237 			    srf->gid > (o_gid_t)USHRT_MAX) {
5238 				mutex_enter(&stp->sd_lock);
5239 				putback(stp, rdq, mp, mp->b_band);
5240 				mutex_exit(&stp->sd_lock);
5241 				setf(fd, NULL);	/* release fd entry */
5242 				return (EOVERFLOW);
5243 			}
5244 
5245 			ostrfd.fd = fd;
5246 			ostrfd.uid = (o_uid_t)srf->uid;
5247 			ostrfd.gid = (o_gid_t)srf->gid;
5248 
5249 			/* Null the filler bits */
5250 			for (i = 0; i < 8; i++)
5251 				ostrfd.fill[i] = 0;
5252 
5253 			error = strcopyout(&ostrfd, (void *)arg,
5254 			    sizeof (struct o_strrecvfd), copyflag);
5255 		} else {		/* I_E_RECVFD */
5256 			struct strrecvfd	strfd;
5257 
5258 			strfd.fd = fd;
5259 			strfd.uid = srf->uid;
5260 			strfd.gid = srf->gid;
5261 
5262 			/* null the filler bits */
5263 			for (i = 0; i < 8; i++)
5264 				strfd.fill[i] = 0;
5265 
5266 			error = strcopyout(&strfd, (void *)arg,
5267 			    sizeof (struct strrecvfd), copyflag);
5268 		}
5269 
5270 		if (error) {
5271 			setf(fd, NULL);	/* release fd entry */
5272 			mutex_enter(&stp->sd_lock);
5273 			putback(stp, rdq, mp, mp->b_band);
5274 			mutex_exit(&stp->sd_lock);
5275 			return (error);
5276 		}
5277 #ifdef C2_AUDIT
5278 		if (audit_active) {
5279 			audit_fdrecv(fd, srf->fp);
5280 		}
5281 #endif
5282 
5283 		/*
5284 		 * Always increment f_count since the freemsg() below will
5285 		 * always call free_passfp() which performs a closef().
5286 		 */
5287 		mutex_enter(&srf->fp->f_tlock);
5288 		srf->fp->f_count++;
5289 		mutex_exit(&srf->fp->f_tlock);
5290 		setf(fd, srf->fp);
5291 		freemsg(mp);
5292 		return (0);
5293 	    }
5294 
5295 	case I_SWROPT:
5296 		/*
5297 		 * Set/clear the write options. arg is a bit
5298 		 * mask with any of the following bits set...
5299 		 * 	SNDZERO - send zero length message
5300 		 *	SNDPIPE - send sigpipe to process if
5301 		 *		sd_werror is set and process is
5302 		 *		doing a write or putmsg.
5303 		 * The new stream head write options should reflect
5304 		 * what is in arg.
5305 		 */
5306 		if (arg & ~(SNDZERO|SNDPIPE))
5307 			return (EINVAL);
5308 
5309 		mutex_enter(&stp->sd_lock);
5310 		stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
5311 		if (arg & SNDZERO)
5312 			stp->sd_wput_opt |= SW_SNDZERO;
5313 		if (arg & SNDPIPE)
5314 			stp->sd_wput_opt |= SW_SIGPIPE;
5315 		mutex_exit(&stp->sd_lock);
5316 		return (0);
5317 
5318 	case I_GWROPT:
5319 	    {
5320 		int wropt = 0;
5321 
5322 		if (stp->sd_wput_opt & SW_SNDZERO)
5323 			wropt |= SNDZERO;
5324 		if (stp->sd_wput_opt & SW_SIGPIPE)
5325 			wropt |= SNDPIPE;
5326 		return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
5327 		    copyflag));
5328 	    }
5329 
5330 	case I_LIST:
5331 		/*
5332 		 * Returns all the modules found on this stream,
5333 		 * upto the driver. If argument is NULL, return the
5334 		 * number of modules (including driver). If argument
5335 		 * is not NULL, copy the names into the structure
5336 		 * provided.
5337 		 */
5338 
5339 	    {
5340 		queue_t *q;
5341 		int num_modules, space_allocated;
5342 		STRUCT_DECL(str_list, strlist);
5343 		struct str_mlist *mlist_ptr;
5344 
5345 		if (arg == NULL) { /* Return number of modules plus driver */
5346 			q = stp->sd_wrq;
5347 			if (stp->sd_vnode->v_type == VFIFO) {
5348 				*rvalp = stp->sd_pushcnt;
5349 			} else {
5350 				*rvalp = stp->sd_pushcnt + 1;
5351 			}
5352 		} else {
5353 			STRUCT_INIT(strlist, flag);
5354 
5355 			error = strcopyin((void *)arg, STRUCT_BUF(strlist),
5356 			    STRUCT_SIZE(strlist), copyflag);
5357 			if (error)
5358 				return (error);
5359 
5360 			space_allocated = STRUCT_FGET(strlist, sl_nmods);
5361 			if ((space_allocated) <= 0)
5362 				return (EINVAL);
5363 			claimstr(stp->sd_wrq);
5364 			q = stp->sd_wrq;
5365 			num_modules = 0;
5366 			while (_SAMESTR(q) && (space_allocated != 0)) {
5367 				char *name =
5368 				    q->q_next->q_qinfo->qi_minfo->mi_idname;
5369 
5370 				mlist_ptr = STRUCT_FGETP(strlist, sl_modlist);
5371 
5372 				error = strcopyout(name, mlist_ptr,
5373 				    strlen(name) + 1, copyflag);
5374 
5375 				if (error) {
5376 					releasestr(stp->sd_wrq);
5377 					return (error);
5378 				}
5379 				q = q->q_next;
5380 				space_allocated--;
5381 				num_modules++;
5382 				mlist_ptr =
5383 				    (struct str_mlist *)((uintptr_t)mlist_ptr +
5384 				    sizeof (struct str_mlist));
5385 				STRUCT_FSETP(strlist, sl_modlist, mlist_ptr);
5386 			}
5387 			releasestr(stp->sd_wrq);
5388 			error = strcopyout(&num_modules, (void *)arg,
5389 			    sizeof (int), copyflag);
5390 		}
5391 		return (error);
5392 	    }
5393 
5394 	case I_CKBAND:
5395 	    {
5396 		queue_t *q;
5397 		qband_t *qbp;
5398 
5399 		if ((arg < 0) || (arg >= NBAND))
5400 			return (EINVAL);
5401 		q = _RD(stp->sd_wrq);
5402 		mutex_enter(QLOCK(q));
5403 		if (arg > (int)q->q_nband) {
5404 			*rvalp = 0;
5405 		} else {
5406 			if (arg == 0) {
5407 				if (q->q_first)
5408 					*rvalp = 1;
5409 				else
5410 					*rvalp = 0;
5411 			} else {
5412 				qbp = q->q_bandp;
5413 				while (--arg > 0)
5414 					qbp = qbp->qb_next;
5415 				if (qbp->qb_first)
5416 					*rvalp = 1;
5417 				else
5418 					*rvalp = 0;
5419 			}
5420 		}
5421 		mutex_exit(QLOCK(q));
5422 		return (0);
5423 	    }
5424 
5425 	case I_GETBAND:
5426 	    {
5427 		int intpri;
5428 		queue_t *q;
5429 
5430 		q = _RD(stp->sd_wrq);
5431 		mutex_enter(QLOCK(q));
5432 		mp = q->q_first;
5433 		if (!mp) {
5434 			mutex_exit(QLOCK(q));
5435 			return (ENODATA);
5436 		}
5437 		intpri = (int)mp->b_band;
5438 		error = strcopyout(&intpri, (void *)arg, sizeof (int),
5439 		    copyflag);
5440 		mutex_exit(QLOCK(q));
5441 		return (error);
5442 	    }
5443 
5444 	case I_ATMARK:
5445 	    {
5446 		queue_t *q;
5447 
5448 		if (arg & ~(ANYMARK|LASTMARK))
5449 			return (EINVAL);
5450 		q = _RD(stp->sd_wrq);
5451 		mutex_enter(&stp->sd_lock);
5452 		if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
5453 			*rvalp = 1;
5454 		} else {
5455 			mutex_enter(QLOCK(q));
5456 			mp = q->q_first;
5457 
5458 			if (mp == NULL)
5459 				*rvalp = 0;
5460 			else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
5461 				*rvalp = 1;
5462 			else if ((arg == LASTMARK) && (mp == stp->sd_mark))
5463 				*rvalp = 1;
5464 			else
5465 				*rvalp = 0;
5466 			mutex_exit(QLOCK(q));
5467 		}
5468 		mutex_exit(&stp->sd_lock);
5469 		return (0);
5470 	    }
5471 
5472 	case I_CANPUT:
5473 	    {
5474 		char band;
5475 
5476 		if ((arg < 0) || (arg >= NBAND))
5477 			return (EINVAL);
5478 		band = (char)arg;
5479 		*rvalp = bcanputnext(stp->sd_wrq, band);
5480 		return (0);
5481 	    }
5482 
5483 	case I_SETCLTIME:
5484 	    {
5485 		int closetime;
5486 
5487 		error = strcopyin((void *)arg, &closetime, sizeof (int),
5488 		    copyflag);
5489 		if (error)
5490 			return (error);
5491 		if (closetime < 0)
5492 			return (EINVAL);
5493 
5494 		stp->sd_closetime = closetime;
5495 		return (0);
5496 	    }
5497 
5498 	case I_GETCLTIME:
5499 	    {
5500 		int closetime;
5501 
5502 		closetime = stp->sd_closetime;
5503 		return (strcopyout(&closetime, (void *)arg, sizeof (int),
5504 		    copyflag));
5505 	    }
5506 
5507 	case TIOCGSID:
5508 	{
5509 		pid_t sid;
5510 
5511 		mutex_enter(&pidlock);
5512 		if (stp->sd_sidp == NULL) {
5513 			mutex_exit(&pidlock);
5514 			return (ENOTTY);
5515 		}
5516 		sid = stp->sd_sidp->pid_id;
5517 		mutex_exit(&pidlock);
5518 		return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
5519 		    copyflag));
5520 	}
5521 
5522 	case TIOCSPGRP:
5523 	{
5524 		pid_t pgrp;
5525 		proc_t *q;
5526 		pid_t	sid, fg_pgid, bg_pgid;
5527 
5528 		if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
5529 		    copyflag))
5530 			return (error);
5531 		mutex_enter(&stp->sd_lock);
5532 		mutex_enter(&pidlock);
5533 		if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
5534 			mutex_exit(&pidlock);
5535 			mutex_exit(&stp->sd_lock);
5536 			return (ENOTTY);
5537 		}
5538 		if (pgrp == stp->sd_pgidp->pid_id) {
5539 			mutex_exit(&pidlock);
5540 			mutex_exit(&stp->sd_lock);
5541 			return (0);
5542 		}
5543 		if (pgrp <= 0 || pgrp >= maxpid) {
5544 			mutex_exit(&pidlock);
5545 			mutex_exit(&stp->sd_lock);
5546 			return (EINVAL);
5547 		}
5548 		if ((q = pgfind(pgrp)) == NULL ||
5549 		    q->p_sessp != ttoproc(curthread)->p_sessp) {
5550 			mutex_exit(&pidlock);
5551 			mutex_exit(&stp->sd_lock);
5552 			return (EPERM);
5553 		}
5554 		sid = stp->sd_sidp->pid_id;
5555 		fg_pgid = q->p_pgrp;
5556 		bg_pgid = stp->sd_pgidp->pid_id;
5557 		CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
5558 		PID_RELE(stp->sd_pgidp);
5559 		stp->sd_pgidp = q->p_pgidp;
5560 		PID_HOLD(stp->sd_pgidp);
5561 		mutex_exit(&pidlock);
5562 		mutex_exit(&stp->sd_lock);
5563 		return (0);
5564 	}
5565 
5566 	case TIOCGPGRP:
5567 	{
5568 		pid_t pgrp;
5569 
5570 		mutex_enter(&pidlock);
5571 		if (stp->sd_sidp == NULL) {
5572 			mutex_exit(&pidlock);
5573 			return (ENOTTY);
5574 		}
5575 		pgrp = stp->sd_pgidp->pid_id;
5576 		mutex_exit(&pidlock);
5577 		return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
5578 		    copyflag));
5579 	}
5580 
5581 	case FIONBIO:
5582 	case FIOASYNC:
5583 		return (0);	/* handled by the upper layer */
5584 	}
5585 }
5586 
5587 /*
5588  * Custom free routine used for M_PASSFP messages.
5589  */
5590 static void
5591 free_passfp(struct k_strrecvfd *srf)
5592 {
5593 	(void) closef(srf->fp);
5594 	kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
5595 }
5596 
5597 /* ARGSUSED */
5598 int
5599 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
5600 {
5601 	queue_t *qp, *nextqp;
5602 	struct k_strrecvfd *srf;
5603 	mblk_t *mp;
5604 	frtn_t *frtnp;
5605 	size_t bufsize;
5606 	queue_t	*mate = NULL;
5607 	syncq_t	*sq = NULL;
5608 	int retval = 0;
5609 
5610 	if (stp->sd_flag & STRHUP)
5611 		return (ENXIO);
5612 
5613 	claimstr(stp->sd_wrq);
5614 
5615 	/* Fastpath, we have a pipe, and we are already mated, use it. */
5616 	if (STRMATED(stp)) {
5617 		qp = _RD(stp->sd_mate->sd_wrq);
5618 		claimstr(qp);
5619 		mate = qp;
5620 	} else { /* Not already mated. */
5621 
5622 		/*
5623 		 * Walk the stream to the end of this one.
5624 		 * assumes that the claimstr() will prevent
5625 		 * plumbing between the stream head and the
5626 		 * driver from changing
5627 		 */
5628 		qp = stp->sd_wrq;
5629 
5630 		/*
5631 		 * Loop until we reach the end of this stream.
5632 		 * On completion, qp points to the write queue
5633 		 * at the end of the stream, or the read queue
5634 		 * at the stream head if this is a fifo.
5635 		 */
5636 		while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
5637 			;
5638 
5639 		/*
5640 		 * Just in case we get a q_next which is NULL, but
5641 		 * not at the end of the stream.  This is actually
5642 		 * broken, so we set an assert to catch it in
5643 		 * debug, and set an error and return if not debug.
5644 		 */
5645 		ASSERT(qp);
5646 		if (qp == NULL) {
5647 			releasestr(stp->sd_wrq);
5648 			return (EINVAL);
5649 		}
5650 
5651 		/*
5652 		 * Enter the syncq for the driver, so (hopefully)
5653 		 * the queue values will not change on us.
5654 		 * XXXX - This will only prevent the race IFF only
5655 		 *   the write side modifies the q_next member, and
5656 		 *   the put procedure is protected by at least
5657 		 *   MT_PERQ.
5658 		 */
5659 		if ((sq = qp->q_syncq) != NULL)
5660 			entersq(sq, SQ_PUT);
5661 
5662 		/* Now get the q_next value from this qp. */
5663 		nextqp = qp->q_next;
5664 
5665 		/*
5666 		 * If nextqp exists and the other stream is different
5667 		 * from this one claim the stream, set the mate, and
5668 		 * get the read queue at the stream head of the other
5669 		 * stream.  Assumes that nextqp was at least valid when
5670 		 * we got it.  Hopefully the entersq of the driver
5671 		 * will prevent it from changing on us.
5672 		 */
5673 		if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
5674 			ASSERT(qp->q_qinfo->qi_srvp);
5675 			ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
5676 			ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
5677 			claimstr(nextqp);
5678 
5679 			/* Make sure we still have a q_next */
5680 			if (nextqp != qp->q_next) {
5681 				releasestr(stp->sd_wrq);
5682 				releasestr(nextqp);
5683 				return (EINVAL);
5684 			}
5685 
5686 			qp = _RD(STREAM(nextqp)->sd_wrq);
5687 			mate = qp;
5688 		}
5689 		/* If we entered the synq above, leave it. */
5690 		if (sq != NULL)
5691 			leavesq(sq, SQ_PUT);
5692 	} /*  STRMATED(STP)  */
5693 
5694 	/* XXX prevents substitution of the ops vector */
5695 	if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
5696 		retval = EINVAL;
5697 		goto out;
5698 	}
5699 
5700 	if (qp->q_flag & QFULL) {
5701 		retval = EAGAIN;
5702 		goto out;
5703 	}
5704 
5705 	/*
5706 	 * Since M_PASSFP messages include a file descriptor, we use
5707 	 * esballoc() and specify a custom free routine (free_passfp()) that
5708 	 * will close the descriptor as part of freeing the message.  For
5709 	 * convenience, we stash the frtn_t right after the data block.
5710 	 */
5711 	bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
5712 	srf = kmem_alloc(bufsize, KM_NOSLEEP);
5713 	if (srf == NULL) {
5714 		retval = EAGAIN;
5715 		goto out;
5716 	}
5717 
5718 	frtnp = (frtn_t *)(srf + 1);
5719 	frtnp->free_arg = (caddr_t)srf;
5720 	frtnp->free_func = free_passfp;
5721 
5722 	mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
5723 	if (mp == NULL) {
5724 		kmem_free(srf, bufsize);
5725 		retval = EAGAIN;
5726 		goto out;
5727 	}
5728 	mp->b_wptr += sizeof (struct k_strrecvfd);
5729 	mp->b_datap->db_type = M_PASSFP;
5730 
5731 	srf->fp = fp;
5732 	srf->uid = crgetuid(curthread->t_cred);
5733 	srf->gid = crgetgid(curthread->t_cred);
5734 	mutex_enter(&fp->f_tlock);
5735 	fp->f_count++;
5736 	mutex_exit(&fp->f_tlock);
5737 
5738 	put(qp, mp);
5739 out:
5740 	releasestr(stp->sd_wrq);
5741 	if (mate)
5742 		releasestr(mate);
5743 	return (retval);
5744 }
5745 
/*
 * Send an ioctl message downstream and wait for acknowledgement.
 *
 * Arguments:
 *	stp	stream head the ioctl is issued on.
 *	strioc	describes the request (ic_cmd, ic_len, ic_dp, ic_timout).
 *		An ic_len of TRANSPARENT selects a transparent ioctl; note
 *		that ic_len is then overwritten with sizeof (intptr_t).
 *		For a non-transparent ioctl that is positively acknowledged
 *		without error, ic_len is updated with the length of the
 *		returned data.
 *	fflags	file flags; only the FMODELS (data model) bits are used and
 *		at least one of them must be set.
 *	flag	either U_TO_K or K_TO_K, optionally combined with STR_NOERROR
 *		and/or STR_NOSIG:
 *		STR_NOSIG: Signals are essentially ignored or held and have
 *			no effect for the duration of the call.
 *		STR_NOERROR: Ignores stream head read, write and hup errors.
 *			Additionally, if an existing ioctl times out, it is
 *			assumed lost and this ioctl will continue as if the
 *			previous ioctl had finished.
 *	crp	credentials recorded in the M_IOCTL message.  A hold is taken
 *		here and released again on every return path that follows it.
 *	rvalp	receives ioc_rval from a positive acknowledgement that
 *		carries no error.
 *
 * Returns 0 on success or an errno value.  EINVAL is returned for an
 * ic_len that is negative or larger than a non-zero strmsgsz, and for an
 * M_IOCNAK that carries no error of its own.  EINTR is returned when a wait
 * is interrupted.  ETIME may be returned if this ioctl times out (i.e.
 * ic_timout is not INFTIM).  Non-stream head errors may be returned if
 * the ioc_error indicates that the driver/module had problems,
 * an EFAULT was found when accessing user data, a lack of
 * resources, etc.
 */
int
strdoioctl(
	struct stdata *stp,
	struct strioctl *strioc,
	int fflags,		/* file flags with model info */
	int flag,
	cred_t *crp,
	int *rvalp)
{
	mblk_t *bp;
	struct iocblk *iocbp;
	struct copyreq *reqp;
	struct copyresp *resp;
	int id;
	int transparent = 0;
	int error = 0;
	int len = 0;
	caddr_t taddr;
	int copyflag = (flag & (U_TO_K | K_TO_K));
	int sigflag = (flag & STR_NOSIG);
	int errs;
	uint_t waitflags;

	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
	ASSERT((fflags & FMODELS) != 0);

	TRACE_2(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL,
		"strdoioctl:stp %p strioc %p", stp, strioc);
	/*
	 * A transparent ioctl sends exactly one intptr_t worth of data
	 * downstream; ioc_count is switched back to TRANSPARENT further
	 * down, once that data has been attached.
	 */
	if (strioc->ic_len == TRANSPARENT) {	/* send arg in M_DATA block */
		transparent = 1;
		strioc->ic_len = sizeof (intptr_t);
	}

	/* A strmsgsz of 0 (or less) disables the upper bound. */
	if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
		return (EINVAL);

	/* Nothing is held yet, so a failed allocation can return directly. */
	if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
	    crp)) == NULL)
			return (error);

	/* Start from a fully zeroed ioctypes-sized header. */
	bzero(bp->b_wptr, sizeof (union ioctypes));

	iocbp = (struct iocblk *)bp->b_wptr;
	iocbp->ioc_count = strioc->ic_len;
	iocbp->ioc_cmd = strioc->ic_cmd;
	iocbp->ioc_flag = (fflags & FMODELS);

	/*
	 * Hold crp for the message.  From here on every return path
	 * releases it with crfree().
	 */
	crhold(crp);
	iocbp->ioc_cr = crp;
	DB_TYPE(bp) = M_IOCTL;
	DB_CPID(bp) = curproc->p_pid;
	bp->b_wptr += sizeof (struct iocblk);

	/*
	 * errs is the set of stream head conditions that abort this
	 * ioctl; STR_NOERROR narrows it to STPLEX alone.
	 */
	if (flag & STR_NOERROR)
		errs = STPLEX;
	else
		errs = STRHUP|STRDERR|STWRERR|STPLEX;

	/*
	 * If there is data to copy into ioctl block, do so.
	 */
	if (iocbp->ioc_count > 0) {
		if (transparent)
			/*
			 * Note: STR_NOERROR does not have an effect
			 * in putiocd()
			 *
			 * NOTE(review): K_TO_K is forced here presumably
			 * because the transparent argument is already in
			 * kernel memory -- confirm against the callers.
			 */
			id = K_TO_K | sigflag;
		else
			id = flag;
		if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
			freemsg(bp);
			crfree(crp);
			return (error);
		}

		/*
		 * We could have slept copying in user pages.
		 * Recheck the stream head state (the other end
		 * of a pipe could have gone away).
		 */
		if (stp->sd_flag & errs) {
			mutex_enter(&stp->sd_lock);
			error = strgeterr(stp, errs, 0);
			mutex_exit(&stp->sd_lock);
			if (error != 0) {
				freemsg(bp);
				crfree(crp);
				return (error);
			}
		}
	}
	if (transparent)
		iocbp->ioc_count = TRANSPARENT;

	/*
	 * Block for up to STRTIMOUT milliseconds if there is an outstanding
	 * ioctl for this stream already running.  All processes
	 * sleeping here will be awakened as a result of an ACK
	 * or NAK being received for the outstanding ioctl, or
	 * as a result of the timer expiring on the outstanding
	 * ioctl (a failure), or as a result of any waiting
	 * process's timer expiring (also a failure).
	 */

	error = 0;
	mutex_enter(&stp->sd_lock);
	while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) {
		clock_t cv_rval;

		TRACE_0(TR_FAC_STREAMS_FR,
			TR_STRDOIOCTL_WAIT,
			"strdoioctl sleeps - IOCWAIT");
		cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
		    STRTIMOUT, sigflag);
		/*
		 * As used below, a return of 0 means the wait was
		 * interrupted, a negative return means it timed out and
		 * a positive return means we were awakened.
		 */
		if (cv_rval <= 0) {
			if (cv_rval == 0) {
				error = EINTR;
			} else {
				if (flag & STR_NOERROR) {
					/*
					 * Terminating current ioctl in
					 * progress -- assume it got lost and
					 * wake up the other thread so that the
					 * operation completes.
					 */
					if (!(stp->sd_flag & IOCWAITNE)) {
						stp->sd_flag |= IOCWAITNE;
						cv_broadcast(&stp->sd_monitor);
					}
					/*
					 * Otherwise, there's a running
					 * STR_NOERROR -- we have no choice
					 * here but to wait forever (or until
					 * interrupted).
					 */
				} else {
					/*
					 * pending ioctl has caused
					 * us to time out
					 */
					error = ETIME;
				}
			}
		} else if ((stp->sd_flag & errs)) {
			error = strgeterr(stp, errs, 0);
		}
		if (error) {
			mutex_exit(&stp->sd_lock);
			freemsg(bp);
			crfree(crp);
			return (error);
		}
	}

	/*
	 * Have control of ioctl mechanism.
	 * Send down ioctl packet and wait for response.
	 *
	 * sd_iocblk may still hold a reply left over from an earlier
	 * ioctl; free it unless it is the (mblk_t *)-1 marker that is
	 * stored below once an ACK/NAK has been consumed.
	 */
	if (stp->sd_iocblk != (mblk_t *)-1) {
		freemsg(stp->sd_iocblk);
	}
	stp->sd_iocblk = NULL;

	/*
	 * If this is marked with 'noerror' (internal; mostly
	 * I_{P,}{UN,}LINK), then make sure nobody else is able to get
	 * in here by setting IOCWAITNE.
	 */
	waitflags = IOCWAIT;
	if (flag & STR_NOERROR)
		waitflags |= IOCWAITNE;

	stp->sd_flag |= waitflags;

	/*
	 * Assign sequence number.
	 */
	iocbp->ioc_id = stp->sd_iocid = getiocseqno();

	mutex_exit(&stp->sd_lock);

	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
	stream_willservice(stp);
	putnext(stp->sd_wrq, bp);
	stream_runservice(stp);

	/*
	 * Timed wait for acknowledgment.  The wait time is taken from
	 * ic_timout: a non-zero value is multiplied by 1000 before being
	 * handed to str_cv_wait(), and 0 selects the default of STRTIMOUT.
	 * NOTE(review): ic_timout is presumably in seconds, with -1
	 * (INFTIM) meaning wait forever; that depends on str_cv_wait()
	 * taking milliseconds and treating a negative time as "no
	 * timeout" -- confirm.  This will be awakened
	 * either by an ACK/NAK message arriving, the timer expiring, or
	 * the timer expiring on another ioctl waiting for control of the
	 * mechanism.
	 *
	 * The M_COPYIN and M_COPYOUT cases below jump back here after
	 * sending their M_IOCDATA response, to wait for the next reply.
	 */
waitioc:
	mutex_enter(&stp->sd_lock);


	/*
	 * If the reply has already arrived, don't sleep.  If awakened from
	 * the sleep, fail only if the reply has not arrived by then.
	 * Otherwise, process the reply.
	 */
	while (!stp->sd_iocblk) {
		clock_t cv_rval;

		if (stp->sd_flag & errs) {
			error = strgeterr(stp, errs, 0);
			if (error != 0) {
				/*
				 * Give up control of the ioctl mechanism
				 * and let the next waiter in.
				 */
				stp->sd_flag &= ~waitflags;
				cv_broadcast(&stp->sd_iocmonitor);
				mutex_exit(&stp->sd_lock);
				crfree(crp);
				return (error);
			}
		}

		TRACE_0(TR_FAC_STREAMS_FR,
			TR_STRDOIOCTL_WAIT2,
			"strdoioctl sleeps awaiting reply");
		ASSERT(error == 0);

		cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
		    (strioc->ic_timout ?
		    strioc->ic_timout * 1000 : STRTIMOUT), sigflag);

		/*
		 * There are four possible cases here: interrupt, timeout,
		 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
		 * valid M_IOCTL reply).
		 *
		 * If we've been awakened by a STR_NOERROR ioctl on some other
		 * thread, then sd_iocblk will still be NULL, and IOCWAITNE
		 * will be set.  Pretend as if we just timed out.  Note that
		 * this other thread waited at least STRTIMOUT before trying to
		 * awaken our thread, so this is indistinguishable (even for
		 * INFTIM) from the case where we failed with ETIME waiting on
		 * IOCWAIT in the prior loop.
		 */
		if (cv_rval > 0 && !(flag & STR_NOERROR) &&
		    stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
			cv_rval = -1;
		}

		/*
		 * note: STR_NOERROR does not protect
		 * us here.. use ic_timout < 0
		 */
		if (cv_rval <= 0) {
			if (cv_rval == 0) {
				error = EINTR;
			} else {
				error =  ETIME;
			}
			/*
			 * A message could have come in after we were scheduled
			 * but before we were actually run.
			 */
			bp = stp->sd_iocblk;
			stp->sd_iocblk = NULL;
			if (bp != NULL) {
				/*
				 * A pending copy request is answered with
				 * an M_IOCDATA whose cp_rval is non-zero
				 * (failure).  Any other late reply is
				 * simply discarded.
				 */
				if ((bp->b_datap->db_type == M_COPYIN) ||
				    (bp->b_datap->db_type == M_COPYOUT)) {
					mutex_exit(&stp->sd_lock);
					if (bp->b_cont) {
						freemsg(bp->b_cont);
						bp->b_cont = NULL;
					}
					bp->b_datap->db_type = M_IOCDATA;
					bp->b_wptr = bp->b_rptr +
						sizeof (struct copyresp);
					resp = (struct copyresp *)bp->b_rptr;
					resp->cp_rval =
					    (caddr_t)1; /* failure */
					stream_willservice(stp);
					putnext(stp->sd_wrq, bp);
					stream_runservice(stp);
					mutex_enter(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
			}
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}
	}
	bp = stp->sd_iocblk;
	/*
	 * Note: it is strictly impossible to get here with sd_iocblk set to
	 * -1.  This is because the initial loop above doesn't allow any new
	 * ioctls into the fray until all others have passed this point.
	 */
	ASSERT(bp != NULL && bp != (mblk_t *)-1);
	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
	if ((bp->b_datap->db_type == M_IOCACK) ||
	    (bp->b_datap->db_type == M_IOCNAK)) {
		/* for detection of duplicate ioctl replies */
		stp->sd_iocblk = (mblk_t *)-1;
		stp->sd_flag &= ~waitflags;
		cv_broadcast(&stp->sd_iocmonitor);
		mutex_exit(&stp->sd_lock);
	} else {
		/*
		 * flags not cleared here because we're still doing
		 * copy in/out for ioctl.
		 */
		stp->sd_iocblk = NULL;
		mutex_exit(&stp->sd_lock);
	}


	/*
	 * Have received acknowledgment.
	 */

	switch (bp->b_datap->db_type) {
	case M_IOCACK:
		/*
		 * Positive ack.
		 */
		iocbp = (struct iocblk *)bp->b_rptr;

		/*
		 * Set error if indicated.
		 */
		if (iocbp->ioc_error) {
			error = iocbp->ioc_error;
			break;
		}

		/*
		 * Set return value.
		 */
		*rvalp = iocbp->ioc_rval;

		/*
		 * Data may have been returned in ACK message (ioc_count > 0).
		 * If so, copy it out to the user's buffer.
		 */
		if (iocbp->ioc_count && !transparent) {
			if (error = getiocd(bp, strioc->ic_dp, copyflag))
				break;
		}
		if (!transparent) {
			if (len)	/* an M_COPYOUT was used with I_STR */
				strioc->ic_len = len;
			else
				strioc->ic_len = (int)iocbp->ioc_count;
		}
		break;

	case M_IOCNAK:
		/*
		 * Negative ack.
		 *
		 * The only thing to do is set error as specified
		 * in neg ack packet.  A NAK that carries no error is
		 * reported as EINVAL.
		 */
		iocbp = (struct iocblk *)bp->b_rptr;

		error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
		break;

	case M_COPYIN:
		/*
		 * Driver or module has requested user ioctl data.
		 */
		reqp = (struct copyreq *)bp->b_rptr;

		/*
		 * M_COPYIN should *never* have a message attached, though
		 * it's harmless if it does -- thus, panic on a DEBUG
		 * kernel and just free it on a non-DEBUG build.
		 */
		ASSERT(bp->b_cont == NULL);
		if (bp->b_cont != NULL) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}

		error = putiocd(bp, reqp->cq_addr, flag, crp);
		/* Don't send partially copied data downstream on failure. */
		if (error && bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}

		/*
		 * Turn the request into the M_IOCDATA response in place;
		 * cp_rval carries the result of the copy (0 on success).
		 */
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		bp->b_datap->db_type = M_IOCDATA;

		mblk_setcred(bp, crp);
		DB_CPID(bp) = curproc->p_pid;
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)(uintptr_t)error;
		resp->cp_flag = (fflags & FMODELS);

		stream_willservice(stp);
		putnext(stp->sd_wrq, bp);
		stream_runservice(stp);

		/*
		 * The response has been passed downstream, so bp is no
		 * longer ours to free.  On failure release the ioctl
		 * mechanism and return; otherwise wait for the next reply.
		 */
		if (error) {
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}

		goto waitioc;

	case M_COPYOUT:
		/*
		 * Driver or module has ioctl data for a user.
		 */
		reqp = (struct copyreq *)bp->b_rptr;
		ASSERT(bp->b_cont != NULL);

		/*
		 * Always (transparent or non-transparent )
		 * use the address specified in the request
		 */
		taddr = reqp->cq_addr;
		/*
		 * Remember the size so that the final M_IOCACK can report
		 * it back through ic_len.
		 */
		if (!transparent)
			len = (int)reqp->cq_size;

		/* copyout data to the provided address */
		error = getiocd(bp, taddr, copyflag);

		freemsg(bp->b_cont);
		bp->b_cont = NULL;

		/*
		 * Turn the request into the M_IOCDATA response in place;
		 * cp_rval carries the result of the copy (0 on success).
		 */
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		bp->b_datap->db_type = M_IOCDATA;

		mblk_setcred(bp, crp);
		DB_CPID(bp) = curproc->p_pid;
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)(uintptr_t)error;
		resp->cp_flag = (fflags & FMODELS);

		stream_willservice(stp);
		putnext(stp->sd_wrq, bp);
		stream_runservice(stp);

		/*
		 * As for M_COPYIN: bp has been passed downstream.  On
		 * failure release the ioctl mechanism and return.
		 */
		if (error) {
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}
		goto waitioc;

	default:
		/*
		 * Unexpected reply type: assert on a DEBUG kernel,
		 * otherwise release the ioctl mechanism and fall out
		 * with whatever error is currently set.
		 */
		ASSERT(0);
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~waitflags;
		cv_broadcast(&stp->sd_iocmonitor);
		mutex_exit(&stp->sd_lock);
		break;
	}

	/* Common exit for ACK, NAK and unexpected replies. */
	freemsg(bp);
	crfree(crp);
	return (error);
}
6236 
6237 /*
6238  * For the SunOS keyboard driver.
6239  * Return the next available "ioctl" sequence number.
6240  * Exported, so that streams modules can send "ioctl" messages
6241  * downstream from their open routine.
6242  */
6243 int
6244 getiocseqno(void)
6245 {
6246 	int	i;
6247 
6248 	mutex_enter(&strresources);
6249 	i = ++ioc_id;
6250 	mutex_exit(&strresources);
6251 	return (i);
6252 }
6253 
/*
 * Get the next message from the read queue.  If the message is
 * priority, STRPRI will have been set by strrput().  This flag
 * should be reset only when the entire message at the front of the
 * queue has been consumed.
 *
 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
 */
6262 int
6263 strgetmsg(
6264 	struct vnode *vp,
6265 	struct strbuf *mctl,
6266 	struct strbuf *mdata,
6267 	unsigned char *prip,
6268 	int *flagsp,
6269 	int fmode,
6270 	rval_t *rvp)
6271 {
6272 	struct stdata *stp;
6273 	mblk_t *bp, *nbp;
6274 	mblk_t *savemp = NULL;
6275 	mblk_t *savemptail = NULL;
6276 	uint_t old_sd_flag;
6277 	int flg;
6278 	int more = 0;
6279 	int error = 0;
6280 	char first = 1;
6281 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
6282 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
6283 	unsigned char pri = 0;
6284 	queue_t *q;
6285 	int	pr = 0;			/* Partial read successful */
6286 	struct uio uios;
6287 	struct uio *uiop = &uios;
6288 	struct iovec iovs;
6289 	unsigned char type;
6290 
6291 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
6292 		"strgetmsg:%p", vp);
6293 
6294 	ASSERT(vp->v_stream);
6295 	stp = vp->v_stream;
6296 	rvp->r_val1 = 0;
6297 
6298 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
6299 		if (error = straccess(stp, JCREAD))
6300 			return (error);
6301 
6302 	/* Fast check of flags before acquiring the lock */
6303 	if (stp->sd_flag & (STRDERR|STPLEX)) {
6304 		mutex_enter(&stp->sd_lock);
6305 		error = strgeterr(stp, STRDERR|STPLEX, 0);
6306 		mutex_exit(&stp->sd_lock);
6307 		if (error != 0)
6308 			return (error);
6309 	}
6310 
6311 	switch (*flagsp) {
6312 	case MSG_HIPRI:
6313 		if (*prip != 0)
6314 			return (EINVAL);
6315 		break;
6316 
6317 	case MSG_ANY:
6318 	case MSG_BAND:
6319 		break;
6320 
6321 	default:
6322 		return (EINVAL);
6323 	}
6324 	/*
6325 	 * Setup uio and iov for data part
6326 	 */
6327 	iovs.iov_base = mdata->buf;
6328 	iovs.iov_len = mdata->maxlen;
6329 	uios.uio_iov = &iovs;
6330 	uios.uio_iovcnt = 1;
6331 	uios.uio_loffset = 0;
6332 	uios.uio_segflg = UIO_USERSPACE;
6333 	uios.uio_fmode = 0;
6334 	uios.uio_extflg = UIO_COPY_CACHED;
6335 	uios.uio_resid = mdata->maxlen;
6336 	uios.uio_offset = 0;
6337 
6338 	q = _RD(stp->sd_wrq);
6339 	mutex_enter(&stp->sd_lock);
6340 	old_sd_flag = stp->sd_flag;
6341 	mark = 0;
6342 	for (;;) {
6343 		int done = 0;
6344 		mblk_t *q_first = q->q_first;
6345 
6346 		/*
6347 		 * Get the next message of appropriate priority
6348 		 * from the stream head.  If the caller is interested
6349 		 * in band or hipri messages, then they should already
6350 		 * be enqueued at the stream head.  On the other hand
6351 		 * if the caller wants normal (band 0) messages, they
6352 		 * might be deferred in a synchronous stream and they
6353 		 * will need to be pulled up.
6354 		 *
6355 		 * After we have dequeued a message, we might find that
6356 		 * it was a deferred M_SIG that was enqueued at the
6357 		 * stream head.  It must now be posted as part of the
6358 		 * read by calling strsignal_nolock().
6359 		 *
6360 		 * Also note that strrput does not enqueue an M_PCSIG,
6361 		 * and there cannot be more than one hipri message,
6362 		 * so there was no need to have the M_PCSIG case.
6363 		 *
6364 		 * At some time it might be nice to try and wrap the
6365 		 * functionality of kstrgetmsg() and strgetmsg() into
6366 		 * a common routine so to reduce the amount of replicated
6367 		 * code (since they are extremely similar).
6368 		 */
6369 		if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
6370 			/* Asking for normal, band0 data */
6371 			bp = strget(stp, q, uiop, first, &error);
6372 			ASSERT(MUTEX_HELD(&stp->sd_lock));
6373 			if (bp != NULL) {
6374 				if (bp->b_datap->db_type == M_SIG) {
6375 					strsignal_nolock(stp, *bp->b_rptr,
6376 					    (int32_t)bp->b_band);
6377 					continue;
6378 				} else {
6379 					break;
6380 				}
6381 			}
6382 			if (error != 0) {
6383 				goto getmout;
6384 			}
6385 
6386 		/*
6387 		 * We can't depend on the value of STRPRI here because
6388 		 * the stream head may be in transit. Therefore, we
6389 		 * must look at the type of the first message to
6390 		 * determine if a high priority messages is waiting
6391 		 */
6392 		} else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
6393 			    q_first->b_datap->db_type >= QPCTL &&
6394 			    (bp = getq_noenab(q)) != NULL) {
6395 			/* Asked for HIPRI and got one */
6396 			ASSERT(bp->b_datap->db_type >= QPCTL);
6397 			break;
6398 		} else if ((*flagsp & MSG_BAND) && q_first != NULL &&
6399 			    ((q_first->b_band >= *prip) ||
6400 			    q_first->b_datap->db_type >= QPCTL) &&
6401 			    (bp = getq_noenab(q)) != NULL) {
6402 			/*
6403 			 * Asked for at least band "prip" and got either at
6404 			 * least that band or a hipri message.
6405 			 */
6406 			ASSERT(bp->b_band >= *prip ||
6407 				bp->b_datap->db_type >= QPCTL);
6408 			if (bp->b_datap->db_type == M_SIG) {
6409 				strsignal_nolock(stp, *bp->b_rptr,
6410 				    (int32_t)bp->b_band);
6411 				continue;
6412 			} else {
6413 				break;
6414 			}
6415 		}
6416 
6417 		/* No data. Time to sleep? */
6418 		qbackenable(q, 0);
6419 
6420 		/*
6421 		 * If STRHUP or STREOF, return 0 length control and data.
6422 		 * If resid is 0, then a read(fd,buf,0) was done. Do not
6423 		 * sleep to satisfy this request because by default we have
6424 		 * zero bytes to return.
6425 		 */
6426 		if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
6427 		    mdata->maxlen == 0)) {
6428 			mctl->len = mdata->len = 0;
6429 			*flagsp = 0;
6430 			mutex_exit(&stp->sd_lock);
6431 			return (0);
6432 		}
6433 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
6434 			"strgetmsg calls strwaitq:%p, %p",
6435 			vp, uiop);
6436 		if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
6437 		    &done)) != 0) || done) {
6438 			TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
6439 				"strgetmsg error or done:%p, %p",
6440 				vp, uiop);
6441 			mutex_exit(&stp->sd_lock);
6442 			return (error);
6443 		}
6444 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
6445 			"strgetmsg awakes:%p, %p", vp, uiop);
6446 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
6447 			mutex_exit(&stp->sd_lock);
6448 			if (error = straccess(stp, JCREAD))
6449 				return (error);
6450 			mutex_enter(&stp->sd_lock);
6451 		}
6452 		first = 0;
6453 	}
6454 	ASSERT(bp != NULL);
6455 	/*
6456 	 * Extract any mark information. If the message is not completely
6457 	 * consumed this information will be put in the mblk
6458 	 * that is putback.
6459 	 * If MSGMARKNEXT is set and the message is completely consumed
6460 	 * the STRATMARK flag will be set below. Likewise, if
6461 	 * MSGNOTMARKNEXT is set and the message is
6462 	 * completely consumed STRNOTATMARK will be set.
6463 	 */
6464 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6465 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6466 		(MSGMARKNEXT|MSGNOTMARKNEXT));
6467 	if (mark != 0 && bp == stp->sd_mark) {
6468 		mark |= _LASTMARK;
6469 		stp->sd_mark = NULL;
6470 	}
6471 	/*
6472 	 * keep track of the original message type and priority
6473 	 */
6474 	pri = bp->b_band;
6475 	type = bp->b_datap->db_type;
6476 	if (type == M_PASSFP) {
6477 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
6478 			stp->sd_mark = bp;
6479 		bp->b_flag |= mark & ~_LASTMARK;
6480 		putback(stp, q, bp, pri);
6481 		qbackenable(q, pri);
6482 		mutex_exit(&stp->sd_lock);
6483 		return (EBADMSG);
6484 	}
6485 	ASSERT(type != M_SIG);
6486 
6487 	/*
6488 	 * Set this flag so strrput will not generate signals. Need to
6489 	 * make sure this flag is cleared before leaving this routine
6490 	 * else signals will stop being sent.
6491 	 */
6492 	stp->sd_flag |= STRGETINPROG;
6493 	mutex_exit(&stp->sd_lock);
6494 
6495 	if (STREAM_NEEDSERVICE(stp))
6496 		stream_runservice(stp);
6497 
6498 	/*
6499 	 * Set HIPRI flag if message is priority.
6500 	 */
6501 	if (type >= QPCTL)
6502 		flg = MSG_HIPRI;
6503 	else
6504 		flg = MSG_BAND;
6505 
6506 	/*
6507 	 * First process PROTO or PCPROTO blocks, if any.
6508 	 */
6509 	if (mctl->maxlen >= 0 && type != M_DATA) {
6510 		size_t	n, bcnt;
6511 		char	*ubuf;
6512 
6513 		bcnt = mctl->maxlen;
6514 		ubuf = mctl->buf;
6515 		while (bp != NULL && bp->b_datap->db_type != M_DATA) {
6516 			if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
6517 			    copyout(bp->b_rptr, ubuf, n)) {
6518 				error = EFAULT;
6519 				mutex_enter(&stp->sd_lock);
6520 				/*
6521 				 * clear stream head pri flag based on
6522 				 * first message type
6523 				 */
6524 				if (type >= QPCTL) {
6525 					ASSERT(type == M_PCPROTO);
6526 					stp->sd_flag &= ~STRPRI;
6527 				}
6528 				more = 0;
6529 				freemsg(bp);
6530 				goto getmout;
6531 			}
6532 			ubuf += n;
6533 			bp->b_rptr += n;
6534 			if (bp->b_rptr >= bp->b_wptr) {
6535 				nbp = bp;
6536 				bp = bp->b_cont;
6537 				freeb(nbp);
6538 			}
6539 			ASSERT(n <= bcnt);
6540 			bcnt -= n;
6541 			if (bcnt == 0)
6542 				break;
6543 		}
6544 		mctl->len = mctl->maxlen - bcnt;
6545 	} else
6546 		mctl->len = -1;
6547 
6548 	if (bp && bp->b_datap->db_type != M_DATA) {
6549 		/*
6550 		 * More PROTO blocks in msg.
6551 		 */
6552 		more |= MORECTL;
6553 		savemp = bp;
6554 		while (bp && bp->b_datap->db_type != M_DATA) {
6555 			savemptail = bp;
6556 			bp = bp->b_cont;
6557 		}
6558 		savemptail->b_cont = NULL;
6559 	}
6560 
6561 	/*
6562 	 * Now process DATA blocks, if any.
6563 	 */
6564 	if (mdata->maxlen >= 0 && bp) {
6565 		/*
6566 		 * struiocopyout will consume a potential zero-length
6567 		 * M_DATA even if uio_resid is zero.
6568 		 */
6569 		size_t oldresid = uiop->uio_resid;
6570 
6571 		bp = struiocopyout(bp, uiop, &error);
6572 		if (error != 0) {
6573 			mutex_enter(&stp->sd_lock);
6574 			/*
6575 			 * clear stream head hi pri flag based on
6576 			 * first message
6577 			 */
6578 			if (type >= QPCTL) {
6579 				ASSERT(type == M_PCPROTO);
6580 				stp->sd_flag &= ~STRPRI;
6581 			}
6582 			more = 0;
6583 			freemsg(savemp);
6584 			goto getmout;
6585 		}
6586 		/*
6587 		 * (pr == 1) indicates a partial read.
6588 		 */
6589 		if (oldresid > uiop->uio_resid)
6590 			pr = 1;
6591 		mdata->len = mdata->maxlen - uiop->uio_resid;
6592 	} else
6593 		mdata->len = -1;
6594 
6595 	if (bp) {			/* more data blocks in msg */
6596 		more |= MOREDATA;
6597 		if (savemp)
6598 			savemptail->b_cont = bp;
6599 		else
6600 			savemp = bp;
6601 	}
6602 
6603 	mutex_enter(&stp->sd_lock);
6604 	if (savemp) {
6605 		if (pr && (savemp->b_datap->db_type == M_DATA) &&
6606 		    msgnodata(savemp)) {
6607 			/*
6608 			 * Avoid queuing a zero-length tail part of
6609 			 * a message. pr=1 indicates that we read some of
6610 			 * the message.
6611 			 */
6612 			freemsg(savemp);
6613 			more &= ~MOREDATA;
6614 			/*
6615 			 * clear stream head hi pri flag based on
6616 			 * first message
6617 			 */
6618 			if (type >= QPCTL) {
6619 				ASSERT(type == M_PCPROTO);
6620 				stp->sd_flag &= ~STRPRI;
6621 			}
6622 		} else {
6623 			savemp->b_band = pri;
6624 			/*
6625 			 * If the first message was HIPRI and the one we're
6626 			 * putting back isn't, then clear STRPRI, otherwise
6627 			 * set STRPRI again.  Note that we must set STRPRI
6628 			 * again since the flush logic in strrput_nondata()
6629 			 * may have cleared it while we had sd_lock dropped.
6630 			 */
6631 			if (type >= QPCTL) {
6632 				ASSERT(type == M_PCPROTO);
6633 				if (queclass(savemp) < QPCTL)
6634 					stp->sd_flag &= ~STRPRI;
6635 				else
6636 					stp->sd_flag |= STRPRI;
6637 			} else if (queclass(savemp) >= QPCTL) {
6638 				/*
6639 				 * The first message was not a HIPRI message,
6640 				 * but the one we are about to putback is.
6641 				 * For simplicitly, we do not allow for HIPRI
6642 				 * messages to be embedded in the message
6643 				 * body, so just force it to same type as
6644 				 * first message.
6645 				 */
6646 				ASSERT(type == M_DATA || type == M_PROTO);
6647 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
6648 				savemp->b_datap->db_type = type;
6649 			}
6650 			if (mark != 0) {
6651 				savemp->b_flag |= mark & ~_LASTMARK;
6652 				if ((mark & _LASTMARK) &&
6653 				    (stp->sd_mark == NULL)) {
6654 					/*
6655 					 * If another marked message arrived
6656 					 * while sd_lock was not held sd_mark
6657 					 * would be non-NULL.
6658 					 */
6659 					stp->sd_mark = savemp;
6660 				}
6661 			}
6662 			putback(stp, q, savemp, pri);
6663 		}
6664 	} else {
6665 		/*
6666 		 * The complete message was consumed.
6667 		 *
6668 		 * If another M_PCPROTO arrived while sd_lock was not held
6669 		 * it would have been discarded since STRPRI was still set.
6670 		 *
6671 		 * Move the MSG*MARKNEXT information
6672 		 * to the stream head just in case
6673 		 * the read queue becomes empty.
6674 		 * clear stream head hi pri flag based on
6675 		 * first message
6676 		 *
6677 		 * If the stream head was at the mark
6678 		 * (STRATMARK) before we dropped sd_lock above
6679 		 * and some data was consumed then we have
6680 		 * moved past the mark thus STRATMARK is
6681 		 * cleared. However, if a message arrived in
6682 		 * strrput during the copyout above causing
6683 		 * STRATMARK to be set we can not clear that
6684 		 * flag.
6685 		 */
6686 		if (type >= QPCTL) {
6687 			ASSERT(type == M_PCPROTO);
6688 			stp->sd_flag &= ~STRPRI;
6689 		}
6690 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
6691 			if (mark & MSGMARKNEXT) {
6692 				stp->sd_flag &= ~STRNOTATMARK;
6693 				stp->sd_flag |= STRATMARK;
6694 			} else if (mark & MSGNOTMARKNEXT) {
6695 				stp->sd_flag &= ~STRATMARK;
6696 				stp->sd_flag |= STRNOTATMARK;
6697 			} else {
6698 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
6699 			}
6700 		} else if (pr && (old_sd_flag & STRATMARK)) {
6701 			stp->sd_flag &= ~STRATMARK;
6702 		}
6703 	}
6704 
6705 	*flagsp = flg;
6706 	*prip = pri;
6707 
6708 	/*
6709 	 * Getmsg cleanup processing - if the state of the queue has changed
6710 	 * some signals may need to be sent and/or poll awakened.
6711 	 */
6712 getmout:
6713 	qbackenable(q, pri);
6714 
6715 	/*
6716 	 * We dropped the stream head lock above. Send all M_SIG messages
6717 	 * before processing stream head for SIGPOLL messages.
6718 	 */
6719 	ASSERT(MUTEX_HELD(&stp->sd_lock));
6720 	while ((bp = q->q_first) != NULL &&
6721 	    (bp->b_datap->db_type == M_SIG)) {
6722 		/*
6723 		 * sd_lock is held so the content of the read queue can not
6724 		 * change.
6725 		 */
6726 		bp = getq(q);
6727 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
6728 
6729 		strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band);
6730 		mutex_exit(&stp->sd_lock);
6731 		freemsg(bp);
6732 		if (STREAM_NEEDSERVICE(stp))
6733 			stream_runservice(stp);
6734 		mutex_enter(&stp->sd_lock);
6735 	}
6736 
6737 	/*
6738 	 * stream head cannot change while we make the determination
6739 	 * whether or not to send a signal. Drop the flag to allow strrput
6740 	 * to send firstmsgsigs again.
6741 	 */
6742 	stp->sd_flag &= ~STRGETINPROG;
6743 
6744 	/*
6745 	 * If the type of message at the front of the queue changed
6746 	 * due to the receive the appropriate signals and pollwakeup events
6747 	 * are generated. The type of changes are:
6748 	 *	Processed a hipri message, q_first is not hipri.
6749 	 *	Processed a band X message, and q_first is band Y.
6750 	 * The generated signals and pollwakeups are identical to what
6751 	 * strrput() generates should the message that is now on q_first
6752 	 * arrive to an empty read queue.
6753 	 *
6754 	 * Note: only strrput will send a signal for a hipri message.
6755 	 */
6756 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
6757 		strsigset_t signals = 0;
6758 		strpollset_t pollwakeups = 0;
6759 
6760 		if (flg & MSG_HIPRI) {
6761 			/*
6762 			 * Removed a hipri message. Regular data at
6763 			 * the front of  the queue.
6764 			 */
6765 			if (bp->b_band == 0) {
6766 				signals = S_INPUT | S_RDNORM;
6767 				pollwakeups = POLLIN | POLLRDNORM;
6768 			} else {
6769 				signals = S_INPUT | S_RDBAND;
6770 				pollwakeups = POLLIN | POLLRDBAND;
6771 			}
6772 		} else if (pri != bp->b_band) {
6773 			/*
6774 			 * The band is different for the new q_first.
6775 			 */
6776 			if (bp->b_band == 0) {
6777 				signals = S_RDNORM;
6778 				pollwakeups = POLLIN | POLLRDNORM;
6779 			} else {
6780 				signals = S_RDBAND;
6781 				pollwakeups = POLLIN | POLLRDBAND;
6782 			}
6783 		}
6784 
6785 		if (pollwakeups != 0) {
6786 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
6787 				if (!(stp->sd_rput_opt & SR_POLLIN))
6788 					goto no_pollwake;
6789 				stp->sd_rput_opt &= ~SR_POLLIN;
6790 			}
6791 			mutex_exit(&stp->sd_lock);
6792 			pollwakeup(&stp->sd_pollist, pollwakeups);
6793 			mutex_enter(&stp->sd_lock);
6794 		}
6795 no_pollwake:
6796 
6797 		if (stp->sd_sigflags & signals)
6798 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
6799 	}
6800 	mutex_exit(&stp->sd_lock);
6801 
6802 	rvp->r_val1 = more;
6803 	return (error);
6804 #undef	_LASTMARK
6805 }
6806 
6807 /*
6808  * Get the next message from the read queue.  If the message is
6809  * priority, STRPRI will have been set by strrput().  This flag
6810  * should be reset only when the entire message at the front of the
6811  * queue as been consumed.
6812  *
6813  * If uiop is NULL all data is returned in mctlp.
6814  * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
6815  * not enabled.
6816  * The timeout parameter is in milliseconds; -1 for infinity.
6817  * This routine handles the consolidation private flags:
6818  *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
6819  *	MSG_DELAYERROR	Defer the error check until the queue is empty.
6820  *	MSG_HOLDSIG	Hold signals while waiting for data.
6821  *	MSG_IPEEK	Only peek at messages.
6822  *	MSG_DISCARDTAIL	Discard the tail M_DATA part of the message
6823  *			that doesn't fit.
6824  *	MSG_NOMARK	If the message is marked leave it on the queue.
6825  *
6826  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6827  */
6828 int
6829 kstrgetmsg(
6830 	struct vnode *vp,
6831 	mblk_t **mctlp,
6832 	struct uio *uiop,
6833 	unsigned char *prip,
6834 	int *flagsp,
6835 	clock_t timout,
6836 	rval_t *rvp)
6837 {
6838 	struct stdata *stp;
6839 	mblk_t *bp, *nbp;
6840 	mblk_t *savemp = NULL;
6841 	mblk_t *savemptail = NULL;
6842 	int flags;
6843 	uint_t old_sd_flag;
6844 	int flg;
6845 	int more = 0;
6846 	int error = 0;
6847 	char first = 1;
6848 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
6849 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
6850 	unsigned char pri = 0;
6851 	queue_t *q;
6852 	int	pr = 0;			/* Partial read successful */
6853 	unsigned char type;
6854 
6855 	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
6856 		"kstrgetmsg:%p", vp);
6857 
6858 	ASSERT(vp->v_stream);
6859 	stp = vp->v_stream;
6860 	rvp->r_val1 = 0;
6861 
6862 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
6863 		if (error = straccess(stp, JCREAD))
6864 			return (error);
6865 
6866 	flags = *flagsp;
6867 	/* Fast check of flags before acquiring the lock */
6868 	if (stp->sd_flag & (STRDERR|STPLEX)) {
6869 		if ((stp->sd_flag & STPLEX) ||
6870 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
6871 			mutex_enter(&stp->sd_lock);
6872 			error = strgeterr(stp, STRDERR|STPLEX,
6873 					(flags & MSG_IPEEK));
6874 			mutex_exit(&stp->sd_lock);
6875 			if (error != 0)
6876 				return (error);
6877 		}
6878 	}
6879 
6880 	switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
6881 	case MSG_HIPRI:
6882 		if (*prip != 0)
6883 			return (EINVAL);
6884 		break;
6885 
6886 	case MSG_ANY:
6887 	case MSG_BAND:
6888 		break;
6889 
6890 	default:
6891 		return (EINVAL);
6892 	}
6893 
6894 retry:
6895 	q = _RD(stp->sd_wrq);
6896 	mutex_enter(&stp->sd_lock);
6897 	old_sd_flag = stp->sd_flag;
6898 	mark = 0;
6899 	for (;;) {
6900 		int done = 0;
6901 		int waitflag;
6902 		int fmode;
6903 		mblk_t *q_first = q->q_first;
6904 
6905 		/*
6906 		 * This section of the code operates just like the code
6907 		 * in strgetmsg().  There is a comment there about what
6908 		 * is going on here.
6909 		 */
6910 		if (!(flags & (MSG_HIPRI|MSG_BAND))) {
6911 			/* Asking for normal, band0 data */
6912 			bp = strget(stp, q, uiop, first, &error);
6913 			ASSERT(MUTEX_HELD(&stp->sd_lock));
6914 			if (bp != NULL) {
6915 				if (bp->b_datap->db_type == M_SIG) {
6916 					strsignal_nolock(stp, *bp->b_rptr,
6917 					    (int32_t)bp->b_band);
6918 					continue;
6919 				} else {
6920 					break;
6921 				}
6922 			}
6923 			if (error != 0) {
6924 				goto getmout;
6925 			}
6926 		/*
6927 		 * We can't depend on the value of STRPRI here because
6928 		 * the stream head may be in transit. Therefore, we
6929 		 * must look at the type of the first message to
6930 		 * determine if a high priority messages is waiting
6931 		 */
6932 		} else if ((flags & MSG_HIPRI) && q_first != NULL &&
6933 			    q_first->b_datap->db_type >= QPCTL &&
6934 			    (bp = getq_noenab(q)) != NULL) {
6935 			ASSERT(bp->b_datap->db_type >= QPCTL);
6936 			break;
6937 		} else if ((flags & MSG_BAND) && q_first != NULL &&
6938 			    ((q_first->b_band >= *prip) ||
6939 			    q_first->b_datap->db_type >= QPCTL) &&
6940 			    (bp = getq_noenab(q)) != NULL) {
6941 			/*
6942 			 * Asked for at least band "prip" and got either at
6943 			 * least that band or a hipri message.
6944 			 */
6945 			ASSERT(bp->b_band >= *prip ||
6946 				bp->b_datap->db_type >= QPCTL);
6947 			if (bp->b_datap->db_type == M_SIG) {
6948 				strsignal_nolock(stp, *bp->b_rptr,
6949 				    (int32_t)bp->b_band);
6950 				continue;
6951 			} else {
6952 				break;
6953 			}
6954 		}
6955 
6956 		/* No data. Time to sleep? */
6957 		qbackenable(q, 0);
6958 
6959 		/*
6960 		 * Delayed error notification?
6961 		 */
6962 		if ((stp->sd_flag & (STRDERR|STPLEX)) &&
6963 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
6964 			error = strgeterr(stp, STRDERR|STPLEX,
6965 					(flags & MSG_IPEEK));
6966 			if (error != 0) {
6967 				mutex_exit(&stp->sd_lock);
6968 				return (error);
6969 			}
6970 		}
6971 
6972 		/*
6973 		 * If STRHUP or STREOF, return 0 length control and data.
6974 		 * If a read(fd,buf,0) has been done, do not sleep, just
6975 		 * return.
6976 		 *
6977 		 * If mctlp == NULL and uiop == NULL, then the code will
6978 		 * do the strwaitq. This is an understood way of saying
6979 		 * sleep "polling" until a message is received.
6980 		 */
6981 		if ((stp->sd_flag & (STRHUP|STREOF)) ||
6982 		    (uiop != NULL && uiop->uio_resid == 0)) {
6983 			if (mctlp != NULL)
6984 				*mctlp = NULL;
6985 			*flagsp = 0;
6986 			mutex_exit(&stp->sd_lock);
6987 			return (0);
6988 		}
6989 
6990 		waitflag = GETWAIT;
6991 		if (flags &
6992 		    (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
6993 			if (flags & MSG_HOLDSIG)
6994 				waitflag |= STR_NOSIG;
6995 			if (flags & MSG_IGNERROR)
6996 				waitflag |= STR_NOERROR;
6997 			if (flags & MSG_IPEEK)
6998 				waitflag |= STR_PEEK;
6999 			if (flags & MSG_DELAYERROR)
7000 				waitflag |= STR_DELAYERR;
7001 		}
7002 		if (uiop != NULL)
7003 			fmode = uiop->uio_fmode;
7004 		else
7005 			fmode = 0;
7006 
7007 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
7008 			"kstrgetmsg calls strwaitq:%p, %p",
7009 			vp, uiop);
7010 		if (((error = strwaitq(stp, waitflag, (ssize_t)0,
7011 		    fmode, timout, &done)) != 0) || done) {
7012 			TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
7013 				"kstrgetmsg error or done:%p, %p",
7014 				vp, uiop);
7015 			mutex_exit(&stp->sd_lock);
7016 			return (error);
7017 		}
7018 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
7019 			"kstrgetmsg awakes:%p, %p", vp, uiop);
7020 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
7021 			mutex_exit(&stp->sd_lock);
7022 			if (error = straccess(stp, JCREAD))
7023 				return (error);
7024 			mutex_enter(&stp->sd_lock);
7025 		}
7026 		first = 0;
7027 	}
7028 	ASSERT(bp != NULL);
7029 	/*
7030 	 * Extract any mark information. If the message is not completely
7031 	 * consumed this information will be put in the mblk
7032 	 * that is putback.
7033 	 * If MSGMARKNEXT is set and the message is completely consumed
7034 	 * the STRATMARK flag will be set below. Likewise, if
7035 	 * MSGNOTMARKNEXT is set and the message is
7036 	 * completely consumed STRNOTATMARK will be set.
7037 	 */
7038 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
7039 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
7040 		(MSGMARKNEXT|MSGNOTMARKNEXT));
7041 	pri = bp->b_band;
7042 	if (mark != 0) {
7043 		/*
7044 		 * If the caller doesn't want the mark return.
7045 		 * Used to implement MSG_WAITALL in sockets.
7046 		 */
7047 		if (flags & MSG_NOMARK) {
7048 			putback(stp, q, bp, pri);
7049 			qbackenable(q, pri);
7050 			mutex_exit(&stp->sd_lock);
7051 			return (EWOULDBLOCK);
7052 		}
7053 		if (bp == stp->sd_mark) {
7054 			mark |= _LASTMARK;
7055 			stp->sd_mark = NULL;
7056 		}
7057 	}
7058 
7059 	/*
7060 	 * keep track of the first message type
7061 	 */
7062 	type = bp->b_datap->db_type;
7063 
7064 	if (bp->b_datap->db_type == M_PASSFP) {
7065 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7066 			stp->sd_mark = bp;
7067 		bp->b_flag |= mark & ~_LASTMARK;
7068 		putback(stp, q, bp, pri);
7069 		qbackenable(q, pri);
7070 		mutex_exit(&stp->sd_lock);
7071 		return (EBADMSG);
7072 	}
7073 	ASSERT(type != M_SIG);
7074 
7075 	if (flags & MSG_IPEEK) {
7076 		/*
7077 		 * Clear any struioflag - we do the uiomove over again
7078 		 * when peeking since it simplifies the code.
7079 		 *
7080 		 * Dup the message and put the original back on the queue.
7081 		 * If dupmsg() fails, try again with copymsg() to see if
7082 		 * there is indeed a shortage of memory.  dupmsg() may fail
7083 		 * if db_ref in any of the messages reaches its limit.
7084 		 */
7085 		if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
7086 			/*
7087 			 * Restore the state of the stream head since we
7088 			 * need to drop sd_lock (strwaitbuf is sleeping).
7089 			 */
7090 			size_t size = msgdsize(bp);
7091 
7092 			if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7093 				stp->sd_mark = bp;
7094 			bp->b_flag |= mark & ~_LASTMARK;
7095 			putback(stp, q, bp, pri);
7096 			mutex_exit(&stp->sd_lock);
7097 			error = strwaitbuf(size, BPRI_HI);
7098 			if (error) {
7099 				/*
7100 				 * There is no net change to the queue thus
7101 				 * no need to qbackenable.
7102 				 */
7103 				return (error);
7104 			}
7105 			goto retry;
7106 		}
7107 
7108 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7109 			stp->sd_mark = bp;
7110 		bp->b_flag |= mark & ~_LASTMARK;
7111 		putback(stp, q, bp, pri);
7112 		bp = nbp;
7113 	}
7114 
7115 	/*
7116 	 * Set this flag so strrput will not generate signals. Need to
7117 	 * make sure this flag is cleared before leaving this routine
7118 	 * else signals will stop being sent.
7119 	 */
7120 	stp->sd_flag |= STRGETINPROG;
7121 	mutex_exit(&stp->sd_lock);
7122 
7123 	if (STREAM_NEEDSERVICE(stp))
7124 		stream_runservice(stp);
7125 
7126 	/*
7127 	 * Set HIPRI flag if message is priority.
7128 	 */
7129 	if (type >= QPCTL)
7130 		flg = MSG_HIPRI;
7131 	else
7132 		flg = MSG_BAND;
7133 
7134 	/*
7135 	 * First process PROTO or PCPROTO blocks, if any.
7136 	 */
7137 	if (mctlp != NULL && type != M_DATA) {
7138 		mblk_t *nbp;
7139 
7140 		*mctlp = bp;
7141 		while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
7142 			bp = bp->b_cont;
7143 		nbp = bp->b_cont;
7144 		bp->b_cont = NULL;
7145 		bp = nbp;
7146 	}
7147 
7148 	if (bp && bp->b_datap->db_type != M_DATA) {
7149 		/*
7150 		 * More PROTO blocks in msg. Will only happen if mctlp is NULL.
7151 		 */
7152 		more |= MORECTL;
7153 		savemp = bp;
7154 		while (bp && bp->b_datap->db_type != M_DATA) {
7155 			savemptail = bp;
7156 			bp = bp->b_cont;
7157 		}
7158 		savemptail->b_cont = NULL;
7159 	}
7160 
7161 	/*
7162 	 * Now process DATA blocks, if any.
7163 	 */
7164 	if (uiop == NULL) {
7165 		/* Append data to tail of mctlp */
7166 		if (mctlp != NULL) {
7167 			mblk_t **mpp = mctlp;
7168 
7169 			while (*mpp != NULL)
7170 				mpp = &((*mpp)->b_cont);
7171 			*mpp = bp;
7172 			bp = NULL;
7173 		}
7174 	} else if (uiop->uio_resid >= 0 && bp) {
7175 		size_t oldresid = uiop->uio_resid;
7176 
7177 		/*
7178 		 * If a streams message is likely to consist
7179 		 * of many small mblks, it is pulled up into
7180 		 * one continuous chunk of memory.
7181 		 * see longer comment at top of page
7182 		 * by mblk_pull_len declaration.
7183 		 */
7184 
7185 		if (MBLKL(bp) < mblk_pull_len) {
7186 			(void) pullupmsg(bp, -1);
7187 		}
7188 
7189 		bp = struiocopyout(bp, uiop, &error);
7190 		if (error != 0) {
7191 			if (mctlp != NULL) {
7192 				freemsg(*mctlp);
7193 				*mctlp = NULL;
7194 			} else
7195 				freemsg(savemp);
7196 			mutex_enter(&stp->sd_lock);
7197 			/*
7198 			 * clear stream head hi pri flag based on
7199 			 * first message
7200 			 */
7201 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7202 				ASSERT(type == M_PCPROTO);
7203 				stp->sd_flag &= ~STRPRI;
7204 			}
7205 			more = 0;
7206 			goto getmout;
7207 		}
7208 		/*
7209 		 * (pr == 1) indicates a partial read.
7210 		 */
7211 		if (oldresid > uiop->uio_resid)
7212 			pr = 1;
7213 	}
7214 
7215 	if (bp) {			/* more data blocks in msg */
7216 		more |= MOREDATA;
7217 		if (savemp)
7218 			savemptail->b_cont = bp;
7219 		else
7220 			savemp = bp;
7221 	}
7222 
7223 	mutex_enter(&stp->sd_lock);
7224 	if (savemp) {
7225 		if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
7226 			/*
7227 			 * When MSG_DISCARDTAIL is set or
7228 			 * when peeking discard any tail. When peeking this
7229 			 * is the tail of the dup that was copied out - the
7230 			 * message has already been putback on the queue.
7231 			 * Return MOREDATA to the caller even though the data
7232 			 * is discarded. This is used by sockets (to
7233 			 * set MSG_TRUNC).
7234 			 */
7235 			freemsg(savemp);
7236 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7237 				ASSERT(type == M_PCPROTO);
7238 				stp->sd_flag &= ~STRPRI;
7239 			}
7240 		} else if (pr && (savemp->b_datap->db_type == M_DATA) &&
7241 			    msgnodata(savemp)) {
7242 			/*
7243 			 * Avoid queuing a zero-length tail part of
7244 			 * a message. pr=1 indicates that we read some of
7245 			 * the message.
7246 			 */
7247 			freemsg(savemp);
7248 			more &= ~MOREDATA;
7249 			if (type >= QPCTL) {
7250 				ASSERT(type == M_PCPROTO);
7251 				stp->sd_flag &= ~STRPRI;
7252 			}
7253 		} else {
7254 			savemp->b_band = pri;
7255 			/*
7256 			 * If the first message was HIPRI and the one we're
7257 			 * putting back isn't, then clear STRPRI, otherwise
7258 			 * set STRPRI again.  Note that we must set STRPRI
7259 			 * again since the flush logic in strrput_nondata()
7260 			 * may have cleared it while we had sd_lock dropped.
7261 			 */
7262 			if (type >= QPCTL) {
7263 				ASSERT(type == M_PCPROTO);
7264 				if (queclass(savemp) < QPCTL)
7265 					stp->sd_flag &= ~STRPRI;
7266 				else
7267 					stp->sd_flag |= STRPRI;
7268 			} else if (queclass(savemp) >= QPCTL) {
7269 				/*
7270 				 * The first message was not a HIPRI message,
7271 				 * but the one we are about to putback is.
7272 				 * For simplicitly, we do not allow for HIPRI
7273 				 * messages to be embedded in the message
7274 				 * body, so just force it to same type as
7275 				 * first message.
7276 				 */
7277 				ASSERT(type == M_DATA || type == M_PROTO);
7278 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
7279 				savemp->b_datap->db_type = type;
7280 			}
7281 			if (mark != 0) {
7282 				if ((mark & _LASTMARK) &&
7283 				    (stp->sd_mark == NULL)) {
7284 					/*
7285 					 * If another marked message arrived
7286 					 * while sd_lock was not held sd_mark
7287 					 * would be non-NULL.
7288 					 */
7289 					stp->sd_mark = savemp;
7290 				}
7291 				savemp->b_flag |= mark & ~_LASTMARK;
7292 			}
7293 			putback(stp, q, savemp, pri);
7294 		}
7295 	} else if (!(flags & MSG_IPEEK)) {
7296 		/*
7297 		 * The complete message was consumed.
7298 		 *
7299 		 * If another M_PCPROTO arrived while sd_lock was not held
7300 		 * it would have been discarded since STRPRI was still set.
7301 		 *
7302 		 * Move the MSG*MARKNEXT information
7303 		 * to the stream head just in case
7304 		 * the read queue becomes empty.
7305 		 * clear stream head hi pri flag based on
7306 		 * first message
7307 		 *
7308 		 * If the stream head was at the mark
7309 		 * (STRATMARK) before we dropped sd_lock above
7310 		 * and some data was consumed then we have
7311 		 * moved past the mark thus STRATMARK is
7312 		 * cleared. However, if a message arrived in
7313 		 * strrput during the copyout above causing
7314 		 * STRATMARK to be set we can not clear that
7315 		 * flag.
7316 		 * XXX A "perimeter" would help by single-threading strrput,
7317 		 * strread, strgetmsg and kstrgetmsg.
7318 		 */
7319 		if (type >= QPCTL) {
7320 			ASSERT(type == M_PCPROTO);
7321 			stp->sd_flag &= ~STRPRI;
7322 		}
7323 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7324 			if (mark & MSGMARKNEXT) {
7325 				stp->sd_flag &= ~STRNOTATMARK;
7326 				stp->sd_flag |= STRATMARK;
7327 			} else if (mark & MSGNOTMARKNEXT) {
7328 				stp->sd_flag &= ~STRATMARK;
7329 				stp->sd_flag |= STRNOTATMARK;
7330 			} else {
7331 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7332 			}
7333 		} else if (pr && (old_sd_flag & STRATMARK)) {
7334 			stp->sd_flag &= ~STRATMARK;
7335 		}
7336 	}
7337 
7338 	*flagsp = flg;
7339 	*prip = pri;
7340 
7341 	/*
7342 	 * Getmsg cleanup processing - if the state of the queue has changed
7343 	 * some signals may need to be sent and/or poll awakened.
7344 	 */
7345 getmout:
7346 	qbackenable(q, pri);
7347 
7348 	/*
7349 	 * We dropped the stream head lock above. Send all M_SIG messages
7350 	 * before processing stream head for SIGPOLL messages.
7351 	 */
7352 	ASSERT(MUTEX_HELD(&stp->sd_lock));
7353 	while ((bp = q->q_first) != NULL &&
7354 	    (bp->b_datap->db_type == M_SIG)) {
7355 		/*
7356 		 * sd_lock is held so the content of the read queue can not
7357 		 * change.
7358 		 */
7359 		bp = getq(q);
7360 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7361 
7362 		strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band);
7363 		mutex_exit(&stp->sd_lock);
7364 		freemsg(bp);
7365 		if (STREAM_NEEDSERVICE(stp))
7366 			stream_runservice(stp);
7367 		mutex_enter(&stp->sd_lock);
7368 	}
7369 
7370 	/*
7371 	 * stream head cannot change while we make the determination
7372 	 * whether or not to send a signal. Drop the flag to allow strrput
7373 	 * to send firstmsgsigs again.
7374 	 */
7375 	stp->sd_flag &= ~STRGETINPROG;
7376 
7377 	/*
7378 	 * If the type of message at the front of the queue changed
7379 	 * due to the receive the appropriate signals and pollwakeup events
7380 	 * are generated. The type of changes are:
7381 	 *	Processed a hipri message, q_first is not hipri.
7382 	 *	Processed a band X message, and q_first is band Y.
7383 	 * The generated signals and pollwakeups are identical to what
7384 	 * strrput() generates should the message that is now on q_first
7385 	 * arrive to an empty read queue.
7386 	 *
7387 	 * Note: only strrput will send a signal for a hipri message.
7388 	 */
7389 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7390 		strsigset_t signals = 0;
7391 		strpollset_t pollwakeups = 0;
7392 
7393 		if (flg & MSG_HIPRI) {
7394 			/*
7395 			 * Removed a hipri message. Regular data at
7396 			 * the front of  the queue.
7397 			 */
7398 			if (bp->b_band == 0) {
7399 				signals = S_INPUT | S_RDNORM;
7400 				pollwakeups = POLLIN | POLLRDNORM;
7401 			} else {
7402 				signals = S_INPUT | S_RDBAND;
7403 				pollwakeups = POLLIN | POLLRDBAND;
7404 			}
7405 		} else if (pri != bp->b_band) {
7406 			/*
7407 			 * The band is different for the new q_first.
7408 			 */
7409 			if (bp->b_band == 0) {
7410 				signals = S_RDNORM;
7411 				pollwakeups = POLLIN | POLLRDNORM;
7412 			} else {
7413 				signals = S_RDBAND;
7414 				pollwakeups = POLLIN | POLLRDBAND;
7415 			}
7416 		}
7417 
7418 		if (pollwakeups != 0) {
7419 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
7420 				if (!(stp->sd_rput_opt & SR_POLLIN))
7421 					goto no_pollwake;
7422 				stp->sd_rput_opt &= ~SR_POLLIN;
7423 			}
7424 			mutex_exit(&stp->sd_lock);
7425 			pollwakeup(&stp->sd_pollist, pollwakeups);
7426 			mutex_enter(&stp->sd_lock);
7427 		}
7428 no_pollwake:
7429 
7430 		if (stp->sd_sigflags & signals)
7431 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7432 	}
7433 	mutex_exit(&stp->sd_lock);
7434 
7435 	rvp->r_val1 = more;
7436 	return (error);
7437 #undef	_LASTMARK
7438 }
7439 
/*
 * Put a message downstream.
 *
 * Builds a message from the caller-described control part (mctl) and data
 * part (mdata) and hands it to strput().  When strput() fails with
 * EWOULDBLOCK the routine sleeps in strwaitq() and tries again.
 *
 * Arguments:
 *	vp	vnode whose v_stream is the target stream (ASSERTed non-NULL).
 *	mctl	control part; passed to strmakectl() on every pass of the loop.
 *	mdata	data part; a negative len means "no data part".  The buffer
 *		is described to strput() as a UIO_USERSPACE uio.  mdata->len
 *		is overwritten with the msgsize value left by strput().
 *	pri	priority band; must be 0 when flag is MSG_HIPRI.
 *	flag	exactly MSG_HIPRI or MSG_BAND, optionally or'ed with MSG_XPG4
 *		(which is stripped before validation).
 *	fmode	file mode flags, forwarded to strmakectl() and strwaitq() and
 *		stored in the uio.
 *
 * Returns 0 on success, otherwise an errno value:
 *	EINVAL	flag is not MSG_HIPRI/MSG_BAND, or MSG_HIPRI was given with
 *		no control part or a non-zero pri.
 *	ERANGE	the cached maximum packet size is 0, the data length is
 *		outside [rmin, rmax], or mctl->len exceeds strctlsz.
 *	EAGAIN	strput() failed with ENOMEM (translated at "out").
 *	Any error from straccess(), strwriteable(), strmakectl(), strput()
 *	or strwaitq() is passed back as well.
 *
 * NOTE: strputmsg and kstrputmsg have much of the logic in common.
 */
int
strputmsg(
	struct vnode *vp,
	struct strbuf *mctl,
	struct strbuf *mdata,
	unsigned char pri,
	int flag,
	int fmode)
{
	struct stdata *stp;
	queue_t *wqp;
	mblk_t *mp;
	ssize_t msgsize;
	ssize_t rmin, rmax;
	int error;
	struct uio uios;
	struct uio *uiop = &uios;
	struct iovec iovs;
	int xpg4 = 0;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;
	wqp = stp->sd_wrq;

	/*
	 * If it is an XPG4 application, we need to send
	 * SIGPIPE below
	 */

	/* Remember the XPG4 request, then strip it so the switch sees a pure flag */
	xpg4 = (flag & MSG_XPG4) ? 1 : 0;
	flag &= ~MSG_XPG4;

#ifdef C2_AUDIT
	if (audit_active)
		audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
#endif

	/*
	 * Run the JCWRITE access check unless sd_sidp is NULL or the
	 * stream head vnode is a FIFO.
	 */
	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
		if (error = straccess(stp, JCWRITE))
			return (error);

	/*
	 * Only take sd_lock and call strwriteable() when one of the
	 * error/hangup/multiplexed flags is already set.
	 */
	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
		mutex_enter(&stp->sd_lock);
		error = strwriteable(stp, B_FALSE, xpg4);
		mutex_exit(&stp->sd_lock);
		if (error != 0)
			return (error);
	}

	/*
	 * Check for legal flag value.
	 */
	switch (flag) {
	case MSG_HIPRI:
		/* A high priority message needs a control part and band 0 */
		if ((mctl->len < 0) || (pri != 0))
			return (EINVAL);
		break;
	case MSG_BAND:
		break;

	default:
		return (EINVAL);
	}

	TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
		"strputmsg in:stp %p", stp);

	/* get these values from those cached in the stream head */
	rmin = stp->sd_qn_minpsz;
	rmax = stp->sd_qn_maxpsz;

	/*
	 * Make sure ctl and data sizes together fall within the
	 * limits of the max and min receive packet sizes and do
	 * not exceed system limit.
	 */
	ASSERT((rmax >= 0) || (rmax == INFPSZ));
	if (rmax == 0) {
		return (ERANGE);
	}
	/*
	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
	 * Needed to prevent partial failures in the strmakedata loop.
	 */
	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
		rmax = stp->sd_maxblk;

	/*
	 * A negative data length means there is no data part: treat it as
	 * zero bytes for the uio and waive the minimum size check.
	 */
	if ((msgsize = mdata->len) < 0) {
		msgsize = 0;
		rmin = 0;	/* no range check for NULL data part */
	}
	if ((msgsize < rmin) ||
	    ((msgsize > rmax) && (rmax != INFPSZ)) ||
	    (mctl->len > strctlsz)) {
		return (ERANGE);
	}

	/*
	 * Setup uio and iov for data part
	 */
	iovs.iov_base = mdata->buf;
	iovs.iov_len = msgsize;
	uios.uio_iov = &iovs;
	uios.uio_iovcnt = 1;
	uios.uio_loffset = 0;
	uios.uio_segflg = UIO_USERSPACE;
	uios.uio_fmode = fmode;
	uios.uio_extflg = UIO_COPY_DEFAULT;
	uios.uio_resid = msgsize;
	uios.uio_offset = 0;

	/* Ignore flow control in strput for HIPRI */
	if (flag & MSG_HIPRI)
		flag |= MSG_IGNFLOW;

	/*
	 * Retry loop: each pass builds a fresh control mblk and calls
	 * strput(); only EWOULDBLOCK leads to another pass.
	 */
	for (;;) {
		int done = 0;

		/*
		 * strput will always free the ctl mblk - even when strput
		 * fails.
		 */
		if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
				"strputmsg out:stp %p out %d error %d",
				stp, 1, error);
			return (error);
		}
		/*
		 * Verify that the whole message can be transferred by
		 * strput.
		 */
		ASSERT(stp->sd_maxblk == INFPSZ ||
			stp->sd_maxblk >= mdata->len);

		/*
		 * Pass the caller's original length (possibly negative) and
		 * store back whatever strput() leaves in msgsize.
		 */
		msgsize = mdata->len;
		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
		mdata->len = msgsize;

		if (error == 0)
			break;

		if (error != EWOULDBLOCK)
			goto out;

		mutex_enter(&stp->sd_lock);
		/*
		 * Check for a missed wakeup.
		 * Needed since strput did not hold sd_lock across
		 * the canputnext.
		 */
		if (bcanputnext(wqp, pri)) {
			/* Try again */
			mutex_exit(&stp->sd_lock);
			continue;
		}
		TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
			"strputmsg wait:stp %p waits pri %d", stp, pri);
		/*
		 * Sleep for write-side room.  Give up if strwaitq() fails
		 * or sets "done", returning whatever error it produced.
		 */
		if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
		    &done)) != 0) || done) {
			mutex_exit(&stp->sd_lock);
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
				"strputmsg out:q %p out %d error %d",
				stp, 0, error);
			return (error);
		}
		TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
			"strputmsg wake:stp %p wakes", stp);
		mutex_exit(&stp->sd_lock);
		/* Repeat the access check after having slept */
		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
			if (error = straccess(stp, JCWRITE))
				return (error);
	}
out:
	/*
	 * For historic reasons, applications expect EAGAIN
	 * when data mblk could not be allocated. so change
	 * ENOMEM back to EAGAIN
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
		"strputmsg out:stp %p out %d error %d", stp, 2, error);
	return (error);
}
7630 
/*
 * Put a message downstream.
 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
 * and the fmode parameter.
 *
 * This routine handles the consolidation private flags:
 *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
 *	MSG_HOLDSIG	Hold signals while waiting for data.
 *	MSG_IGNFLOW	Don't check streams flow control.
 *
 * Arguments:
 *	vp	vnode whose v_stream is the target stream (ASSERTed non-NULL).
 *	mctl	already-built control message.  NULL yields EINVAL.  A
 *		non-NULL mctl is always consumed: it is either handed to
 *		strput() or freed here on every return path.
 *	uiop	describes the data part, or NULL for no data part.
 *	msgsize	length of the data part; forced to -1 when uiop is NULL.
 *	pri	priority band; must be 0 when MSG_HIPRI is given.
 *	flag	(flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) must be exactly
 *		MSG_HIPRI or MSG_BAND; the private flags above may be or'ed in.
 *	fmode	file mode flags, or'ed with uiop->uio_fmode when uiop is
 *		non-NULL.
 *
 * Returns 0 on success, otherwise an errno value: EINVAL for a NULL mctl
 * or an illegal flag/pri combination, ERANGE for a data length outside the
 * cached packet size limits, EAGAIN in place of ENOMEM from strput(), or
 * any error from straccess(), strwriteable(), strwaitbuf(), strput() or
 * strwaitq().
 *
 * NOTE: strputmsg and kstrputmsg have much of the logic in common.
 */
int
kstrputmsg(
	struct vnode *vp,
	mblk_t *mctl,
	struct uio *uiop,
	ssize_t msgsize,
	unsigned char pri,
	int flag,
	int fmode)
{
	struct stdata *stp;
	queue_t *wqp;
	ssize_t rmin, rmax;
	int error;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;
	wqp = stp->sd_wrq;
#ifdef C2_AUDIT
	if (audit_active)
		audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
#endif
	if (mctl == NULL)
		return (EINVAL);

	/*
	 * Run the JCWRITE access check unless sd_sidp is NULL or the
	 * stream head vnode is a FIFO.  From here on every failure path
	 * must free mctl.
	 */
	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
		if (error = straccess(stp, JCWRITE)) {
			freemsg(mctl);
			return (error);
		}
	}

	/*
	 * MSG_IGNERROR suppresses the stream head error check, except
	 * that a multiplexed (STPLEX) stream is always checked.
	 * NOTE(review): B_TRUE occupies the argument slot where
	 * strputmsg() passes its xpg4 flag - confirm against strwriteable().
	 */
	if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
		if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
			mutex_enter(&stp->sd_lock);
			error = strwriteable(stp, B_FALSE, B_TRUE);
			mutex_exit(&stp->sd_lock);
			if (error != 0) {
				freemsg(mctl);
				return (error);
			}
		}
	}

	/*
	 * Check for legal flag value.
	 */
	switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
	case MSG_HIPRI:
		/* A high priority message must use band 0 */
		if (pri != 0) {
			freemsg(mctl);
			return (EINVAL);
		}
		break;
	case MSG_BAND:
		break;
	default:
		freemsg(mctl);
		return (EINVAL);
	}

	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
		"kstrputmsg in:stp %p", stp);

	/* get these values from those cached in the stream head */
	rmin = stp->sd_qn_minpsz;
	rmax = stp->sd_qn_maxpsz;

	/*
	 * Make sure ctl and data sizes together fall within the
	 * limits of the max and min receive packet sizes and do
	 * not exceed system limit.
	 */
	ASSERT((rmax >= 0) || (rmax == INFPSZ));
	if (rmax == 0) {
		freemsg(mctl);
		return (ERANGE);
	}
	/*
	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
	 * Needed to prevent partial failures in the strmakedata loop.
	 */
	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
		rmax = stp->sd_maxblk;

	if (uiop == NULL) {
		/* No data part: pass -1 to strput() and skip the range check */
		msgsize = -1;
		rmin = -1;	/* no range check for NULL data part */
	} else {
		/* Use uio flags as well as the fmode parameter flags */
		fmode |= uiop->uio_fmode;

		if ((msgsize < rmin) ||
		    ((msgsize > rmax) && (rmax != INFPSZ))) {
			freemsg(mctl);
			return (ERANGE);
		}
	}

	/* Ignore flow control in strput for HIPRI */
	if (flag & MSG_HIPRI)
		flag |= MSG_IGNFLOW;

	/*
	 * Retry loop: each pass hands strput() either mctl itself
	 * (MSG_IGNFLOW) or a copy/dup of it; only EWOULDBLOCK leads to
	 * another pass.
	 */
	for (;;) {
		int done = 0;
		int waitflag;
		mblk_t *mp;

		/*
		 * strput will always free the ctl mblk - even when strput
		 * fails. If MSG_IGNFLOW is set then any error returned
		 * will cause us to break the loop, so we don't need a copy
		 * of the message. If MSG_IGNFLOW is not set, then we can
		 * get hit by flow control and be forced to try again. In
		 * this case we need to have a copy of the message. We
		 * do this using copymsg since the message may get modified
		 * by something below us.
		 *
		 * We've observed that many TPI providers do not check db_ref
		 * on the control messages but blindly reuse them for the
		 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
		 * friendly to such providers than using dupmsg. Also, note
		 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
		 * Only data messages are subject to flow control, hence
		 * subject to this copymsg.
		 */
		if (flag & MSG_IGNFLOW) {
			/*
			 * Give the original away; clearing mctl turns the
			 * freemsg(mctl) at "out" into a call on NULL.
			 */
			mp = mctl;
			mctl = NULL;
		} else {
			do {
				/*
				 * If a message has a free pointer, the message
				 * must be dupmsg to maintain this pointer.
				 * Code using this facility must be sure
				 * that modules below will not change the
				 * contents of the dblk without checking db_ref
				 * first. If db_ref is > 1, then the module
				 * needs to do a copymsg first. Otherwise,
				 * the contents of the dblk may become
				 * inconsistent because the freemsg/freeb below
				 * may end up calling atomic_add_32_nv.
				 * The atomic_add_32_nv in freeb (accessing
				 * all of db_ref, db_type, db_flags, and
				 * db_struioflag) does not prevent other threads
				 * from concurrently trying to modify e.g.
				 * db_type.
				 */
				if (mctl->b_datap->db_frtnp != NULL)
					mp = dupmsg(mctl);
				else
					mp = copymsg(mctl);

				if (mp != NULL)
					break;

				/*
				 * Allocation failed: wait in strwaitbuf()
				 * and retry unless it reports an error.
				 */
				error = strwaitbuf(msgdsize(mctl), BPRI_MED);
				if (error) {
					freemsg(mctl);
					return (error);
				}
			} while (mp == NULL);
		}
		/*
		 * Verify that all of msgsize can be transferred by
		 * strput.
		 */
		ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
		if (error == 0)
			break;

		if (error != EWOULDBLOCK)
			goto out;

		/*
		 * IF MSG_IGNFLOW is set we should have broken out of loop
		 * above.
		 */
		ASSERT(!(flag & MSG_IGNFLOW));
		mutex_enter(&stp->sd_lock);
		/*
		 * Check for a missed wakeup.
		 * Needed since strput did not hold sd_lock across
		 * the canputnext.
		 */
		if (bcanputnext(wqp, pri)) {
			/* Try again */
			mutex_exit(&stp->sd_lock);
			continue;
		}
		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
			"kstrputmsg wait:stp %p waits pri %d", stp, pri);

		/* Translate the private MSG_* flags into strwaitq() flags */
		waitflag = WRITEWAIT;
		if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
			if (flag & MSG_HOLDSIG)
				waitflag |= STR_NOSIG;
			if (flag & MSG_IGNERROR)
				waitflag |= STR_NOERROR;
		}
		/*
		 * Sleep for write-side room.  Give up if strwaitq() fails
		 * or sets "done", returning whatever error it produced.
		 */
		if (((error = strwaitq(stp, waitflag,
		    (ssize_t)0, fmode, -1, &done)) != 0) || done) {
			mutex_exit(&stp->sd_lock);
			TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
				"kstrputmsg out:stp %p out %d error %d",
				stp, 0, error);
			freemsg(mctl);
			return (error);
		}
		TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
			"kstrputmsg wake:stp %p wakes", stp);
		mutex_exit(&stp->sd_lock);
		/* Repeat the access check after having slept */
		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
			if (error = straccess(stp, JCWRITE)) {
				freemsg(mctl);
				return (error);
			}
		}
	}
out:
	/* Release the retained original (NULL if it was given to strput()) */
	freemsg(mctl);
	/*
	 * For historic reasons, applications expect EAGAIN
	 * when data mblk could not be allocated. so change
	 * ENOMEM back to EAGAIN
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
		"kstrputmsg out:stp %p out %d error %d", stp, 2, error);
	return (error);
}
7877 
/*
 * Determines whether the necessary conditions are set on a stream
 * for it to be readable, writeable, or have exceptions.
 *
 * strpoll handles the consolidation private events:
 *	POLLNOERR	Do not return POLLERR even if there are stream
 *			head errors.
 *			Used by sockfs.
 *	POLLRDDATA	Do not return POLLIN unless at least one message on
 *			the queue contains one or more M_DATA mblks. Thus
 *			when this flag is set a queue with only
 *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
 *			Used by sockfs to ignore T_EXDATA_IND messages.
 *
 * Note: POLLRDDATA assumes that synch streams only return messages with
 * an M_DATA attached (i.e. not messages consisting of only
 * an M_PROTO/M_PCPROTO part).
 *
 * Arguments:
 *	stp		stream head being polled.
 *	events_arg	requested events (widened through ushort_t to int).
 *	anyyet		non-zero when poll() has already found events
 *			elsewhere; *phpp is then left untouched.
 *	reventsp	out: events currently satisfied.  Not written on the
 *			path that returns with no events pending.
 *	phpp		out: set to the stream's pollhead only when no events
 *			are pending and anyyet is zero.
 *
 * Returns EINVAL (with *reventsp = POLLNVAL) when the stream is
 * multiplexed (STPLEX); returns 0 in every other case.
 */
int
strpoll(
	struct stdata *stp,
	short events_arg,
	int anyyet,
	short *reventsp,
	struct pollhead **phpp)
{
	int events = (ushort_t)events_arg;
	int retevents = 0;
	mblk_t *mp;
	qband_t *qbp;
	long sd_flags = stp->sd_flag;	/* snapshot taken at entry */
	int headlocked = 0;		/* non-zero while we hold sd_lock */

	/*
	 * For performance, a single 'if' tests for most possible edge
	 * conditions in one shot
	 */
	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
		if (sd_flags & STPLEX) {
			*reventsp = POLLNVAL;
			return (EINVAL);
		}
		/*
		 * Report POLLERR when a read-type event is requested on a
		 * stream with a read error, or a write-type event on a
		 * stream with a write error, unless POLLNOERR was asked for.
		 */
		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
		    (sd_flags & STRDERR)) ||
		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
		    (sd_flags & STWRERR))) {
			if (!(events & POLLNOERR)) {
				*reventsp = POLLERR;
				return (0);
			}
		}
	}
	/* A hung-up stream reports POLLHUP and skips the write-side checks */
	if (sd_flags & STRHUP) {
		retevents |= POLLHUP;
	} else if (events & (POLLWRNORM | POLLWRBAND)) {
		queue_t *tq;
		queue_t	*qp = stp->sd_wrq;

		claimstr(qp);
		/* Find next module forward that has a service procedure */
		tq = qp->q_next->q_nfsrv;
		ASSERT(tq != NULL);

		polllock(&stp->sd_pollist, QLOCK(tq));
		if (events & POLLWRNORM) {
			queue_t *sqp;

			if (tq->q_flag & QFULL)
				/* ensure backq svc procedure runs */
				tq->q_flag |= QWANTW;
			else if ((sqp = stp->sd_struiowrq) != NULL) {
				/* Check sync stream barrier write q */
				/* Swap tq's lock for sqp's lock */
				mutex_exit(QLOCK(tq));
				polllock(&stp->sd_pollist, QLOCK(sqp));
				if (sqp->q_flag & QFULL)
					/* ensure pollwakeup() is done */
					sqp->q_flag |= QWANTWSYNC;
				else
					retevents |= POLLOUT;
				/* More write events to process ??? */
				if (! (events & POLLWRBAND)) {
					/* No: neither queue lock is held now */
					mutex_exit(QLOCK(sqp));
					releasestr(qp);
					goto chkrd;
				}
				/* Yes: retake tq's lock for the band scan */
				mutex_exit(QLOCK(sqp));
				polllock(&stp->sd_pollist, QLOCK(tq));
			} else
				retevents |= POLLOUT;
		}
		if (events & POLLWRBAND) {
			/*
			 * Walk every band of tq: a full band is marked
			 * QB_WANTW, any band that is not full makes the
			 * stream POLLWRBAND-writable.  With no bands at all
			 * POLLWRBAND is reported unconditionally.
			 */
			qbp = tq->q_bandp;
			if (qbp) {
				while (qbp) {
					if (qbp->qb_flag & QB_FULL)
						qbp->qb_flag |= QB_WANTW;
					else
						retevents |= POLLWRBAND;
					qbp = qbp->qb_next;
				}
			} else {
				retevents |= POLLWRBAND;
			}
		}
		mutex_exit(QLOCK(tq));
		releasestr(qp);
	}
chkrd:
	/* STRPRI set: only POLLPRI (if requested) is reported for reads */
	if (sd_flags & STRPRI) {
		retevents |= (events & POLLPRI);
	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
		queue_t	*qp = _RD(stp->sd_wrq);
		int normevents = (events & (POLLIN | POLLRDNORM));

		/*
		 * Note: Need to do polllock() here since ps_lock may be
		 * held. See bug 4191544.
		 */
		polllock(&stp->sd_pollist, &stp->sd_lock);
		headlocked = 1;
		mp = qp->q_first;
		/*
		 * Find the first message that counts and report events
		 * according to its band; the loop ends at that message.
		 */
		while (mp) {
			/*
			 * For POLLRDDATA we scan b_cont and b_next until we
			 * find an M_DATA.
			 */
			if ((events & POLLRDDATA) &&
			    mp->b_datap->db_type != M_DATA) {
				mblk_t *nmp = mp->b_cont;

				while (nmp != NULL &&
				    nmp->b_datap->db_type != M_DATA)
					nmp = nmp->b_cont;
				if (nmp == NULL) {
					/* No M_DATA in this message: skip it */
					mp = mp->b_next;
					continue;
				}
			}
			if (mp->b_band == 0)
				retevents |= normevents;
			else
				retevents |= (events & (POLLIN | POLLRDBAND));
			break;
		}
		if (! (retevents & normevents) &&
		    (stp->sd_wakeq & RSLEEP)) {
			/*
			 * Sync stream barrier read queue has data.
			 */
			retevents |= normevents;
		}
		/* Treat eof as normal data */
		if (sd_flags & STREOF)
			retevents |= normevents;
	}

	*reventsp = (short)retevents;
	if (retevents) {
		if (headlocked)
			mutex_exit(&stp->sd_lock);
		return (0);
	}

	/*
	 * If poll() has not found any events yet, set up event cell
	 * to wake up the poll if a requested event occurs on this
	 * stream.  Check for collisions with outstanding poll requests.
	 */
	if (!anyyet) {
		*phpp = &stp->sd_pollist;
		if (headlocked == 0) {
			polllock(&stp->sd_pollist, &stp->sd_lock);
			headlocked = 1;
		}
		/*
		 * Record, under sd_lock, that a poller is waiting;
		 * putback() tests and clears this flag before issuing a
		 * POLLIN | POLLRDNORM wakeup.
		 */
		stp->sd_rput_opt |= SR_POLLIN;
	}
	if (headlocked)
		mutex_exit(&stp->sd_lock);
	return (0);
}
8058 
8059 /*
8060  * The purpose of putback() is to assure sleeping polls/reads
8061  * are awakened when there are no new messages arriving at the,
8062  * stream head, and a message is placed back on the read queue.
8063  *
8064  * sd_lock must be held when messages are placed back on stream
8065  * head.  (getq() holds sd_lock when it removes messages from
8066  * the queue)
8067  */
8068 
8069 static void
8070 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
8071 {
8072 	ASSERT(MUTEX_HELD(&stp->sd_lock));
8073 	(void) putbq(q, bp);
8074 	/*
8075 	 * A message may have come in when the sd_lock was dropped in the
8076 	 * calling routine. If this is the case and STR*ATMARK info was
8077 	 * received, need to move that from the stream head to the q_last
8078 	 * so that SIOCATMARK can return the proper value.
8079 	 */
8080 	if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
8081 		unsigned short *flagp = &q->q_last->b_flag;
8082 		uint_t b_flag = (uint_t)*flagp;
8083 
8084 		if (stp->sd_flag & STRATMARK) {
8085 			b_flag &= ~MSGNOTMARKNEXT;
8086 			b_flag |= MSGMARKNEXT;
8087 			stp->sd_flag &= ~STRATMARK;
8088 		} else {
8089 			b_flag &= ~MSGMARKNEXT;
8090 			b_flag |= MSGNOTMARKNEXT;
8091 			stp->sd_flag &= ~STRNOTATMARK;
8092 		}
8093 		*flagp = (unsigned short) b_flag;
8094 	}
8095 
8096 #ifdef	DEBUG
8097 	/*
8098 	 * Make sure that the flags are not messed up.
8099 	 */
8100 	{
8101 		mblk_t *mp;
8102 		mp = q->q_last;
8103 		while (mp != NULL) {
8104 			ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
8105 			    (MSGMARKNEXT|MSGNOTMARKNEXT));
8106 			mp = mp->b_cont;
8107 		}
8108 	}
8109 #endif
8110 	if (q->q_first == bp) {
8111 		short pollevents;
8112 
8113 		if (stp->sd_flag & RSLEEP) {
8114 			stp->sd_flag &= ~RSLEEP;
8115 			cv_broadcast(&q->q_wait);
8116 		}
8117 		if (stp->sd_flag & STRPRI) {
8118 			pollevents = POLLPRI;
8119 		} else {
8120 			if (band == 0) {
8121 				if (!(stp->sd_rput_opt & SR_POLLIN))
8122 					return;
8123 				stp->sd_rput_opt &= ~SR_POLLIN;
8124 				pollevents = POLLIN | POLLRDNORM;
8125 			} else {
8126 				pollevents = POLLIN | POLLRDBAND;
8127 			}
8128 		}
8129 		mutex_exit(&stp->sd_lock);
8130 		pollwakeup(&stp->sd_pollist, pollevents);
8131 		mutex_enter(&stp->sd_lock);
8132 	}
8133 }
8134 
8135 /*
8136  * Return the held vnode attached to the stream head of a
8137  * given queue
8138  * It is the responsibility of the calling routine to ensure
8139  * that the queue does not go away (e.g. pop).
8140  */
8141 vnode_t *
8142 strq2vp(queue_t *qp)
8143 {
8144 	vnode_t *vp;
8145 	vp = STREAM(qp)->sd_vnode;
8146 	ASSERT(vp != NULL);
8147 	VN_HOLD(vp);
8148 	return (vp);
8149 }
8150 
8151 /*
8152  * return the stream head write queue for the given vp
8153  * It is the responsibility of the calling routine to ensure
8154  * that the stream or vnode do not close.
8155  */
8156 queue_t *
8157 strvp2wq(vnode_t *vp)
8158 {
8159 	ASSERT(vp->v_stream != NULL);
8160 	return (vp->v_stream->sd_wrq);
8161 }
8162 
8163 /*
8164  * pollwakeup stream head
8165  * It is the responsibility of the calling routine to ensure
8166  * that the stream or vnode do not close.
8167  */
8168 void
8169 strpollwakeup(vnode_t *vp, short event)
8170 {
8171 	ASSERT(vp->v_stream);
8172 	pollwakeup(&vp->v_stream->sd_pollist, event);
8173 }
8174 
8175 /*
8176  * Mate the stream heads of two vnodes together. If the two vnodes are the
8177  * same, we just make the write-side point at the read-side -- otherwise,
8178  * we do a full mate.  Only works on vnodes associated with streams that are
8179  * still being built and thus have only a stream head.
8180  */
8181 void
8182 strmate(vnode_t *vp1, vnode_t *vp2)
8183 {
8184 	queue_t *wrq1 = strvp2wq(vp1);
8185 	queue_t *wrq2 = strvp2wq(vp2);
8186 
8187 	/*
8188 	 * Verify that there are no modules on the stream yet.  We also
8189 	 * rely on the stream head always having a service procedure to
8190 	 * avoid tweaking q_nfsrv.
8191 	 */
8192 	ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL);
8193 	ASSERT(wrq1->q_qinfo->qi_srvp != NULL);
8194 	ASSERT(wrq2->q_qinfo->qi_srvp != NULL);
8195 
8196 	/*
8197 	 * If the queues are the same, just twist; otherwise do a full mate.
8198 	 */
8199 	if (wrq1 == wrq2) {
8200 		wrq1->q_next = _RD(wrq1);
8201 	} else {
8202 		wrq1->q_next = _RD(wrq2);
8203 		wrq2->q_next = _RD(wrq1);
8204 		STREAM(wrq1)->sd_mate = STREAM(wrq2);
8205 		STREAM(wrq1)->sd_flag |= STRMATE;
8206 		STREAM(wrq2)->sd_mate = STREAM(wrq1);
8207 		STREAM(wrq2)->sd_flag |= STRMATE;
8208 	}
8209 }
8210 
8211 /*
8212  * XXX will go away when console is correctly fixed.
8213  * Clean up the console PIDS, from previous I_SETSIG,
8214  * called only for cnopen which never calls strclean().
8215  */
8216 void
8217 str_cn_clean(struct vnode *vp)
8218 {
8219 	strsig_t *ssp, *pssp, *tssp;
8220 	struct stdata *stp;
8221 	struct pid  *pidp;
8222 	int update = 0;
8223 
8224 	ASSERT(vp->v_stream);
8225 	stp = vp->v_stream;
8226 	pssp = NULL;
8227 	mutex_enter(&stp->sd_lock);
8228 	ssp = stp->sd_siglist;
8229 	while (ssp) {
8230 		mutex_enter(&pidlock);
8231 		pidp = ssp->ss_pidp;
8232 		/*
8233 		 * Get rid of PID if the proc is gone.
8234 		 */
8235 		if (pidp->pid_prinactive) {
8236 			tssp = ssp->ss_next;
8237 			if (pssp)
8238 				pssp->ss_next = tssp;
8239 			else
8240 				stp->sd_siglist = tssp;
8241 			ASSERT(pidp->pid_ref <= 1);
8242 			PID_RELE(ssp->ss_pidp);
8243 			mutex_exit(&pidlock);
8244 			kmem_free(ssp, sizeof (strsig_t));
8245 			update = 1;
8246 			ssp = tssp;
8247 			continue;
8248 		} else
8249 			mutex_exit(&pidlock);
8250 		pssp = ssp;
8251 		ssp = ssp->ss_next;
8252 	}
8253 	if (update) {
8254 		stp->sd_sigflags = 0;
8255 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
8256 			stp->sd_sigflags |= ssp->ss_events;
8257 	}
8258 	mutex_exit(&stp->sd_lock);
8259 }
8260 
8261 /*
8262  * Return B_TRUE if there is data in the message, B_FALSE otherwise.
8263  */
8264 static boolean_t
8265 msghasdata(mblk_t *bp)
8266 {
8267 	for (; bp; bp = bp->b_cont)
8268 		if (bp->b_datap->db_type == M_DATA) {
8269 			ASSERT(bp->b_wptr >= bp->b_rptr);
8270 			if (bp->b_wptr > bp->b_rptr)
8271 				return (B_TRUE);
8272 		}
8273 	return (B_FALSE);
8274 }
8275