xref: /illumos-gate/usr/src/uts/common/os/streamio.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
35 #include <sys/param.h>
36 #include <sys/errno.h>
37 #include <sys/signal.h>
38 #include <sys/stat.h>
39 #include <sys/proc.h>
40 #include <sys/cred.h>
41 #include <sys/user.h>
42 #include <sys/vnode.h>
43 #include <sys/file.h>
44 #include <sys/stream.h>
45 #include <sys/strsubr.h>
46 #include <sys/stropts.h>
47 #include <sys/tihdr.h>
48 #include <sys/var.h>
49 #include <sys/poll.h>
50 #include <sys/termio.h>
51 #include <sys/ttold.h>
52 #include <sys/systm.h>
53 #include <sys/uio.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sad.h>
56 #include <sys/priocntl.h>
57 #include <sys/jioctl.h>
58 #include <sys/procset.h>
59 #include <sys/session.h>
60 #include <sys/kmem.h>
61 #include <sys/filio.h>
62 #include <sys/vtrace.h>
63 #include <sys/debug.h>
64 #include <sys/strredir.h>
65 #include <sys/fs/fifonode.h>
66 #include <sys/fs/snode.h>
67 #include <sys/strlog.h>
68 #include <sys/strsun.h>
69 #include <sys/project.h>
70 #include <sys/kbio.h>
71 #include <sys/msio.h>
72 #include <sys/tty.h>
73 #include <sys/ptyvar.h>
74 #include <sys/vuid_event.h>
75 #include <sys/modctl.h>
76 #include <sys/sunddi.h>
77 #include <sys/sunldi_impl.h>
78 #include <sys/autoconf.h>
79 #include <sys/policy.h>
80 
81 /*
82  * what is mblk_pull_len?
83  *
84  * If a streams message consists of many short messages,
85  * a performance degradation occurs from copyout overhead.
86  * To decrease the per mblk overhead, messages that are
87  * likely to consist of many small mblks are pulled up into
88  * one continuous chunk of memory.
89  *
90  * To avoid the processing overhead of examining every
91  * mblk, a quick heuristic is used. If the first mblk in
92  * the message is shorter than mblk_pull_len, it is likely
93  * that the rest of the mblks in the message will be short as well.
94  *
95  * This heuristic was decided upon after performance tests
96  * indicated that anything more complex slowed down the main
97  * code path.
98  */
99 #define	MBLK_PULL_LEN 64
/* Run-time value of the pull-up threshold; starts at the default above. */
100 uint32_t mblk_pull_len = MBLK_PULL_LEN;
101 
102 /*
103  * The sgttyb_handling flag controls the handling of the old BSD
104  * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
105  *
106  * 0 - Emit no warnings at all and retain old, broken behavior.
107  * 1 - Emit no warnings and silently handle new semantics.
108  * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
109  *     (once per system invocation).  Handle with new semantics.
110  * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
111  *     made (so that offenders drop core and are easy to debug).
112  *
113  * The "new semantics" are that TIOCGETP returns B38400 for
114  * sg_[io]speed if the corresponding value is over B38400, and that
115  * TIOCSET[PN] accept B38400 in these cases to mean "retain current
116  * bit rate."
117  */
118 int sgttyb_handling = 1;
/*
 * NOTE(review): presumably records that the once-only CE_NOTE of mode 2
 * has already been issued; the code that uses it is not in this part of
 * the file -- confirm before relying on this.
 */
119 static boolean_t sgttyb_complaint;
120 
121 /* don't push drcompat module by default on Style-2 streams */
/*
 * stropen() tests this flag before pushing DRMODNAME on a clone open of a
 * network driver whose queue is not _QASSOCIATED.
 */
122 static int push_drcompat = 0;
123 
124 /*
125  * id value used to distinguish between different ioctl messages
126  */
127 static uint32_t ioc_id;
128 
/* Forward declarations of file-local helpers. */
129 static void putback(struct stdata *, queue_t *, mblk_t *, int);
130 static void strcleanall(struct vnode *);
131 static int strwsrv(queue_t *);
132 
133 /*
134  * qinit and module_info structures for stream head read and write queues
 *
 * Two sets are defined: strdata/stwdata for ordinary streams and
 * fifo_strdata/fifo_stwdata for FIFOs; stropen() picks one when it calls
 * setq().  Both read-side qinits name strrput and both write-side qinits
 * name strwsrv; the sets differ only in their module_info (the FIFO read
 * side uses PIPE_BUF, FIFOHIWAT and FIFOLOWAT where the ordinary one uses
 * INFPSZ, STRHIGH and STRLOW).  See qinit(9S) and module_info(9S) for the
 * meaning of each positional initializer.
135  */
136 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
137 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
138 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
139 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
140 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
141     FIFOLOWAT };
142 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
143 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
144 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
145 
146 extern kmutex_t	strresources;	/* protects global resources */
147 extern kmutex_t muxifier;	/* single-threads multiplexor creation */
148 kmutex_t sad_lock;		/* protects sad drivers autopush */
149 
150 static boolean_t msghasdata(mblk_t *bp);
/* Convenience negation of msghasdata(). */
151 #define	msgnodata(bp) (!msghasdata(bp))
152 
153 /*
154  * Stream head locking notes:
155  *	There are four monitors associated with the stream head:
156  *	1. v_stream monitor: in stropen() and strclose() v_lock
157  *		is held while the association of vnode and stream
158  *		head is established or tested for.
159  *	2. open/close/push/pop monitor: sd_lock is held while each
160  *		thread bids for exclusive access to this monitor
161  *		for opening or closing a stream.  In addition, this
162  *		monitor is entered during pushes and pops.  This
163  *		guarantees that during plumbing operations there
164  *		is only one thread trying to change the plumbing.
165  *		Any other threads present in the stream are only
166  *		using the plumbing.
167  *	3. read/write monitor: in the case of read, a thread holds
168  *		sd_lock while trying to get data from the stream
169  *		head queue.  if there is none to fulfill a read
170  *		request, it sets RSLEEP and calls cv_wait_sig() down
171  *		in strwaitq() to await the arrival of new data.
172  *		when new data arrives in strrput(), sd_lock is acquired
173  *		before testing for RSLEEP and calling cv_broadcast().
174  *		the behavior of strwrite(), strwsrv(), and WSLEEP
175  *		mirror this.
176  *	4. ioctl monitor: sd_lock is gotten to ensure that only one
177  *		thread is doing an ioctl at a time.
178  */
179 
180 static int
181 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
182     int anchor, cred_t *crp)
183 {
184 	int error;
185 	fmodsw_impl_t *fp;
186 
187 	if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
188 		error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
189 		return (error);
190 	}
191 	if (stp->sd_pushcnt >= nstrpush) {
192 		return (EINVAL);
193 	}
194 
195 	if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
196 		stp->sd_flag |= STREOPENFAIL;
197 		return (EINVAL);
198 	}
199 
200 	/*
201 	 * push new module and call its open routine via qattach
202 	 */
203 	if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
204 		return (error);
205 
206 	/*
207 	 * If flow control is on, don't break it - enable
208 	 * first back queue with svc procedure
209 	 */
210 	if (_RD(stp->sd_wrq)->q_flag & QWANTW) {
211 		/* Note: no setqback here - use pri -1. */
212 		backenable(_RD(stp->sd_wrq->q_next), -1);
213 	}
214 
215 	/*
216 	 * Check to see if caller wants a STREAMS anchor
217 	 * put at this place in the stream, and add if so.
218 	 */
219 	mutex_enter(&stp->sd_lock);
220 	if (anchor == stp->sd_pushcnt)
221 		stp->sd_anchor = stp->sd_pushcnt;
222 	mutex_exit(&stp->sd_lock);
223 
224 	return (0);
225 }
226 
227 /*
228  * Open a stream device.
 *
 * If vp already has a stream head (vp->v_stream), wait for any open,
 * close or plumbing operation in progress (STWOPEN/STRCLOSE/STRPLUMB)
 * on it or on its mate, then re-open every queue below the head with
 * qreopen().  Otherwise allocate a new stream head, attach the driver
 * with qattach() and push the autopush modules configured for *devp.
 *
 *	vp	vnode being opened
 *	devp	device number; passed by reference to qattach() and
 *		qreopen(), and re-read after qattach() (see the note on
 *		clonable drivers below)
 *	flag	open flags; with FNDELAY or FNONBLOCK a busy stream yields
 *		EAGAIN instead of a wait
 *	crp	credentials handed to the open routines
 *
 * Returns 0 on success, otherwise an errno value: EAGAIN or EINTR from
 * the wait for a busy stream, EIO if the existing stream has STRDERR or
 * STWRERR set, or whatever qreopen(), qattach() or push_mod() returned.
229  */
230 int
231 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
232 {
233 	struct stdata *stp;
234 	queue_t *qp;
235 	int s;
236 	dev_t dummydev;
237 	struct autopush *ap;
238 	int error = 0;
239 	ssize_t	rmin, rmax;
240 	int cloneopen;
241 	queue_t *brq;
242 	major_t major;
243 
244 #ifdef C2_AUDIT
245 	if (audit_active)
246 		audit_stropen(vp, devp, flag, crp);
247 #endif
248 
249 	/*
250 	 * If the stream already exists, wait for any open in progress
251 	 * to complete, then call the open function of each module and
252 	 * driver in the stream.  Otherwise create the stream.
253 	 */
254 	TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
255 retry:
256 	mutex_enter(&vp->v_lock);
257 	if ((stp = vp->v_stream) != NULL) {
258 
259 		/*
260 		 * Waiting for stream to be created to device
261 		 * due to another open.
262 		 */
263 	    mutex_exit(&vp->v_lock);
264 
265 	    if (STRMATED(stp)) {
266 		struct stdata *strmatep = stp->sd_mate;
267 
		/*
		 * NOTE(review): STRLOCKMATES() is defined elsewhere; the
		 * unlock calls below imply that it acquires sd_lock of both
		 * stp and its mate.
		 */
268 		STRLOCKMATES(stp);
269 		if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
270 			if (flag & (FNDELAY|FNONBLOCK)) {
271 				error = EAGAIN;
272 				mutex_exit(&strmatep->sd_lock);
273 				goto ckreturn;
274 			}
			/* Wait on the mate, holding only the mate's lock. */
275 			mutex_exit(&stp->sd_lock);
276 			if (!cv_wait_sig(&strmatep->sd_monitor,
277 			    &strmatep->sd_lock)) {
278 				error = EINTR;
				/* ckreturn expects stp->sd_lock, not the mate's */
279 				mutex_exit(&strmatep->sd_lock);
280 				mutex_enter(&stp->sd_lock);
281 				goto ckreturn;
282 			}
283 			mutex_exit(&strmatep->sd_lock);
284 			goto retry;
285 		}
286 		if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
287 			if (flag & (FNDELAY|FNONBLOCK)) {
288 				error = EAGAIN;
289 				mutex_exit(&strmatep->sd_lock);
290 				goto ckreturn;
291 			}
292 			mutex_exit(&strmatep->sd_lock);
293 			if (!cv_wait_sig(&stp->sd_monitor, &stp->sd_lock)) {
294 				error = EINTR;
295 				goto ckreturn;
296 			}
297 			mutex_exit(&stp->sd_lock);
298 			goto retry;
299 		}
300 
301 		if (stp->sd_flag & (STRDERR|STWRERR)) {
302 			error = EIO;
303 			mutex_exit(&strmatep->sd_lock);
304 			goto ckreturn;
305 		}
306 
		/* Claim the open; other openers now wait on sd_monitor. */
307 		stp->sd_flag |= STWOPEN;
308 		STRUNLOCKMATES(stp);
309 	    } else {
310 		mutex_enter(&stp->sd_lock);
311 		if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
312 			if (flag & (FNDELAY|FNONBLOCK)) {
313 				error = EAGAIN;
314 				goto ckreturn;
315 			}
316 			if (!cv_wait_sig(&stp->sd_monitor, &stp->sd_lock)) {
317 				error = EINTR;
318 				goto ckreturn;
319 			}
320 			mutex_exit(&stp->sd_lock);
321 			goto retry;  /* could be clone! */
322 		}
323 
324 		if (stp->sd_flag & (STRDERR|STWRERR)) {
325 			error = EIO;
326 			goto ckreturn;
327 		}
328 
		/* Claim the open; other openers now wait on sd_monitor. */
329 		stp->sd_flag |= STWOPEN;
330 		mutex_exit(&stp->sd_lock);
331 	    }
332 
333 		/*
334 		 * Open all modules and devices down stream to notify
335 		 * that another user is streaming.  For modules, set the
336 		 * last argument to MODOPEN and do not pass any open flags.
337 		 * Ignore dummydev since this is not the first open.
338 		 */
339 	    claimstr(stp->sd_wrq);
340 	    qp = stp->sd_wrq;
341 	    while (_SAMESTR(qp)) {
342 		qp = qp->q_next;
343 		if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
344 			break;
345 	    }
346 	    releasestr(stp->sd_wrq);
	    /*
	     * Whether or not a qreopen() failed, drop STWOPEN and clear the
	     * hangup and error state of the stream head.
	     */
347 	    mutex_enter(&stp->sd_lock);
348 	    stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
349 	    stp->sd_rerror = 0;
350 	    stp->sd_werror = 0;
351 ckreturn:
	    /*
	     * Every jump to ckreturn arrives holding stp->sd_lock and not
	     * the mate's sd_lock: the error exits above release the mate's
	     * lock (and retake stp's where it had been dropped) first.
	     */
352 	    cv_broadcast(&stp->sd_monitor);
353 	    mutex_exit(&stp->sd_lock);
354 	    return (error);
355 	}
356 
357 	/*
358 	 * This vnode isn't streaming.  SPECFS already
359 	 * checked for multiple vnodes pointing to the
360 	 * same stream, so create a stream to the driver.
361 	 */
362 	qp = allocq();
363 	stp = shalloc(qp);
364 
365 	/*
366 	 * Initialize stream head.  shalloc() has given us
367 	 * exclusive access, and we have the vnode locked;
368 	 * we can do whatever we want with stp.
369 	 */
370 	stp->sd_flag = STWOPEN;
371 	stp->sd_siglist = NULL;
372 	stp->sd_pollist.ph_list = NULL;
373 	stp->sd_sigflags = 0;
374 	stp->sd_mark = NULL;
375 	stp->sd_closetime = STRTIMOUT;
376 	stp->sd_sidp = NULL;
377 	stp->sd_pgidp = NULL;
378 	stp->sd_vnode = vp;
379 	stp->sd_rerror = 0;
380 	stp->sd_werror = 0;
381 	stp->sd_wroff = 0;
382 	stp->sd_iocblk = NULL;
383 	stp->sd_pushcnt = 0;
384 	stp->sd_qn_minpsz = 0;
385 	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
386 	stp->sd_maxblk = INFPSZ;
387 	qp->q_ptr = _WR(qp)->q_ptr = stp;
388 	STREAM(qp) = STREAM(_WR(qp)) = stp;
	/*
	 * Publish the stream head.  STWOPEN is already set, so a concurrent
	 * stropen() that finds it waits on sd_monitor (or gets EAGAIN).
	 */
389 	vp->v_stream = stp;
390 	mutex_exit(&vp->v_lock);
391 	if (vp->v_type == VFIFO) {
392 		stp->sd_flag |= OLDNDELAY;
393 		/*
394 		 * This means, both for pipes and fifos
395 		 * strwrite will send SIGPIPE if the other
396 		 * end is closed. For putmsg it depends
397 		 * on whether it is a XPG4_2 application
398 		 * or not
399 		 */
400 		stp->sd_wput_opt = SW_SIGPIPE;
401 
402 		/* setq might sleep in kmem_alloc - avoid holding locks. */
403 		setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
404 		    SQ_CI|SQ_CO, B_FALSE);
405 
406 		set_qend(qp);
407 		stp->sd_strtab = (struct streamtab *)fifo_getinfo();
408 		_WR(qp)->q_nfsrv = _WR(qp);
409 		qp->q_nfsrv = qp;
410 		/*
411 		 * Wake up others that are waiting for stream to be created.
412 		 */
413 		mutex_enter(&stp->sd_lock);
414 		/*
415 		 * nothing has been pushed on the stream yet, so
416 		 * optimized stream head packetsizes are just that
417 		 * of the read queue
418 		 */
419 		stp->sd_qn_minpsz = qp->q_minpsz;
420 		stp->sd_qn_maxpsz = qp->q_maxpsz;
421 		stp->sd_flag &= ~STWOPEN;
		/* No driver to attach and no autopush for a FIFO. */
422 		goto fifo_opendone;
423 	}
424 	/* setq might sleep in kmem_alloc - avoid holding locks. */
425 	setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
426 
427 	set_qend(qp);
428 
429 	/*
430 	 * Open driver and create stream to it (via qattach).
431 	 */
432 	cloneopen = (getmajor(*devp) == clone_major);
433 	if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
		/*
		 * Driver open failed: unhook the half-built stream head
		 * from the vnode, wake anyone waiting on sd_monitor, then
		 * free the queue pair and the stream head.
		 */
434 		mutex_enter(&vp->v_lock);
435 		vp->v_stream = NULL;
436 		mutex_exit(&vp->v_lock);
437 		mutex_enter(&stp->sd_lock);
438 		cv_broadcast(&stp->sd_monitor);
439 		mutex_exit(&stp->sd_lock);
440 		freeq(_RD(qp));
441 		shfree(stp);
442 		return (error);
443 	}
444 	/*
445 	 * Set sd_strtab after open in order to handle clonable drivers
446 	 */
447 	stp->sd_strtab = STREAMSTAB(getmajor(*devp));
448 
449 	/*
450 	 * Historical note: dummydev used to be set prior to the initial
451 	 * open (via qattach above), which made the value seen
452 	 * inconsistent between an I_PUSH and an autopush of a module.
453 	 */
454 	dummydev = *devp;
455 
456 	/*
457 	 * For clone open of old style (Q not associated) network driver,
458 	 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
459 	 */
460 	brq = _RD(_WR(qp)->q_next);
461 	major = getmajor(*devp);
462 	if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
463 	    ((brq->q_flag & _QASSOCIATED) == 0)) {
		/* A failed push is only warned about; the open continues. */
464 		if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp) != 0)
465 			cmn_err(CE_WARN, "cannot push " DRMODNAME
466 			    " streams module");
467 	}
468 
469 	/*
470 	 * check for autopush
471 	 */
472 	mutex_enter(&sad_lock);
473 	ap = strphash(getemajor(*devp));
474 #define	DEVT(ap)	makedevice(ap->ap_major, ap->ap_minor)
475 #define	DEVLT(ap)	makedevice(ap->ap_major, ap->ap_lastminor)
476 
	/*
	 * Walk the hash chain for an entry with this major number that
	 * covers the minor: SAP_ALL matches any minor, SAP_ONE exactly
	 * ap_minor, SAP_RANGE the range ap_minor..ap_lastminor inclusive.
	 */
477 	while (ap) {
478 		if (ap->ap_major == (getemajor(*devp))) {
479 			if (ap->ap_type == SAP_ALL)
480 				break;
481 			else if ((ap->ap_type == SAP_ONE) &&
482 			    (getminor(DEVT(ap)) == getminor(*devp)))
483 				break;
484 			else if (ap->ap_type == SAP_RANGE &&
485 			    getminor(*devp) >= getminor(DEVT(ap)) &&
486 			    getminor(*devp) <= getminor(DEVLT(ap)))
487 				break;
488 		}
489 		ap = ap->ap_nextp;
490 	}
491 	if (ap == NULL) {
492 		mutex_exit(&sad_lock);
493 		goto opendone;
494 	}
	/*
	 * Take a reference on the entry under sad_lock so the lock can be
	 * dropped while the modules are pushed; the reference is released,
	 * and the entry freed once the count reaches zero, below.
	 */
495 	ap->ap_cnt++;
496 	mutex_exit(&sad_lock);
	/* Push the listed modules in order, stopping at the first failure. */
497 	for (s = 0; s < ap->ap_npush; s++) {
498 		error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
499 		    ap->ap_anchor, crp);
500 		if (error != 0)
501 			break;
502 	}
503 	mutex_enter(&sad_lock);
504 	if (--(ap->ap_cnt) <= 0)
505 		ap_free(ap);
506 	mutex_exit(&sad_lock);
507 
508 	/*
509 	 * let specfs know that open failed part way through
510 	 */
511 
512 	if (error) {
513 		mutex_enter(&stp->sd_lock);
514 		stp->sd_flag |= STREOPENFAIL;
515 		mutex_exit(&stp->sd_lock);
516 	}
517 
518 opendone:
519 
520 	/*
521 	 * Wake up others that are waiting for stream to be created.
522 	 */
523 	mutex_enter(&stp->sd_lock);
524 	stp->sd_flag &= ~STWOPEN;
525 
526 	/*
527 	 * As a performance concern we are caching the values of
528 	 * q_minpsz and q_maxpsz of the module below the stream
529 	 * head in the stream head.
530 	 */
531 	mutex_enter(QLOCK(stp->sd_wrq->q_next));
532 	rmin = stp->sd_wrq->q_next->q_minpsz;
533 	rmax = stp->sd_wrq->q_next->q_maxpsz;
534 	mutex_exit(QLOCK(stp->sd_wrq->q_next));
535 
536 	/* do this processing here as a performance concern */
	/* A non-zero strmsgsz caps rmax; INFPSZ is replaced by strmsgsz. */
537 	if (strmsgsz != 0) {
538 		if (rmax == INFPSZ)
539 			rmax = strmsgsz;
540 		else
541 			rmax = MIN(strmsgsz, rmax);
542 	}
543 
544 	mutex_enter(QLOCK(stp->sd_wrq));
545 	stp->sd_qn_minpsz = rmin;
546 	stp->sd_qn_maxpsz = rmax;
547 	mutex_exit(QLOCK(stp->sd_wrq));
548 
549 fifo_opendone:
	/*
	 * Reached with stp->sd_lock held.  `error' may be non-zero here
	 * when an autopush failed; the stream itself stays attached to
	 * the vnode in that case.
	 */
550 	cv_broadcast(&stp->sd_monitor);
551 	mutex_exit(&stp->sd_lock);
552 	return (error);
553 }
554 
/*
 * qinit pair that strclose() installs on the stream head queues of a pipe
 * end that is closed while the other end is still alive.  The read side
 * names strsink(); every routine slot on the write side is NULL.
 * strclose() also compares q_qinfo against &deadrend to recognise a
 * half-closed pipe.
 */
555 static int strsink(queue_t *, mblk_t *);
556 static struct qinit deadrend = {
557 	strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
558 };
559 static struct qinit deadwend = {
560 	NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
561 };
562 
563 /*
564  * Close a stream.
565  * This is called from closef() on the last close of an open stream.
566  * Strclean() will already have removed the siglist and pollist
567  * information, so all that remains is to remove all multiplexor links
568  * for the stream, pop all the modules (and the driver), and free the
569  * stream structure.
 *
 *	vp	vnode whose stream (vp->v_stream, asserted non-NULL) is
 *		being closed
 *	flag	file flags; with FNDELAY or FNONBLOCK the wait for queued
 *		output to drain is skipped
 *	crp	credentials passed to munlinkall() and qdetach()
 *
 * Always returns 0.
570  */
571 
572 int
573 strclose(struct vnode *vp, int flag, cred_t *crp)
574 {
575 	struct stdata *stp;
576 	queue_t *qp;
577 	int rval;
578 	int freestp = 1;
579 	queue_t *rmq;
580 
581 #ifdef C2_AUDIT
582 	if (audit_active)
583 		audit_strclose(vp, flag, crp);
584 #endif
585 
586 	TRACE_1(TR_FAC_STREAMS_FR,
587 		TR_STRCLOSE, "strclose:%p", vp);
588 	ASSERT(vp->v_stream);
589 
590 	stp = vp->v_stream;
591 	ASSERT(!(stp->sd_flag & STPLEX));
592 	qp = stp->sd_wrq;
593 
594 	/*
595 	 * Needed so that strpoll will return non-zero for this fd.
596 	 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
597 	 */
598 	mutex_enter(&stp->sd_lock);
599 	stp->sd_flag |= STRHUP;
600 	mutex_exit(&stp->sd_lock);
601 
602 	/*
603 	 * Since we call pollwakeup in close() now, the poll list should
604 	 * be empty in most cases. The only exception is the layered devices
605 	 * (e.g. the console drivers with redirection modules pushed on top
606 	 * of it).
607 	 */
608 	if (stp->sd_pollist.ph_list != NULL) {
609 		pollwakeup(&stp->sd_pollist, POLLERR);
610 		pollhead_clean(&stp->sd_pollist);
611 	}
612 	ASSERT(stp->sd_pollist.ph_list == NULL);
613 	ASSERT(stp->sd_sidp == NULL);
614 	ASSERT(stp->sd_pgidp == NULL);
615 
616 	/*
617 	 * If the registered process or process group did not have an
618 	 * open instance of this stream then strclean would not be
619 	 * called. Thus at the time of closing all remaining siglist entries
620 	 * are removed.
621 	 */
622 	if (stp->sd_siglist != NULL)
623 		strcleanall(vp);
624 
625 	ASSERT(stp->sd_siglist == NULL);
626 	ASSERT(stp->sd_sigflags == 0);
627 
	/*
	 * For a mated stream, wait (uninterruptibly) until neither this
	 * stream head nor its mate has an open, close or plumbing operation
	 * in progress, then claim the close by setting STRCLOSE.  Each wait
	 * is done holding only the lock of the stream waited on; afterwards
	 * both locks are retaken and both conditions are checked again.
	 */
628 	if (STRMATED(stp)) {
629 		struct stdata *strmatep = stp->sd_mate;
630 		int waited = 1;
631 
632 		STRLOCKMATES(stp);
633 		while (waited) {
634 			waited = 0;
635 			while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
636 				mutex_exit(&strmatep->sd_lock);
637 				cv_wait(&stp->sd_monitor, &stp->sd_lock);
638 				mutex_exit(&stp->sd_lock);
639 				STRLOCKMATES(stp);
640 				waited = 1;
641 			}
642 			while (strmatep->sd_flag &
643 			    (STWOPEN|STRCLOSE|STRPLUMB)) {
644 				mutex_exit(&stp->sd_lock);
645 				cv_wait(&strmatep->sd_monitor,
646 				    &strmatep->sd_lock);
647 				mutex_exit(&strmatep->sd_lock);
648 				STRLOCKMATES(stp);
649 				waited = 1;
650 			}
651 		}
652 		stp->sd_flag |= STRCLOSE;
653 		STRUNLOCKMATES(stp);
654 	} else {
655 		mutex_enter(&stp->sd_lock);
656 		stp->sd_flag |= STRCLOSE;
657 		mutex_exit(&stp->sd_lock);
658 	}
659 
660 	ASSERT(qp->q_first == NULL);	/* No more delayed write */
661 
662 	/* Check if an I_LINK was ever done on this stream */
663 	if (stp->sd_flag & STRHASLINKS) {
664 		(void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval);
665 	}
666 
	/*
	 * Detach the queues below the stream head one at a time, topmost
	 * first, until the write queue no longer has a successor in this
	 * stream.
	 */
667 	while (_SAMESTR(qp)) {
668 		/*
669 		 * Holding sd_lock prevents q_next from changing in
670 		 * this stream.
671 		 */
672 		mutex_enter(&stp->sd_lock);
673 		if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
674 
675 			/*
676 			 * sleep until awakened by strwsrv() or timeout
677 			 */
678 			for (;;) {
679 				mutex_enter(QLOCK(qp->q_next));
				/* nothing left queued below: done draining */
680 				if (!(qp->q_next->q_mblkcnt)) {
681 					mutex_exit(QLOCK(qp->q_next));
682 					break;
683 				}
684 				stp->sd_flag |= WSLEEP;
685 
686 				/* ensure strwsrv gets enabled */
687 				qp->q_next->q_flag |= QWANTW;
688 				mutex_exit(QLOCK(qp->q_next));
689 				/* get out if we timed out or recv'd a signal */
690 				if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
691 				    stp->sd_closetime, 0) <= 0) {
692 					break;
693 				}
694 			}
695 			stp->sd_flag &= ~WSLEEP;
696 		}
697 		mutex_exit(&stp->sd_lock);
698 
699 		rmq = qp->q_next;
		/*
		 * When the queue about to go is the driver, wait_sq_svc()
		 * is called on the stream head's syncq before detaching it.
		 */
700 		if (rmq->q_flag & QISDRV) {
701 			ASSERT(!_SAMESTR(rmq));
702 			wait_sq_svc(_RD(qp)->q_syncq);
703 		}
704 
705 		qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
706 	}
707 
708 	/* Prevent qenable from re-enabling the stream head queue */
709 	disable_svc(_RD(qp));
710 
711 	/*
712 	 * Wait until service procedure of each queue is
713 	 * run, if QINSERVICE is set.
714 	 */
715 	wait_svc(_RD(qp));
716 
717 	/*
718 	 * Now, flush both queues.
719 	 */
720 	flushq(_RD(qp), FLUSHALL);
721 	flushq(qp, FLUSHALL);
722 
723 	/*
724 	 * If the write queue of the stream head is pointing to a
725 	 * read queue, we have a twisted stream.  If the read queue
726 	 * is alive, convert the stream head queues into a dead end.
727 	 * If the read queue is dead, free the dead pair.
728 	 */
729 	if (qp->q_next && !_SAMESTR(qp)) {
730 		if (qp->q_next->q_qinfo == &deadrend) {	/* half-closed pipe */
			/*
			 * The other end was closed first and left its stream
			 * head behind as a dead end; free that stream head
			 * and its queue pair along with our own queue pair.
			 */
731 			flushq(qp->q_next, FLUSHALL); /* ensure no message */
732 			shfree(qp->q_next->q_stream);
733 			freeq(qp->q_next);
734 			freeq(_RD(qp));
735 		} else if (qp->q_next == _RD(qp)) {	/* fifo */
736 			freeq(_RD(qp));
737 		} else {				/* pipe */
			/*
			 * The other end is still alive: keep this stream
			 * head and its queues as a dead end (not freed
			 * here); the half-closed branch above frees them
			 * when the other end is closed.
			 */
738 			freestp = 0;
739 			/*
740 			 * The q_info pointers are never accessed when
741 			 * SQLOCK is held.
742 			 */
743 			ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
744 			mutex_enter(SQLOCK(qp->q_syncq));
745 			qp->q_qinfo = &deadwend;
746 			_RD(qp)->q_qinfo = &deadrend;
747 			mutex_exit(SQLOCK(qp->q_syncq));
748 		}
749 	} else {
750 		freeq(_RD(qp)); /* free stream head queue pair */
751 	}
752 
753 	mutex_enter(&vp->v_lock);
754 	if (stp->sd_iocblk) {
		/* (mblk_t *)-1 is a marker value, not a message to free */
755 		if (stp->sd_iocblk != (mblk_t *)-1) {
756 			freemsg(stp->sd_iocblk);
757 		}
758 		stp->sd_iocblk = NULL;
759 	}
	/* Break the vnode <-> stream head association under v_lock. */
760 	stp->sd_vnode = NULL;
761 	vp->v_stream = NULL;
762 	mutex_exit(&vp->v_lock);
	/* Close finished: clear STRCLOSE and wake sd_monitor waiters. */
763 	mutex_enter(&stp->sd_lock);
764 	stp->sd_flag &= ~STRCLOSE;
765 	cv_broadcast(&stp->sd_monitor);
766 	mutex_exit(&stp->sd_lock);
767 
768 	if (freestp)
769 		shfree(stp);
770 	return (0);
771 }
772 
773 static int
774 strsink(queue_t *q, mblk_t *bp)
775 {
776 	struct copyresp *resp;
777 
778 	switch (bp->b_datap->db_type) {
779 	case M_FLUSH:
780 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
781 			*bp->b_rptr &= ~FLUSHR;
782 			bp->b_flag |= MSGNOLOOP;
783 			/*
784 			 * Protect against the driver passing up
785 			 * messages after it has done a qprocsoff.
786 			 */
787 			if (_OTHERQ(q)->q_next == NULL)
788 				freemsg(bp);
789 			else
790 				qreply(q, bp);
791 		} else {
792 			freemsg(bp);
793 		}
794 		break;
795 
796 	case M_COPYIN:
797 	case M_COPYOUT:
798 		if (bp->b_cont) {
799 			freemsg(bp->b_cont);
800 			bp->b_cont = NULL;
801 		}
802 		bp->b_datap->db_type = M_IOCDATA;
803 		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
804 		resp = (struct copyresp *)bp->b_rptr;
805 		resp->cp_rval = (caddr_t)1;	/* failure */
806 		/*
807 		 * Protect against the driver passing up
808 		 * messages after it has done a qprocsoff.
809 		 */
810 		if (_OTHERQ(q)->q_next == NULL)
811 			freemsg(bp);
812 		else
813 			qreply(q, bp);
814 		break;
815 
816 	case M_IOCTL:
817 		if (bp->b_cont) {
818 			freemsg(bp->b_cont);
819 			bp->b_cont = NULL;
820 		}
821 		bp->b_datap->db_type = M_IOCNAK;
822 		/*
823 		 * Protect against the driver passing up
824 		 * messages after it has done a qprocsoff.
825 		 */
826 		if (_OTHERQ(q)->q_next == NULL)
827 			freemsg(bp);
828 		else
829 			qreply(q, bp);
830 		break;
831 
832 	default:
833 		freemsg(bp);
834 		break;
835 	}
836 
837 	return (0);
838 }
839 
840 /*
841  * Clean up after a process when it closes a stream.  This is called
842  * from closef for all closes, whereas strclose is called only for the
843  * last close on a stream.  The siglist is scanned for entries for the
844  * current process, and these are removed.
845  */
846 void
847 strclean(struct vnode *vp)
848 {
849 	strsig_t *ssp, *pssp, *tssp;
850 	stdata_t *stp;
851 	int update = 0;
852 
853 	TRACE_1(TR_FAC_STREAMS_FR,
854 		TR_STRCLEAN, "strclean:%p", vp);
855 	stp = vp->v_stream;
856 	pssp = NULL;
857 	mutex_enter(&stp->sd_lock);
858 	ssp = stp->sd_siglist;
859 	while (ssp) {
860 		if (ssp->ss_pidp == curproc->p_pidp) {
861 			tssp = ssp->ss_next;
862 			if (pssp)
863 				pssp->ss_next = tssp;
864 			else
865 				stp->sd_siglist = tssp;
866 			mutex_enter(&pidlock);
867 			PID_RELE(ssp->ss_pidp);
868 			mutex_exit(&pidlock);
869 			kmem_free(ssp, sizeof (strsig_t));
870 			update = 1;
871 			ssp = tssp;
872 		} else {
873 			pssp = ssp;
874 			ssp = ssp->ss_next;
875 		}
876 	}
877 	if (update) {
878 		stp->sd_sigflags = 0;
879 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
880 			stp->sd_sigflags |= ssp->ss_events;
881 	}
882 	mutex_exit(&stp->sd_lock);
883 }
884 
885 /*
886  * Used on the last close to remove any remaining items on the siglist.
887  * These could be present on the siglist due to I_ESETSIG calls that
888  * use process groups or processed that do not have an open file descriptor
889  * for this stream (Such entries would not be removed by strclean).
890  */
891 static void
892 strcleanall(struct vnode *vp)
893 {
894 	strsig_t *ssp, *nssp;
895 	stdata_t *stp;
896 
897 	stp = vp->v_stream;
898 	mutex_enter(&stp->sd_lock);
899 	ssp = stp->sd_siglist;
900 	stp->sd_siglist = NULL;
901 	while (ssp) {
902 		nssp = ssp->ss_next;
903 		mutex_enter(&pidlock);
904 		PID_RELE(ssp->ss_pidp);
905 		mutex_exit(&pidlock);
906 		kmem_free(ssp, sizeof (strsig_t));
907 		ssp = nssp;
908 	}
909 	stp->sd_sigflags = 0;
910 	mutex_exit(&stp->sd_lock);
911 }
912 
913 /*
914  * Retrieve the next message from the logical stream head read queue
915  * using either rwnext (if sync stream) or getq_noenab.
916  * It is the callers responsibility to call qbackenable after
917  * it is finished with the message. The caller should not call
918  * qbackenable until after any putback calls to avoid spurious backenabling.
 *
 *	stp	stream head; sd_lock must be held on entry and is held on
 *		return, but it is dropped across rwnext() and qreply()
 *	q	stream head read queue
 *	uiop	caller's uio; when NULL the rwnext() path is skipped
 *	first	non-zero on the caller's first attempt, in which case the
 *		rwnext() downcall is made only if RSLEEP is set in sd_wakeq
 *	errorp	always written: 0, or the rwnext() error when that error
 *		is the reason NULL is returned
 *
 * Returns the message, or NULL when none is available or on error.
919  */
920 mblk_t *
921 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
922     int *errorp)
923 {
924 	mblk_t *bp;
925 	int error;
926 
927 	ASSERT(MUTEX_HELD(&stp->sd_lock));
928 	/* Holding sd_lock prevents the read queue from changing  */
929 
930 	if (uiop != NULL && stp->sd_struiordq != NULL &&
931 	    q->q_first == NULL &&
932 	    (!first || (stp->sd_wakeq & RSLEEP))) {
933 		/*
934 		 * Stream supports rwnext() for the read side.
935 		 * If this is the first time we're called by e.g. strread
936 		 * only do the downcall if there is a deferred wakeup
937 		 * (registered in sd_wakeq).
938 		 */
939 		struiod_t uiod;
940 
941 		if (first)
942 			stp->sd_wakeq &= ~RSLEEP;
943 
		/* rwnext() gets a copy of the caller's uio in uiod.d_uio */
944 		(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
945 			sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
946 		uiod.d_mp = 0;
947 		/*
948 		 * Mark that a thread is in rwnext on the read side
949 		 * to prevent strrput from nacking ioctls immediately.
950 		 * When the last concurrent rwnext returns
951 		 * the ioctls are nack'ed.
952 		 */
953 		ASSERT(MUTEX_HELD(&stp->sd_lock));
954 		stp->sd_struiodnak++;
955 		/*
956 		 * Note: rwnext will drop sd_lock.
957 		 */
958 		error = rwnext(q, &uiod);
959 		ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
960 		mutex_enter(&stp->sd_lock);
961 		stp->sd_struiodnak--;
		/*
		 * Once no thread is left in rwnext() (count back to zero),
		 * NAK each message queued on sd_struionak.  The count is
		 * re-tested on every pass because sd_lock is dropped
		 * around qreply().
		 */
962 		while (stp->sd_struiodnak == 0 &&
963 		    ((bp = stp->sd_struionak) != NULL)) {
964 			stp->sd_struionak = bp->b_next;
965 			bp->b_next = NULL;
966 			bp->b_datap->db_type = M_IOCNAK;
967 			/*
968 			 * Protect against the driver passing up
969 			 * messages after it has done a qprocsoff.
970 			 */
971 			if (_OTHERQ(q)->q_next == NULL)
972 				freemsg(bp);
973 			else {
974 				mutex_exit(&stp->sd_lock);
975 				qreply(q, bp);
976 				mutex_enter(&stp->sd_lock);
977 			}
978 		}
979 		ASSERT(MUTEX_HELD(&stp->sd_lock));
980 		if (error == 0 || error == EWOULDBLOCK) {
			/* a message handed back in uiod.d_mp is the result */
981 			if ((bp = uiod.d_mp) != NULL) {
982 				*errorp = 0;
983 				ASSERT(MUTEX_HELD(&stp->sd_lock));
984 				return (bp);
985 			}
986 			error = 0;
987 		} else if (error == EINVAL) {
988 			/*
989 			 * The stream plumbing must have
990 			 * changed while we were away, so
991 			 * just turn off rwnext()s.
992 			 */
993 			error = 0;
994 		} else if (error == EBUSY) {
995 			/*
996 			 * The module might have data in transit using putnext
997 			 * Fall back on waiting + getq.
998 			 */
999 			error = 0;
1000 		} else {
			/* any other rwnext() error is reported to the caller */
1001 			*errorp = error;
1002 			ASSERT(MUTEX_HELD(&stp->sd_lock));
1003 			return (NULL);
1004 		}
1005 		/*
1006 		 * Try a getq in case a rwnext() generated mblk
1007 		 * has bubbled up via strrput().
1008 		 */
1009 	}
1010 	*errorp = 0;
1011 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1012 	return (getq_noenab(q));
1013 }
1014 
1015 /*
1016  * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1017  * If the message does not fit in the uio the remainder of it is returned;
1018  * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
1019  * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
1020  * the error code, the message is consumed, and NULL is returned.
1021  */
1022 static mblk_t *
1023 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1024 {
1025 	int error;
1026 	ptrdiff_t n;
1027 	mblk_t *nbp;
1028 
1029 	ASSERT(bp->b_wptr >= bp->b_rptr);
1030 
1031 	do {
1032 		if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1033 			ASSERT(n > 0);
1034 
1035 			error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1036 			if (error != 0) {
1037 				freemsg(bp);
1038 				*errorp = error;
1039 				return (NULL);
1040 			}
1041 		}
1042 
1043 		bp->b_rptr += n;
1044 		while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1045 			nbp = bp;
1046 			bp = bp->b_cont;
1047 			freeb(nbp);
1048 		}
1049 	} while (bp != NULL && uiop->uio_resid > 0);
1050 
1051 	*errorp = 0;
1052 	return (bp);
1053 }
1054 
1055 /*
1056  * Read a stream according to the mode flags in sd_read_opt:
1057  *
1058  * (default mode)		- Byte stream, msg boundaries are ignored
1059  * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
1060  *				any data remaining in msg
1061  * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
1062  *				any remaining data on head of read queue
1063  *
1064  * Consume readable messages on the front of the queue until
1065  * ttolwp(curthread)->lwp_count
1066  * is satisfied, the readable messages are exhausted, or a message
1067  * boundary is reached in a message mode.  If no data was read and
1068  * the stream was not opened with the NDELAY flag, block until data arrives.
1069  * Otherwise return the data read and update the count.
1070  *
1071  * In default mode a 0 length message signifies end-of-file and terminates
1072  * a read in progress.  The 0 length message is removed from the queue
1073  * only if it is the only message read (no data is read).
1074  *
1075  * An attempt to read an M_PROTO or M_PCPROTO message results in an
1076  * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
1077  * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
1078  * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
1079  * are unlinked from any M_DATA blocks in the message, the protos are
1080  * thrown away, and the data is read.
      *
      * Arguments:
      *	vp	- vnode of the stream; vp->v_stream must be set.
      *	uiop	- describes the caller's buffer; uio_resid bounds the read and
      *		  uio_fmode is consulted for FNDELAY.
      *	crp	- caller's credentials; in this function it is only handed to
      *		  the trace points.
      *
      * Returns 0 or an errno value.  Data is delivered through uiop by
      * struiocopyout().
      *
      * Locking: sd_lock is held at the top of every pass of the outer loop and
      * while waiting for a message.  It is dropped before a dequeued message is
      * examined, so each switch arm must hold it again before it either does
      * `continue' or jumps to `oops'; arms that leave via `oops1' must not hold
      * it.
1081  */
1082 /* ARGSUSED */
1083 int
1084 strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
1085 {
1086 	struct stdata *stp;
1087 	mblk_t *bp, *nbp;
1088 	queue_t *q;
1089 	int error = 0;
1090 	uint_t old_sd_flag;	/* sd_flag sampled at the top of each pass */
1091 	int first;		/* 1 until strget() is retried after a wait */
1092 	char rflg;		/* set once data has started to be copied out */
1093 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
1094 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
1095 	short delim;		/* msg has MSGDELIM and stream has STRDELIM */
1096 	unsigned char pri = 0;	/* b_band of the message being processed */
1097 	char waitflag;		/* strwaitq() flags; gains NOINTR after data */
1098 	unsigned char type;	/* db_type of the dequeued message */
1099 
1100 	TRACE_1(TR_FAC_STREAMS_FR,
1101 		TR_STRREAD_ENTER, "strread:%p", vp);
1102 	ASSERT(vp->v_stream);
1103 	stp = vp->v_stream;
1104 
     	/*
     	 * straccess(JCREAD) check; skipped for FIFOs and for streams that
     	 * have no sd_sidp.
     	 */
1105 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
1106 		if (error = straccess(stp, JCREAD))
1107 			return (error);
1108 
1109 	mutex_enter(&stp->sd_lock);
     	/* Fail up front if a read error or multiplexor link is recorded. */
1110 	if (stp->sd_flag & (STRDERR|STPLEX)) {
1111 		error = strgeterr(stp, STRDERR|STPLEX, 0);
1112 		if (error != 0) {
1113 			mutex_exit(&stp->sd_lock);
1114 			return (error);
1115 		}
1116 	}
1117 
1118 	/*
1119 	 * Loop terminates when uiop->uio_resid == 0.
     	 * (It is also left through the `oops'/`oops1' exits below on
     	 * errors, hangup/EOF, marks, delimiters and message-mode boundaries.)
1120 	 */
1121 	rflg = 0;
1122 	waitflag = READWAIT;
1123 	q = _RD(stp->sd_wrq);
1124 	for (;;) {
1125 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1126 		old_sd_flag = stp->sd_flag;
1127 		mark = 0;
1128 		delim = 0;
1129 		first = 1;
     		/*
     		 * Fetch the next message, sleeping in strwaitq() until one
     		 * shows up or one of the give-up conditions below holds.
     		 */
1130 		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
1131 			int done = 0;
1132 
1133 			ASSERT(MUTEX_HELD(&stp->sd_lock));
1134 
1135 			if (error != 0)
1136 				goto oops;
1137 
1138 			if (stp->sd_flag & (STRHUP|STREOF)) {
1139 				goto oops;
1140 			}
     			/*
     			 * Some data was already read: return it rather than
     			 * wait for more, unless the stream is in STRDELIM mode.
     			 */
1141 			if (rflg && !(stp->sd_flag & STRDELIM)) {
1142 				goto oops;
1143 			}
1144 			/*
1145 			 * If a read(fd,buf,0) has been done, there is no
1146 			 * need to sleep. We always have zero bytes to
1147 			 * return.
1148 			 */
1149 			if (uiop->uio_resid == 0) {
1150 				goto oops;
1151 			}
1152 
1153 			qbackenable(q, 0);
1154 
1155 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
1156 				"strread calls strwaitq:%p, %p, %p",
1157 				vp, uiop, crp);
1158 			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
1159 			    uiop->uio_fmode, -1, &done)) != 0 || done) {
1160 				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
1161 					"strread error or done:%p, %p, %p",
1162 					vp, uiop, crp);
     				/*
     				 * With FNDELAY on the file and OLDNDELAY on the
     				 * stream, EAGAIN is turned into a successful
     				 * return.
     				 */
1163 				if ((uiop->uio_fmode & FNDELAY) &&
1164 				    (stp->sd_flag & OLDNDELAY) &&
1165 				    (error == EAGAIN))
1166 					error = 0;
1167 				goto oops;
1168 			}
1169 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
1170 				"strread awakes:%p, %p, %p", vp, uiop, crp);
     			/*
     			 * Repeat the straccess() check after sleeping.  It is
     			 * called without sd_lock, so a failure leaves through
     			 * oops1 (lock not held).
     			 */
1171 			if (stp->sd_sidp != NULL &&
1172 			    stp->sd_vnode->v_type != VFIFO) {
1173 				mutex_exit(&stp->sd_lock);
1174 				if (error = straccess(stp, JCREAD))
1175 					goto oops1;
1176 				mutex_enter(&stp->sd_lock);
1177 			}
1178 			first = 0;
1179 		}
1180 		ASSERT(MUTEX_HELD(&stp->sd_lock));
1181 		ASSERT(bp);
1182 		pri = bp->b_band;
1183 		/*
1184 		 * Extract any mark information. If the message is not
1185 		 * completely consumed this information will be put in the mblk
1186 		 * that is putback.
1187 		 * If MSGMARKNEXT is set and the message is completely consumed
1188 		 * the STRATMARK flag will be set below. Likewise, if
1189 		 * MSGNOTMARKNEXT is set and the message is
1190 		 * completely consumed STRNOTATMARK will be set.
1191 		 *
1192 		 * For some unknown reason strread only breaks the read at the
1193 		 * last mark.
1194 		 */
1195 		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
1196 		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
1197 			(MSGMARKNEXT|MSGNOTMARKNEXT));
1198 		if (mark != 0 && bp == stp->sd_mark) {
     			/*
     			 * This is the message recorded in sd_mark.  If data
     			 * has already been read, stop in front of it.
     			 */
1199 			if (rflg) {
1200 				putback(stp, q, bp, pri);
1201 				goto oops;
1202 			}
1203 			mark |= _LASTMARK;
1204 			stp->sd_mark = NULL;
1205 		}
1206 		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
1207 			delim = 1;
     		/* The message is off the queue; process it without sd_lock. */
1208 		mutex_exit(&stp->sd_lock);
1209 
1210 		if (STREAM_NEEDSERVICE(stp))
1211 			stream_runservice(stp);
1212 
1213 		type = bp->b_datap->db_type;
1214 
1215 		switch (type) {
1216 
1217 		case M_DATA:
1218 ismdata:
     			/*
     			 * Also entered by goto from the M_PROTO/M_PCPROTO arm
     			 * when RD_PROTDAT or RD_PROTDIS is in effect.
     			 */
1219 			if (msgnodata(bp)) {
1220 				if (mark || delim) {
1221 					freemsg(bp);
1222 				} else if (rflg) {
1223 
1224 					/*
1225 					 * If already read data put zero
1226 					 * length message back on queue else
1227 					 * free msg and return 0.
1228 					 */
1229 					bp->b_band = pri;
1230 					mutex_enter(&stp->sd_lock);
1231 					putback(stp, q, bp, pri);
1232 					mutex_exit(&stp->sd_lock);
1233 				} else {
1234 					freemsg(bp);
1235 				}
1236 				error =  0;
1237 				goto oops1;
1238 			}
1239 
1240 			rflg = 1;
     			/* From here on further waits pass NOINTR to strwaitq(). */
1241 			waitflag |= NOINTR;
1242 			bp = struiocopyout(bp, uiop, &error);
     			/* On error struiocopyout() has already freed the message. */
1243 			if (error != 0)
1244 				goto oops1;
1245 
1246 			mutex_enter(&stp->sd_lock);
1247 			if (bp) {
1248 				/*
1249 				 * Have remaining data in message.
1250 				 * Free msg if in discard mode.
1251 				 */
1252 				if (stp->sd_read_opt & RD_MSGDIS) {
1253 					freemsg(bp);
1254 				} else {
     					/*
     					 * Restore band, mark and delimiter
     					 * state on the remainder before it is
     					 * put back at the head of the queue.
     					 */
1255 					bp->b_band = pri;
1256 					if ((mark & _LASTMARK) &&
1257 					    (stp->sd_mark == NULL))
1258 						stp->sd_mark = bp;
1259 					bp->b_flag |= mark & ~_LASTMARK;
1260 					if (delim)
1261 						bp->b_flag |= MSGDELIM;
1262 					if (msgnodata(bp))
1263 						freemsg(bp);
1264 					else
1265 						putback(stp, q, bp, pri);
1266 				}
1267 			} else {
1268 				/*
1269 				 * Consumed the complete message.
1270 				 * Move the MSG*MARKNEXT information
1271 				 * to the stream head just in case
1272 				 * the read queue becomes empty.
1273 				 *
1274 				 * If the stream head was at the mark
1275 				 * (STRATMARK) before we dropped sd_lock above
1276 				 * and some data was consumed then we have
1277 				 * moved past the mark thus STRATMARK is
1278 				 * cleared. However, if a message arrived in
1279 				 * strrput during the copyout above causing
1280 				 * STRATMARK to be set we can not clear that
1281 				 * flag.
1282 				 */
1283 				if (mark &
1284 				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
1285 					if (mark & MSGMARKNEXT) {
1286 						stp->sd_flag &= ~STRNOTATMARK;
1287 						stp->sd_flag |= STRATMARK;
1288 					} else if (mark & MSGNOTMARKNEXT) {
1289 						stp->sd_flag &= ~STRATMARK;
1290 						stp->sd_flag |= STRNOTATMARK;
1291 					} else {
1292 						stp->sd_flag &=
1293 						    ~(STRATMARK|STRNOTATMARK);
1294 					}
1295 				} else if (rflg && (old_sd_flag & STRATMARK)) {
1296 					stp->sd_flag &= ~STRATMARK;
1297 				}
1298 			}
1299 
1300 			/*
1301 			 * Check for signal messages at the front of the read
1302 			 * queue and generate the signal(s) if appropriate.
1303 			 * The only signal that can be on queue is M_SIG at
1304 			 * this point.
1305 			 */
1306 			while ((((bp = q->q_first)) != NULL) &&
1307 				(bp->b_datap->db_type == M_SIG)) {
1308 				bp = getq_noenab(q);
1309 				/*
1310 				 * sd_lock is held so the content of the
1311 				 * read queue can not change.
1312 				 */
1313 				ASSERT(bp != NULL &&
1314 					bp->b_datap->db_type == M_SIG);
1315 				strsignal_nolock(stp, *bp->b_rptr,
1316 					(int32_t)bp->b_band);
     				/* Free the message and run services unlocked. */
1317 				mutex_exit(&stp->sd_lock);
1318 				freemsg(bp);
1319 				if (STREAM_NEEDSERVICE(stp))
1320 					stream_runservice(stp);
1321 				mutex_enter(&stp->sd_lock);
1322 			}
1323 
     			/*
     			 * Stop when the buffer is full, the last mark or a
     			 * delimiter was reached, or a message mode is set;
     			 * otherwise go around for the next message.
     			 */
1324 			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
1325 			    delim ||
1326 			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
1327 				goto oops;
1328 			}
1329 			continue;
1330 
1331 		case M_SIG:
     			/* Post the signal, then retake sd_lock for the next pass. */
1332 			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
1333 			freemsg(bp);
1334 			mutex_enter(&stp->sd_lock);
1335 			continue;
1336 
1337 		case M_PROTO:
1338 		case M_PCPROTO:
1339 			/*
1340 			 * Only data messages are readable.
1341 			 * Any others generate an error, unless
1342 			 * RD_PROTDIS or RD_PROTDAT is set.
1343 			 */
1344 			if (stp->sd_read_opt & RD_PROTDAT) {
     				/*
     				 * NOTE(review): this walk follows b_next, not
     				 * b_cont.  Whether later proto blocks of the
     				 * same message were meant to be retyped as
     				 * well cannot be told from here - confirm
     				 * before changing.
     				 */
1345 				for (nbp = bp; nbp; nbp = nbp->b_next) {
1346 				    if ((nbp->b_datap->db_type == M_PROTO) ||
1347 					(nbp->b_datap->db_type == M_PCPROTO))
1348 					nbp->b_datap->db_type = M_DATA;
1349 				    else
1350 					break;
1351 				}
1352 				/*
1353 				 * clear stream head hi pri flag based on
1354 				 * first message
1355 				 */
1356 				if (type == M_PCPROTO) {
1357 					mutex_enter(&stp->sd_lock);
1358 					stp->sd_flag &= ~STRPRI;
1359 					mutex_exit(&stp->sd_lock);
1360 				}
1361 				goto ismdata;
1362 			} else if (stp->sd_read_opt & RD_PROTDIS) {
1363 				/*
1364 				 * discard non-data messages
1365 				 */
1366 				while (bp &&
1367 				    ((bp->b_datap->db_type == M_PROTO) ||
1368 				    (bp->b_datap->db_type == M_PCPROTO))) {
1369 					nbp = unlinkb(bp);
1370 					freeb(bp);
1371 					bp = nbp;
1372 				}
1373 				/*
1374 				 * clear stream head hi pri flag based on
1375 				 * first message
1376 				 */
1377 				if (type == M_PCPROTO) {
1378 					mutex_enter(&stp->sd_lock);
1379 					stp->sd_flag &= ~STRPRI;
1380 					mutex_exit(&stp->sd_lock);
1381 				}
     				/*
     				 * Read whatever is left as data; if nothing
     				 * is left, leave the switch and fetch the
     				 * next message.
     				 */
1382 				if (bp) {
1383 					bp->b_band = pri;
1384 					goto ismdata;
1385 				} else {
1386 					break;
1387 				}
1388 			}
1389 			/* FALLTHRU */
1390 		case M_PASSFP:
1391 			if ((bp->b_datap->db_type == M_PASSFP) &&
1392 			    (stp->sd_read_opt & RD_PROTDIS)) {
1393 				freemsg(bp);
1394 				break;
1395 			}
     			/*
     			 * Not readable: put the message back.  EBADMSG is
     			 * returned only if no data has been read yet.
     			 */
1396 			mutex_enter(&stp->sd_lock);
1397 			putback(stp, q, bp, pri);
1398 			mutex_exit(&stp->sd_lock);
1399 			if (rflg == 0)
1400 				error = EBADMSG;
1401 			goto oops1;
1402 
1403 		default:
1404 			/*
1405 			 * Garbage on stream head read queue.
     			 * The message is dropped; `error' is not modified on
     			 * this path.
1406 			 */
1407 			cmn_err(CE_WARN, "bad %x found at stream head\n",
1408 				bp->b_datap->db_type);
1409 			freemsg(bp);
1410 			goto oops1;
1411 		}
     		/* Reached via `break' above: retake sd_lock for the next pass. */
1412 		mutex_enter(&stp->sd_lock);
1413 	}
     /* Exit with sd_lock held. */
1414 oops:
1415 	mutex_exit(&stp->sd_lock);
     /* Exit with sd_lock already dropped. */
1416 oops1:
1417 	qbackenable(q, pri);
1418 	return (error);
1419 #undef	_LASTMARK
1420 }
1421 
1422 /*
1423  * Default processing of M_PROTO/M_PCPROTO messages.
1424  * Determine which wakeups and signals are needed.
1425  * This can be replaced by a user-specified procedure for kernel users
1426  * of STREAMS.
1427  */
1428 /* ARGSUSED */
1429 mblk_t *
1430 strrput_proto(vnode_t *vp, mblk_t *mp,
1431     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1432     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1433 {
1434 	*wakeups = RSLEEP;
1435 	*allmsgsigs = 0;
1436 
1437 	switch (mp->b_datap->db_type) {
1438 	case M_PROTO:
1439 		if (mp->b_band == 0) {
1440 			*firstmsgsigs = S_INPUT | S_RDNORM;
1441 			*pollwakeups = POLLIN | POLLRDNORM;
1442 		} else {
1443 			*firstmsgsigs = S_INPUT | S_RDBAND;
1444 			*pollwakeups = POLLIN | POLLRDBAND;
1445 		}
1446 		break;
1447 	case M_PCPROTO:
1448 		*firstmsgsigs = S_HIPRI;
1449 		*pollwakeups = POLLPRI;
1450 		break;
1451 	}
1452 	return (mp);
1453 }
1454 
1455 /*
1456  * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1457  * M_PASSFP messages.
1458  * Determine which wakeups and signals are needed.
1459  * This can be replaced by a user-specified procedure for kernel users
1460  * of STREAMS.
1461  */
1462 /* ARGSUSED */
1463 mblk_t *
1464 strrput_misc(vnode_t *vp, mblk_t *mp,
1465     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1466     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1467 {
1468 	*wakeups = 0;
1469 	*firstmsgsigs = 0;
1470 	*allmsgsigs = 0;
1471 	*pollwakeups = 0;
1472 	return (mp);
1473 }
1474 
1475 /*
1476  * Stream read put procedure.  Called from downstream driver/module
1477  * with messages for the stream head.  Data, protocol, and in-stream
1478  * signal messages are placed on the queue, others are handled directly.
      *
      * Arguments:
      *	q	- the stream head read queue; q->q_ptr is the stdata.
      *	bp	- the message being delivered; it must not be linked to
      *		  another message through b_next.
      *
      * Every return path returns 0.
      *
      * The function works in two phases.  The first switch decides, per
      * message type, which wakeups, signals and poll events are wanted and
      * leaves with sd_lock held.  The code from `one_more' on then queues (or
      * otherwise disposes of) each message of the resulting b_next chain and
      * issues the wakeups.
1479  */
1480 int
1481 strrput(queue_t *q, mblk_t *bp)
1482 {
1483 	struct stdata	*stp;
1484 	ulong_t		rput_opt;
1485 	strwakeup_t	wakeups;
1486 	strsigset_t	firstmsgsigs;	/* Signals if first message on queue */
1487 	strsigset_t	allmsgsigs;	/* Signals for all messages */
1488 	strsigset_t	signals;	/* Signals events to generate */
1489 	strpollset_t	pollwakeups;
1490 	mblk_t		*nextbp;
1491 	uchar_t		band = 0;
1492 	int		hipri_sig;
1493 
1494 	stp = (struct stdata *)q->q_ptr;
1495 	/*
1496 	 * Use rput_opt for optimized access to the SR_ flags except
1497 	 * SR_POLLIN. That flag has to be checked under sd_lock since it
1498 	 * is modified by strpoll().
1499 	 */
1500 	rput_opt = stp->sd_rput_opt;
1501 
1502 	ASSERT(qclaimed(q));
1503 	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
1504 		"strrput called with message type:q %p bp %p", q, bp);
1505 
1506 	/*
1507 	 * Perform initial processing and pass to the parameterized functions.
1508 	 */
1509 	ASSERT(bp->b_next == NULL);
1510 
1511 	switch (bp->b_datap->db_type) {
1512 	case M_DATA:
1513 		/*
1514 		 * sockfs is the only consumer of STREOF and when it is set,
1515 		 * it implies that the receiver is not interested in receiving
1516 		 * any more data, hence the mblk is freed to prevent unnecessary
1517 		 * message queueing at the stream head.
     		 *
     		 * NOTE(review): the test below compares the whole of sd_flag
     		 * with STREOF, so it is true only when STREOF is the sole flag
     		 * set, and sd_flag is read here without sd_lock.  If the
     		 * intent is "STREOF is set" this would be a bit test -
     		 * confirm with the sockfs side before changing.
1518 		 */
1519 		if (stp->sd_flag == STREOF) {
1520 			freemsg(bp);
1521 			return (0);
1522 		}
1523 		if ((rput_opt & SR_IGN_ZEROLEN) &&
1524 		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
1525 			/*
1526 			 * Ignore zero-length M_DATA messages. These might be
1527 			 * generated by some transports.
1528 			 * The zero-length M_DATA messages, even if they
1529 			 * are ignored, should affect the atmark tracking and
1530 			 * should wake up a thread sleeping in strwaitmark.
1531 			 */
1532 			mutex_enter(&stp->sd_lock);
1533 			if (bp->b_flag & MSGMARKNEXT) {
1534 				/*
1535 				 * Record the position of the mark either
1536 				 * in q_last or in STRATMARK.
1537 				 */
1538 				if (q->q_last != NULL) {
1539 					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
1540 					q->q_last->b_flag |= MSGMARKNEXT;
1541 				} else {
1542 					stp->sd_flag &= ~STRNOTATMARK;
1543 					stp->sd_flag |= STRATMARK;
1544 				}
1545 			} else if (bp->b_flag & MSGNOTMARKNEXT) {
1546 				/*
1547 				 * Record that this is not the position of
1548 				 * the mark either in q_last or in
1549 				 * STRNOTATMARK.
1550 				 */
1551 				if (q->q_last != NULL) {
1552 					q->q_last->b_flag &= ~MSGMARKNEXT;
1553 					q->q_last->b_flag |= MSGNOTMARKNEXT;
1554 				} else {
1555 					stp->sd_flag &= ~STRATMARK;
1556 					stp->sd_flag |= STRNOTATMARK;
1557 				}
1558 			}
1559 			if (stp->sd_flag & RSLEEP) {
1560 				stp->sd_flag &= ~RSLEEP;
1561 				cv_broadcast(&q->q_wait);
1562 			}
1563 			mutex_exit(&stp->sd_lock);
1564 			freemsg(bp);
1565 			return (0);
1566 		}
     		/* Band 0 is reported as normal data, other bands as banded. */
1567 		wakeups = RSLEEP;
1568 		if (bp->b_band == 0) {
1569 			firstmsgsigs = S_INPUT | S_RDNORM;
1570 			pollwakeups = POLLIN | POLLRDNORM;
1571 		} else {
1572 			firstmsgsigs = S_INPUT | S_RDBAND;
1573 			pollwakeups = POLLIN | POLLRDBAND;
1574 		}
1575 		if (rput_opt & SR_SIGALLDATA)
1576 			allmsgsigs = firstmsgsigs;
1577 		else
1578 			allmsgsigs = 0;
1579 
1580 		mutex_enter(&stp->sd_lock);
1581 		if ((rput_opt & SR_CONSOL_DATA) &&
1582 		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
1583 			/*
1584 			 * Consolidate an M_DATA message onto an M_DATA,
1585 			 * M_PROTO, or M_PCPROTO by merging it with q_last.
1586 			 * The consolidation does not take place if
1587 			 * the old message is marked with either of the
1588 			 * marks or the delim flag or if the new
1589 			 * message is marked with MSGMARK. The MSGMARK
1590 			 * check is needed to handle the odd semantics of
1591 			 * MSGMARK where essentially the whole message
1592 			 * is to be treated as marked.
1593 			 * Carry any MSGMARKNEXT  and MSGNOTMARKNEXT from the
1594 			 * new message to the front of the b_cont chain.
1595 			 */
1596 			mblk_t *lbp;
1597 
1598 			lbp = q->q_last;
1599 			if (lbp != NULL &&
1600 			    (lbp->b_datap->db_type == M_DATA ||
1601 			    lbp->b_datap->db_type == M_PROTO ||
1602 			    lbp->b_datap->db_type == M_PCPROTO) &&
1603 			    !(lbp->b_flag & (MSGDELIM|MSGMARK|
1604 			    MSGMARKNEXT))) {
     				/*
     				 * Take the last message off the queue; the
     				 * merged message is queued again below.
     				 */
1605 				rmvq_noenab(q, lbp);
1606 				/*
1607 				 * The first message in the b_cont list
1608 				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
1609 				 * We need to handle the case where we
1610 				 * are appending
1611 				 *
1612 				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
1613 				 * 2) a MSGMARKNEXT to a plain message.
1614 				 * 3) a MSGNOTMARKNEXT to a plain message
1615 				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
1616 				 *    message.
1617 				 *
1618 				 * Thus we never append a MSGMARKNEXT or
1619 				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
1620 				 */
1621 				if (bp->b_flag & MSGMARKNEXT) {
1622 					lbp->b_flag |= MSGMARKNEXT;
1623 					lbp->b_flag &= ~MSGNOTMARKNEXT;
1624 					bp->b_flag &= ~MSGMARKNEXT;
1625 				} else if (bp->b_flag & MSGNOTMARKNEXT) {
1626 					lbp->b_flag |= MSGNOTMARKNEXT;
1627 					bp->b_flag &= ~MSGNOTMARKNEXT;
1628 				}
1629 
1630 				linkb(lbp, bp);
1631 				bp = lbp;
1632 				/*
1633 				 * The new message logically isn't the first
1634 				 * even though the q_first check below thinks
1635 				 * it is. Clear the firstmsgsigs to make it
1636 				 * not appear to be first.
1637 				 */
1638 				firstmsgsigs = 0;
1639 			}
1640 		}
1641 		break;
1642 
1643 	case M_PASSFP:
1644 		wakeups = RSLEEP;
1645 		allmsgsigs = 0;
1646 		if (bp->b_band == 0) {
1647 			firstmsgsigs = S_INPUT | S_RDNORM;
1648 			pollwakeups = POLLIN | POLLRDNORM;
1649 		} else {
1650 			firstmsgsigs = S_INPUT | S_RDBAND;
1651 			pollwakeups = POLLIN | POLLRDBAND;
1652 		}
1653 		mutex_enter(&stp->sd_lock);
1654 		break;
1655 
1656 	case M_PROTO:
1657 	case M_PCPROTO:
     		/*
     		 * Let the stream's proto function decide on the wakeups,
     		 * signals and poll events; it may also replace bp.
     		 */
1658 		ASSERT(stp->sd_rprotofunc != NULL);
1659 		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
1660 			&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
     /* ALLSIG and ALLPOLL stay defined until the #undef in the default arm. */
1661 #define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
1662 		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
1663 #define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
1664 		POLLWRBAND)
1665 
1666 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1667 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1668 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
1669 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
1670 
1671 		mutex_enter(&stp->sd_lock);
1672 		break;
1673 
1674 	default:
     		/* Same as above, using the stream's misc function. */
1675 		ASSERT(stp->sd_rmiscfunc != NULL);
1676 		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
1677 			&wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1678 		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1679 		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1680 		ASSERT((allmsgsigs & ~ALLSIG) == 0);
1681 		ASSERT((pollwakeups & ~ALLPOLL) == 0);
1682 #undef	ALLSIG
1683 #undef	ALLPOLL
1684 		mutex_enter(&stp->sd_lock);
1685 		break;
1686 	}
1687 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1688 
1689 	/* By default generate superset of signals */
1690 	signals = (firstmsgsigs | allmsgsigs);
1691 
1692 	/*
1693 	 * The  proto and misc functions can return multiple messages
1694 	 * as a b_next chain. Such messages are processed separately.
     	 * Each pass through `one_more' is entered with sd_lock held and
     	 * handles one message of the chain; bp may also be NULL.
1695 	 */
1696 one_more:
1697 	hipri_sig = 0;
1698 	if (bp == NULL) {
1699 		nextbp = NULL;
1700 	} else {
     		/* Detach this message from the rest of the chain. */
1701 		nextbp = bp->b_next;
1702 		bp->b_next = NULL;
1703 
1704 		switch (bp->b_datap->db_type) {
1705 		case M_PCPROTO:
1706 			/*
1707 			 * Only one priority protocol message is allowed at the
1708 			 * stream head at a time.
     			 * A second one is dropped and no wakeups or signals
     			 * are issued for it.
1709 			 */
1710 			if (stp->sd_flag & STRPRI) {
1711 				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
1712 				    "M_PCPROTO already at head");
1713 				freemsg(bp);
1714 				mutex_exit(&stp->sd_lock);
1715 				goto done;
1716 			}
1717 			stp->sd_flag |= STRPRI;
1718 			hipri_sig = 1;
1719 			/* FALLTHRU */
1720 		case M_DATA:
1721 		case M_PROTO:
1722 		case M_PASSFP:
1723 			band = bp->b_band;
1724 			/*
1725 			 * Marking doesn't work well when messages
1726 			 * are marked in more than one band.  We only
1727 			 * remember the last message received, even if
1728 			 * it is placed on the queue ahead of other
1729 			 * marked messages.
1730 			 */
1731 			if (bp->b_flag & MSGMARK)
1732 				stp->sd_mark = bp;
1733 			(void) putq(q, bp);
1734 
1735 			/*
1736 			 * If message is a PCPROTO message, always use
1737 			 * firstmsgsigs to determine if a signal should be
1738 			 * sent as strrput is the only place to send
1739 			 * signals for PCPROTO. Other messages are based on
1740 			 * the STRGETINPROG flag. The flag determines if
1741 			 * strrput or (k)strgetmsg will be responsible for
1742 			 * sending the signals, in the firstmsgsigs case.
1743 			 */
1744 			if ((hipri_sig == 1) ||
1745 			    (((stp->sd_flag & STRGETINPROG) == 0) &&
1746 			    (q->q_first == bp)))
1747 				signals = (firstmsgsigs | allmsgsigs);
1748 			else
1749 				signals = allmsgsigs;
1750 			break;
1751 
1752 		default:
     			/* strrput_nondata() is called without sd_lock. */
1753 			mutex_exit(&stp->sd_lock);
1754 			(void) strrput_nondata(q, bp);
1755 			mutex_enter(&stp->sd_lock);
1756 			break;
1757 		}
1758 	}
1759 	ASSERT(MUTEX_HELD(&stp->sd_lock));
1760 	/*
1761 	 * Wake sleeping read/getmsg and cancel deferred wakeup
1762 	 */
1763 	if (wakeups & RSLEEP)
1764 		stp->sd_wakeq &= ~RSLEEP;
1765 
     	/* Keep only the wakeups whose flag is currently set in sd_flag. */
1766 	wakeups &= stp->sd_flag;
1767 	if (wakeups & RSLEEP) {
1768 		stp->sd_flag &= ~RSLEEP;
1769 		cv_broadcast(&q->q_wait);
1770 	}
1771 	if (wakeups & WSLEEP) {
1772 		stp->sd_flag &= ~WSLEEP;
1773 		cv_broadcast(&_WR(q)->q_wait);
1774 	}
1775 
1776 	if (pollwakeups != 0) {
1777 		if (pollwakeups == (POLLIN | POLLRDNORM)) {
1778 			/*
1779 			 * Can't use rput_opt since it was not
1780 			 * read when sd_lock was held and SR_POLLIN is changed
1781 			 * by strpoll() under sd_lock.
1782 			 */
1783 			if (!(stp->sd_rput_opt & SR_POLLIN))
1784 				goto no_pollwake;
1785 			stp->sd_rput_opt &= ~SR_POLLIN;
1786 		}
     		/* pollwakeup() is called with sd_lock dropped. */
1787 		mutex_exit(&stp->sd_lock);
1788 		pollwakeup(&stp->sd_pollist, pollwakeups);
1789 		mutex_enter(&stp->sd_lock);
1790 	}
1791 no_pollwake:
1792 
1793 	/*
1794 	 * strsendsig can handle multiple signals with a
1795 	 * single call.
1796 	 */
1797 	if (stp->sd_sigflags & signals)
1798 		strsendsig(stp->sd_siglist, signals, band, 0);
1799 	mutex_exit(&stp->sd_lock);
1800 
1801 
     /* Reached with sd_lock not held. */
1802 done:
1803 	if (nextbp == NULL)
1804 		return (0);
1805 
1806 	/*
1807 	 * Any signals were handled the first time.
1808 	 * Wakeups and pollwakeups are redone to avoid any race
1809 	 * conditions - all the messages are not queued until the
1810 	 * last message has been processed by strrput.
1811 	 */
1812 	bp = nextbp;
1813 	signals = firstmsgsigs = allmsgsigs = 0;
1814 	mutex_enter(&stp->sd_lock);
1815 	goto one_more;
1816 }
1817 
1818 static void
1819 log_dupioc(queue_t *rq, mblk_t *bp)
1820 {
1821 	queue_t *wq, *qp;
1822 	char *modnames, *mnp, *dname;
1823 	size_t maxmodstr;
1824 	boolean_t islast;
1825 
1826 	/*
1827 	 * Allocate a buffer large enough to hold the names of nstrpush modules
1828 	 * and one driver, with spaces between and NUL terminator.  If we can't
1829 	 * get memory, then we'll just log the driver name.
1830 	 */
1831 	maxmodstr = nstrpush * (FMNAMESZ + 1);
1832 	mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1833 
1834 	/* march down write side to print log message down to the driver */
1835 	wq = WR(rq);
1836 
1837 	/* make sure q_next doesn't shift around while we're grabbing data */
1838 	claimstr(wq);
1839 	qp = wq->q_next;
1840 	do {
1841 		if ((dname = qp->q_qinfo->qi_minfo->mi_idname) == NULL)
1842 			dname = "?";
1843 		islast = !SAMESTR(qp) || qp->q_next == NULL;
1844 		if (modnames == NULL) {
1845 			/*
1846 			 * If we don't have memory, then get the driver name in
1847 			 * the log where we can see it.  Note that memory
1848 			 * pressure is a possible cause of these sorts of bugs.
1849 			 */
1850 			if (islast) {
1851 				modnames = dname;
1852 				maxmodstr = 0;
1853 			}
1854 		} else {
1855 			mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1856 			if (!islast)
1857 				*mnp++ = ' ';
1858 		}
1859 		qp = qp->q_next;
1860 	} while (!islast);
1861 	releasestr(wq);
1862 	/* Cannot happen unless stream head is corrupt. */
1863 	ASSERT(modnames != NULL);
1864 	(void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1865 	    SL_CONSOLE|SL_TRACE|SL_ERROR,
1866 	    "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1867 	    rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1868 	    (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1869 	if (maxmodstr != 0)
1870 		kmem_free(modnames, maxmodstr);
1871 }
1872 
1873 int
1874 strrput_nondata(queue_t *q, mblk_t *bp)
1875 {
1876 	struct stdata *stp;
1877 	struct iocblk *iocbp;
1878 	struct stroptions *sop;
1879 	struct copyreq *reqp;
1880 	struct copyresp *resp;
1881 	unsigned char bpri;
1882 	unsigned char  flushed_already = 0;
1883 
1884 	stp = (struct stdata *)q->q_ptr;
1885 
1886 	ASSERT(!(stp->sd_flag & STPLEX));
1887 	ASSERT(qclaimed(q));
1888 
1889 	switch (bp->b_datap->db_type) {
1890 	case M_ERROR:
1891 		/*
1892 		 * An error has occurred downstream, the errno is in the first
1893 		 * bytes of the message.
1894 		 */
1895 		if ((bp->b_wptr - bp->b_rptr) == 2) {	/* New flavor */
1896 			unsigned char rw = 0;
1897 
1898 			mutex_enter(&stp->sd_lock);
1899 			if (*bp->b_rptr != NOERROR) {	/* read error */
1900 				if (*bp->b_rptr != 0) {
1901 					if (stp->sd_flag & STRDERR)
1902 						flushed_already |= FLUSHR;
1903 					stp->sd_flag |= STRDERR;
1904 					rw |= FLUSHR;
1905 				} else {
1906 					stp->sd_flag &= ~STRDERR;
1907 				}
1908 				stp->sd_rerror = *bp->b_rptr;
1909 			}
1910 			bp->b_rptr++;
1911 			if (*bp->b_rptr != NOERROR) {	/* write error */
1912 				if (*bp->b_rptr != 0) {
1913 					if (stp->sd_flag & STWRERR)
1914 						flushed_already |= FLUSHW;
1915 					stp->sd_flag |= STWRERR;
1916 					rw |= FLUSHW;
1917 				} else {
1918 					stp->sd_flag &= ~STWRERR;
1919 				}
1920 				stp->sd_werror = *bp->b_rptr;
1921 			}
1922 			if (rw) {
1923 				TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
1924 					"strrput cv_broadcast:q %p, bp %p",
1925 					q, bp);
1926 				cv_broadcast(&q->q_wait); /* readers */
1927 				cv_broadcast(&_WR(q)->q_wait); /* writers */
1928 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
1929 
1930 				mutex_exit(&stp->sd_lock);
1931 				pollwakeup(&stp->sd_pollist, POLLERR);
1932 				mutex_enter(&stp->sd_lock);
1933 
1934 				if (stp->sd_sigflags & S_ERROR)
1935 					strsendsig(stp->sd_siglist, S_ERROR, 0,
1936 					    ((rw & FLUSHR) ? stp->sd_rerror :
1937 					    stp->sd_werror));
1938 				mutex_exit(&stp->sd_lock);
1939 				/*
1940 				 * Send the M_FLUSH only
1941 				 * for the first M_ERROR
1942 				 * message on the stream
1943 				 */
1944 				if (flushed_already == rw) {
1945 					freemsg(bp);
1946 					return (0);
1947 				}
1948 
1949 				bp->b_datap->db_type = M_FLUSH;
1950 				*bp->b_rptr = rw;
1951 				bp->b_wptr = bp->b_rptr + 1;
1952 				/*
1953 				 * Protect against the driver
1954 				 * passing up messages after
1955 				 * it has done a qprocsoff
1956 				 */
1957 				if (_OTHERQ(q)->q_next == NULL)
1958 					freemsg(bp);
1959 				else
1960 					qreply(q, bp);
1961 				return (0);
1962 			} else
1963 				mutex_exit(&stp->sd_lock);
1964 		} else if (*bp->b_rptr != 0) {		/* Old flavor */
1965 				if (stp->sd_flag & (STRDERR|STWRERR))
1966 					flushed_already = FLUSHRW;
1967 				mutex_enter(&stp->sd_lock);
1968 				stp->sd_flag |= (STRDERR|STWRERR);
1969 				stp->sd_rerror = *bp->b_rptr;
1970 				stp->sd_werror = *bp->b_rptr;
1971 				TRACE_2(TR_FAC_STREAMS_FR,
1972 					TR_STRRPUT_WAKE2,
1973 					"strrput wakeup #2:q %p, bp %p", q, bp);
1974 				cv_broadcast(&q->q_wait); /* the readers */
1975 				cv_broadcast(&_WR(q)->q_wait); /* the writers */
1976 				cv_broadcast(&stp->sd_monitor); /* ioctllers */
1977 
1978 				mutex_exit(&stp->sd_lock);
1979 				pollwakeup(&stp->sd_pollist, POLLERR);
1980 				mutex_enter(&stp->sd_lock);
1981 
1982 				if (stp->sd_sigflags & S_ERROR)
1983 					strsendsig(stp->sd_siglist, S_ERROR, 0,
1984 					    (stp->sd_werror ? stp->sd_werror :
1985 					    stp->sd_rerror));
1986 				mutex_exit(&stp->sd_lock);
1987 
1988 				/*
1989 				 * Send the M_FLUSH only
1990 				 * for the first M_ERROR
1991 				 * message on the stream
1992 				 */
1993 				if (flushed_already != FLUSHRW) {
1994 					bp->b_datap->db_type = M_FLUSH;
1995 					*bp->b_rptr = FLUSHRW;
1996 					/*
1997 					 * Protect against the driver passing up
1998 					 * messages after it has done a
1999 					 * qprocsoff.
2000 					 */
2001 				if (_OTHERQ(q)->q_next == NULL)
2002 					freemsg(bp);
2003 				else
2004 					qreply(q, bp);
2005 				return (0);
2006 				}
2007 		}
2008 		freemsg(bp);
2009 		return (0);
2010 
2011 	case M_HANGUP:
2012 
2013 		freemsg(bp);
2014 		mutex_enter(&stp->sd_lock);
2015 		stp->sd_werror = ENXIO;
2016 		stp->sd_flag |= STRHUP;
2017 		stp->sd_flag &= ~(WSLEEP|RSLEEP);
2018 
2019 		/*
2020 		 * send signal if controlling tty
2021 		 */
2022 
2023 		if (stp->sd_sidp) {
2024 			prsignal(stp->sd_sidp, SIGHUP);
2025 			if (stp->sd_sidp != stp->sd_pgidp)
2026 				pgsignal(stp->sd_pgidp, SIGTSTP);
2027 		}
2028 
2029 		/*
2030 		 * wake up read, write, and exception pollers and
2031 		 * reset wakeup mechanism.
2032 		 */
2033 		cv_broadcast(&q->q_wait);	/* the readers */
2034 		cv_broadcast(&_WR(q)->q_wait);	/* the writers */
2035 		cv_broadcast(&stp->sd_monitor);	/* the ioctllers */
2036 		mutex_exit(&stp->sd_lock);
2037 		strhup(stp);
2038 		return (0);
2039 
2040 	case M_UNHANGUP:
2041 		freemsg(bp);
2042 		mutex_enter(&stp->sd_lock);
2043 		stp->sd_werror = 0;
2044 		stp->sd_flag &= ~STRHUP;
2045 		mutex_exit(&stp->sd_lock);
2046 		return (0);
2047 
2048 	case M_SIG:
2049 		/*
2050 		 * Someone downstream wants to post a signal.  The
2051 		 * signal to post is contained in the first byte of the
2052 		 * message.  If the message would go on the front of
2053 		 * the queue, send a signal to the process group
2054 		 * (if not SIGPOLL) or to the siglist processes
2055 		 * (SIGPOLL).  If something is already on the queue,
2056 		 * OR if we are delivering a delayed suspend (*sigh*
2057 		 * another "tty" hack) and there's no one sleeping already,
2058 		 * just enqueue the message.
2059 		 */
2060 		mutex_enter(&stp->sd_lock);
2061 		if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2062 		    !(stp->sd_flag & RSLEEP))) {
2063 			(void) putq(q, bp);
2064 			mutex_exit(&stp->sd_lock);
2065 			return (0);
2066 		}
2067 		mutex_exit(&stp->sd_lock);
2068 		/* FALLTHRU */
2069 
2070 	case M_PCSIG:
2071 		/*
2072 		 * Don't enqueue, just post the signal.
2073 		 */
2074 		strsignal(stp, *bp->b_rptr, 0L);
2075 		freemsg(bp);
2076 		return (0);
2077 
2078 	case M_FLUSH:
2079 		/*
2080 		 * Flush queues.  The indication of which queues to flush
2081 		 * is in the first byte of the message.  If the read queue
2082 		 * is specified, then flush it.  If FLUSHBAND is set, just
2083 		 * flush the band specified by the second byte of the message.
2084 		 *
2085 		 * If a module has issued a M_SETOPT to not flush hi
2086 		 * priority messages off of the stream head, then pass this
2087 		 * flag into the flushq code to preserve such messages.
2088 		 */
2089 
2090 		if (*bp->b_rptr & FLUSHR) {
2091 			mutex_enter(&stp->sd_lock);
2092 			if (*bp->b_rptr & FLUSHBAND) {
2093 				ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2094 				flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2095 			} else
2096 				flushq_common(q, FLUSHALL,
2097 				    stp->sd_read_opt & RFLUSHPCPROT);
2098 			if ((q->q_first == NULL) ||
2099 			    (q->q_first->b_datap->db_type < QPCTL))
2100 				stp->sd_flag &= ~STRPRI;
2101 			else {
2102 				ASSERT(stp->sd_flag & STRPRI);
2103 			}
2104 			mutex_exit(&stp->sd_lock);
2105 		}
2106 		if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2107 			*bp->b_rptr &= ~FLUSHR;
2108 			bp->b_flag |= MSGNOLOOP;
2109 			/*
2110 			 * Protect against the driver passing up
2111 			 * messages after it has done a qprocsoff.
2112 			 */
2113 			if (_OTHERQ(q)->q_next == NULL)
2114 				freemsg(bp);
2115 			else
2116 				qreply(q, bp);
2117 			return (0);
2118 		}
2119 		freemsg(bp);
2120 		return (0);
2121 
2122 	case M_IOCACK:
2123 	case M_IOCNAK:
2124 		iocbp = (struct iocblk *)bp->b_rptr;
2125 		/*
2126 		 * If not waiting for ACK or NAK then just free msg.
2127 		 * If incorrect id sequence number then just free msg.
2128 		 * If already have ACK or NAK for user then this is a
2129 		 *    duplicate, display a warning and free the msg.
2130 		 */
2131 		mutex_enter(&stp->sd_lock);
2132 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2133 		    (stp->sd_iocid != iocbp->ioc_id)) {
2134 			/*
2135 			 * If the ACK/NAK is a dup, display a message
2136 			 * Dup is when sd_iocid == ioc_id, and
2137 			 * sd_iocblk == <valid ptr> or -1 (the former
2138 			 * is when an ioctl has been put on the stream
2139 			 * head, but has not yet been consumed, the
2140 			 * later is when it has been consumed).
2141 			 */
2142 			if ((stp->sd_iocid == iocbp->ioc_id) &&
2143 			    (stp->sd_iocblk != NULL)) {
2144 				log_dupioc(q, bp);
2145 			}
2146 			freemsg(bp);
2147 			mutex_exit(&stp->sd_lock);
2148 			return (0);
2149 		}
2150 
2151 		/*
2152 		 * Assign ACK or NAK to user and wake up.
2153 		 */
2154 		stp->sd_iocblk = bp;
2155 		cv_broadcast(&stp->sd_monitor);
2156 		mutex_exit(&stp->sd_lock);
2157 		return (0);
2158 
2159 	case M_COPYIN:
2160 	case M_COPYOUT:
2161 		reqp = (struct copyreq *)bp->b_rptr;
2162 
2163 		/*
2164 		 * If not waiting for ACK or NAK then just fail request.
2165 		 * If already have ACK, NAK, or copy request, then just
2166 		 * fail request.
2167 		 * If incorrect id sequence number then just fail request.
2168 		 */
2169 		mutex_enter(&stp->sd_lock);
2170 		if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2171 		    (stp->sd_iocid != reqp->cq_id)) {
2172 			if (bp->b_cont) {
2173 				freemsg(bp->b_cont);
2174 				bp->b_cont = NULL;
2175 			}
2176 			bp->b_datap->db_type = M_IOCDATA;
2177 			bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2178 			resp = (struct copyresp *)bp->b_rptr;
2179 			resp->cp_rval = (caddr_t)1;	/* failure */
2180 			mutex_exit(&stp->sd_lock);
2181 			putnext(stp->sd_wrq, bp);
2182 			return (0);
2183 		}
2184 
2185 		/*
2186 		 * Assign copy request to user and wake up.
2187 		 */
2188 		stp->sd_iocblk = bp;
2189 		cv_broadcast(&stp->sd_monitor);
2190 		mutex_exit(&stp->sd_lock);
2191 		return (0);
2192 
2193 	case M_SETOPTS:
2194 		/*
2195 		 * Set stream head options (read option, write offset,
2196 		 * min/max packet size, and/or high/low water marks for
2197 		 * the read side only).
2198 		 */
2199 
2200 		bpri = 0;
2201 		sop = (struct stroptions *)bp->b_rptr;
2202 		mutex_enter(&stp->sd_lock);
2203 		if (sop->so_flags & SO_READOPT) {
2204 			switch (sop->so_readopt & RMODEMASK) {
2205 			case RNORM:
2206 				stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2207 				break;
2208 
2209 			case RMSGD:
2210 				stp->sd_read_opt =
2211 				    ((stp->sd_read_opt & ~RD_MSGNODIS) |
2212 				    RD_MSGDIS);
2213 				break;
2214 
2215 			case RMSGN:
2216 				stp->sd_read_opt =
2217 				    ((stp->sd_read_opt & ~RD_MSGDIS) |
2218 				    RD_MSGNODIS);
2219 				break;
2220 			}
2221 			switch (sop->so_readopt & RPROTMASK) {
2222 			case RPROTNORM:
2223 				stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2224 				break;
2225 
2226 			case RPROTDAT:
2227 				stp->sd_read_opt =
2228 				    ((stp->sd_read_opt & ~RD_PROTDIS) |
2229 				    RD_PROTDAT);
2230 				break;
2231 
2232 			case RPROTDIS:
2233 				stp->sd_read_opt =
2234 				    ((stp->sd_read_opt & ~RD_PROTDAT) |
2235 				    RD_PROTDIS);
2236 				break;
2237 			}
2238 			switch (sop->so_readopt & RFLUSHMASK) {
2239 			case RFLUSHPCPROT:
2240 				/*
2241 				 * This sets the stream head to NOT flush
2242 				 * M_PCPROTO messages.
2243 				 */
2244 				stp->sd_read_opt |= RFLUSHPCPROT;
2245 				break;
2246 			}
2247 		}
2248 		if (sop->so_flags & SO_ERROPT) {
2249 			switch (sop->so_erropt & RERRMASK) {
2250 			case RERRNORM:
2251 				stp->sd_flag &= ~STRDERRNONPERSIST;
2252 				break;
2253 			case RERRNONPERSIST:
2254 				stp->sd_flag |= STRDERRNONPERSIST;
2255 				break;
2256 			}
2257 			switch (sop->so_erropt & WERRMASK) {
2258 			case WERRNORM:
2259 				stp->sd_flag &= ~STWRERRNONPERSIST;
2260 				break;
2261 			case WERRNONPERSIST:
2262 				stp->sd_flag |= STWRERRNONPERSIST;
2263 				break;
2264 			}
2265 		}
2266 		if (sop->so_flags & SO_COPYOPT) {
2267 			if (sop->so_copyopt & ZCVMSAFE) {
2268 				stp->sd_copyflag |= STZCVMSAFE;
2269 				stp->sd_copyflag &= ~STZCVMUNSAFE;
2270 			} else if (sop->so_copyopt & ZCVMUNSAFE) {
2271 				stp->sd_copyflag |= STZCVMUNSAFE;
2272 				stp->sd_copyflag &= ~STZCVMSAFE;
2273 			}
2274 
2275 			if (sop->so_copyopt & COPYCACHED) {
2276 				stp->sd_copyflag |= STRCOPYCACHED;
2277 			}
2278 		}
2279 		if (sop->so_flags & SO_WROFF)
2280 			stp->sd_wroff = sop->so_wroff;
2281 		if (sop->so_flags & SO_MINPSZ)
2282 			q->q_minpsz = sop->so_minpsz;
2283 		if (sop->so_flags & SO_MAXPSZ)
2284 			q->q_maxpsz = sop->so_maxpsz;
2285 		if (sop->so_flags & SO_MAXBLK)
2286 			stp->sd_maxblk = sop->so_maxblk;
2287 		if (sop->so_flags & SO_HIWAT) {
2288 		    if (sop->so_flags & SO_BAND) {
2289 			if (strqset(q, QHIWAT, sop->so_band, sop->so_hiwat))
2290 				cmn_err(CE_WARN,
2291 				    "strrput: could not allocate qband\n");
2292 			else
2293 				bpri = sop->so_band;
2294 		    } else {
2295 			q->q_hiwat = sop->so_hiwat;
2296 		    }
2297 		}
2298 		if (sop->so_flags & SO_LOWAT) {
2299 		    if (sop->so_flags & SO_BAND) {
2300 			if (strqset(q, QLOWAT, sop->so_band, sop->so_lowat))
2301 				cmn_err(CE_WARN,
2302 				    "strrput: could not allocate qband\n");
2303 			else
2304 				bpri = sop->so_band;
2305 		    } else {
2306 			q->q_lowat = sop->so_lowat;
2307 		    }
2308 		}
2309 		if (sop->so_flags & SO_MREADON)
2310 			stp->sd_flag |= SNDMREAD;
2311 		if (sop->so_flags & SO_MREADOFF)
2312 			stp->sd_flag &= ~SNDMREAD;
2313 		if (sop->so_flags & SO_NDELON)
2314 			stp->sd_flag |= OLDNDELAY;
2315 		if (sop->so_flags & SO_NDELOFF)
2316 			stp->sd_flag &= ~OLDNDELAY;
2317 		if (sop->so_flags & SO_ISTTY)
2318 			stp->sd_flag |= STRISTTY;
2319 		if (sop->so_flags & SO_ISNTTY)
2320 			stp->sd_flag &= ~STRISTTY;
2321 		if (sop->so_flags & SO_TOSTOP)
2322 			stp->sd_flag |= STRTOSTOP;
2323 		if (sop->so_flags & SO_TONSTOP)
2324 			stp->sd_flag &= ~STRTOSTOP;
2325 		if (sop->so_flags & SO_DELIM)
2326 			stp->sd_flag |= STRDELIM;
2327 		if (sop->so_flags & SO_NODELIM)
2328 			stp->sd_flag &= ~STRDELIM;
2329 
2330 		mutex_exit(&stp->sd_lock);
2331 		freemsg(bp);
2332 
2333 		/* Check backenable in case the water marks changed */
2334 		qbackenable(q, bpri);
2335 		return (0);
2336 
2337 	/*
2338 	 * The following set of cases deal with situations where two stream
2339 	 * heads are connected to each other (twisted streams).  These messages
2340 	 * have no meaning at the stream head.
2341 	 */
2342 	case M_BREAK:
2343 	case M_CTL:
2344 	case M_DELAY:
2345 	case M_START:
2346 	case M_STOP:
2347 	case M_IOCDATA:
2348 	case M_STARTI:
2349 	case M_STOPI:
2350 		freemsg(bp);
2351 		return (0);
2352 
2353 	case M_IOCTL:
2354 		/*
2355 		 * Always NAK this condition
2356 		 * (makes no sense)
2357 		 * If there is one or more threads in the read side
2358 		 * rwnext we have to defer the nacking until that thread
2359 		 * returns (in strget).
2360 		 */
2361 		mutex_enter(&stp->sd_lock);
2362 		if (stp->sd_struiodnak != 0) {
2363 			/*
2364 			 * Defer NAK to the streamhead. Queue at the end
2365 			 * the list.
2366 			 */
2367 			mblk_t *mp = stp->sd_struionak;
2368 
2369 			while (mp && mp->b_next)
2370 				mp = mp->b_next;
2371 			if (mp)
2372 				mp->b_next = bp;
2373 			else
2374 				stp->sd_struionak = bp;
2375 			bp->b_next = NULL;
2376 			mutex_exit(&stp->sd_lock);
2377 			return (0);
2378 		}
2379 		mutex_exit(&stp->sd_lock);
2380 
2381 		bp->b_datap->db_type = M_IOCNAK;
2382 		/*
2383 		 * Protect against the driver passing up
2384 		 * messages after it has done a qprocsoff.
2385 		 */
2386 		if (_OTHERQ(q)->q_next == NULL)
2387 			freemsg(bp);
2388 		else
2389 			qreply(q, bp);
2390 		return (0);
2391 
2392 	default:
2393 #ifdef DEBUG
2394 		cmn_err(CE_WARN,
2395 			"bad message type %x received at stream head\n",
2396 			bp->b_datap->db_type);
2397 #endif
2398 		freemsg(bp);
2399 		return (0);
2400 	}
2401 
2402 	/* NOTREACHED */
2403 }
2404 
2405 /*
2406  * Check if the stream pointed to by `stp' can be written to, and return an
2407  * error code if not.  If `eiohup' is set, then return EIO if STRHUP is set.
2408  * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
2409  * then always return EPIPE and send a SIGPIPE to the invoking thread.
2410  */
2411 static int
2412 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2413 {
2414 	int error;
2415 
2416 	ASSERT(MUTEX_HELD(&stp->sd_lock));
2417 
2418 	/*
2419 	 * For modem support, POSIX states that on writes, EIO should
2420 	 * be returned if the stream has been hung up.
2421 	 */
2422 	if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
2423 		error = EIO;
2424 	else
2425 		error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2426 
2427 	if (error != 0) {
2428 		if (!(stp->sd_flag & STPLEX) &&
2429 		    (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
2430 			tsignal(curthread, SIGPIPE);
2431 			error = EPIPE;
2432 		}
2433 	}
2434 
2435 	return (error);
2436 }
2437 
2438 /*
2439  * Copyin and send data down a stream.
2440  * The caller will allocate and copyin any control part that precedes the
2441  * message and pass than in as mctl.
2442  *
2443  * Caller should *not* hold sd_lock.
2444  * When EWOULDBLOCK is returned the caller has to redo the canputnext
2445  * under sd_lock in order to avoid missing a backenabling wakeup.
2446  *
2447  * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2448  *
2449  * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2450  * For sync streams we can only ignore flow control by reverting to using
2451  * putnext.
2452  *
2453  * If sd_maxblk is less than *iosize this routine might return without
2454  * transferring all of *iosize. In all cases, on return *iosize will contain
2455  * the amount of data that was transferred.
2456  */
2457 static int
2458 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2459     int b_flag, int pri, int flags)
2460 {
2461 	struiod_t uiod;
2462 	mblk_t *mp;
2463 	queue_t *wqp = stp->sd_wrq;
2464 	int error = 0;
2465 	ssize_t count = *iosize;
2466 	cred_t *cr;
2467 
2468 	ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2469 
2470 	if (uiop != NULL && count >= 0)
2471 		flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2472 
2473 	if (!(flags & STRUIO_POSTPONE)) {
2474 		/*
2475 		 * Use regular canputnext, strmakedata, putnext sequence.
2476 		 */
2477 		if (pri == 0) {
2478 			if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2479 				freemsg(mctl);
2480 				return (EWOULDBLOCK);
2481 			}
2482 		} else {
2483 			if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2484 				freemsg(mctl);
2485 				return (EWOULDBLOCK);
2486 			}
2487 		}
2488 
2489 		if ((error = strmakedata(iosize, uiop, stp, flags,
2490 					&mp)) != 0) {
2491 			freemsg(mctl);
2492 			/*
2493 			 * need to change return code to ENOMEM
2494 			 * so that this is not confused with
2495 			 * flow control, EAGAIN.
2496 			 */
2497 
2498 			if (error == EAGAIN)
2499 				return (ENOMEM);
2500 			else
2501 				return (error);
2502 		}
2503 		if (mctl != NULL) {
2504 			if (mctl->b_cont == NULL)
2505 				mctl->b_cont = mp;
2506 			else if (mp != NULL)
2507 				linkb(mctl, mp);
2508 			mp = mctl;
2509 			/*
2510 			 * Note that for interrupt thread, the CRED() is
2511 			 * NULL. Don't bother with the pid either.
2512 			 */
2513 			if ((cr = CRED()) != NULL) {
2514 				mblk_setcred(mp, cr);
2515 				DB_CPID(mp) = curproc->p_pid;
2516 			}
2517 		} else if (mp == NULL)
2518 			return (0);
2519 
2520 		mp->b_flag |= b_flag;
2521 		mp->b_band = (uchar_t)pri;
2522 
2523 		if (flags & MSG_IGNFLOW) {
2524 			/*
2525 			 * XXX Hack: Don't get stuck running service
2526 			 * procedures. This is needed for sockfs when
2527 			 * sending the unbind message out of the rput
2528 			 * procedure - we don't want a put procedure
2529 			 * to run service procedures.
2530 			 */
2531 			putnext(wqp, mp);
2532 		} else {
2533 			stream_willservice(stp);
2534 			putnext(wqp, mp);
2535 			stream_runservice(stp);
2536 		}
2537 		return (0);
2538 	}
2539 	/*
2540 	 * Stream supports rwnext() for the write side.
2541 	 */
2542 	if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2543 		freemsg(mctl);
2544 		/*
2545 		 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2546 		 */
2547 		return (error == EAGAIN ? ENOMEM : error);
2548 	}
2549 	if (mctl != NULL) {
2550 		if (mctl->b_cont == NULL)
2551 			mctl->b_cont = mp;
2552 		else if (mp != NULL)
2553 			linkb(mctl, mp);
2554 		mp = mctl;
2555 		/*
2556 		 * Note that for interrupt thread, the CRED() is
2557 		 * NULL.  Don't bother with the pid either.
2558 		 */
2559 		if ((cr = CRED()) != NULL) {
2560 			mblk_setcred(mp, cr);
2561 			DB_CPID(mp) = curproc->p_pid;
2562 		}
2563 	} else if (mp == NULL) {
2564 		return (0);
2565 	}
2566 
2567 	mp->b_flag |= b_flag;
2568 	mp->b_band = (uchar_t)pri;
2569 
2570 	(void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
2571 		sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
2572 	uiod.d_uio.uio_offset = 0;
2573 	uiod.d_mp = mp;
2574 	error = rwnext(wqp, &uiod);
2575 	if (! uiod.d_mp) {
2576 		uioskip(uiop, *iosize);
2577 		return (error);
2578 	}
2579 	ASSERT(mp == uiod.d_mp);
2580 	if (error == EINVAL) {
2581 		/*
2582 		 * The stream plumbing must have changed while
2583 		 * we were away, so just turn off rwnext()s.
2584 		 */
2585 		error = 0;
2586 	} else if (error == EBUSY || error == EWOULDBLOCK) {
2587 		/*
2588 		 * Couldn't enter a perimeter or took a page fault,
2589 		 * so fall-back to putnext().
2590 		 */
2591 		error = 0;
2592 	} else {
2593 		freemsg(mp);
2594 		return (error);
2595 	}
2596 	/* Have to check canput before consuming data from the uio */
2597 	if (pri == 0) {
2598 		if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2599 			freemsg(mp);
2600 			return (EWOULDBLOCK);
2601 		}
2602 	} else {
2603 		if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2604 			freemsg(mp);
2605 			return (EWOULDBLOCK);
2606 		}
2607 	}
2608 	ASSERT(mp == uiod.d_mp);
2609 	/* Copyin data from the uio */
2610 	if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2611 		freemsg(mp);
2612 		return (error);
2613 	}
2614 	uioskip(uiop, *iosize);
2615 	if (flags & MSG_IGNFLOW) {
2616 		/*
2617 		 * XXX Hack: Don't get stuck running service procedures.
2618 		 * This is needed for sockfs when sending the unbind message
2619 		 * out of the rput procedure - we don't want a put procedure
2620 		 * to run service procedures.
2621 		 */
2622 		putnext(wqp, mp);
2623 	} else {
2624 		stream_willservice(stp);
2625 		putnext(wqp, mp);
2626 		stream_runservice(stp);
2627 	}
2628 	return (0);
2629 }
2630 
2631 /*
2632  * Write attempts to break the write request into messages conforming
2633  * with the minimum and maximum packet sizes set downstream.
2634  *
2635  * Write will not block if downstream queue is full and
2636  * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2637  *
2638  * A write of zero bytes gets packaged into a zero length message and sent
2639  * downstream like any other message.
2640  *
2641  * If buffers of the requested sizes are not available, the write will
2642  * sleep until the buffers become available.
2643  *
2644  * Write (if specified) will supply a write offset in a message if it
2645  * makes sense. This can be specified by downstream modules as part of
2646  * a M_SETOPTS message.  Write will not supply the write offset if it
2647  * cannot supply any data in a buffer.  In other words, write will never
2648  * send down an empty packet due to a write offset.
2649  */
2650 /* ARGSUSED2 */
2651 int
2652 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2653 {
2654 	struct stdata *stp;
2655 	struct queue *wqp;
2656 	ssize_t rmin, rmax;
2657 	ssize_t iosize;
2658 	char waitflag;
2659 	int tempmode;
2660 	int error = 0;
2661 	int b_flag;
2662 
2663 	ASSERT(vp->v_stream);
2664 	stp = vp->v_stream;
2665 
2666 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
2667 		if ((error = straccess(stp, JCWRITE)) != 0)
2668 			return (error);
2669 
2670 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2671 		mutex_enter(&stp->sd_lock);
2672 		error = strwriteable(stp, B_TRUE, B_TRUE);
2673 		mutex_exit(&stp->sd_lock);
2674 		if (error != 0)
2675 			return (error);
2676 	}
2677 
2678 	wqp = stp->sd_wrq;
2679 
2680 	/* get these values from them cached in the stream head */
2681 	rmin = stp->sd_qn_minpsz;
2682 	rmax = stp->sd_qn_maxpsz;
2683 
2684 	/*
2685 	 * Check the min/max packet size constraints.  If min packet size
2686 	 * is non-zero, the write cannot be split into multiple messages
2687 	 * and still guarantee the size constraints.
2688 	 */
2689 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2690 
2691 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
2692 	if (rmax == 0) {
2693 		return (0);
2694 	}
2695 	if (rmin > 0) {
2696 		if (uiop->uio_resid < rmin) {
2697 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2698 				"strwrite out:q %p out %d error %d",
2699 				wqp, 0, ERANGE);
2700 			return (ERANGE);
2701 		}
2702 		if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2703 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2704 				"strwrite out:q %p out %d error %d",
2705 				wqp, 1, ERANGE);
2706 			return (ERANGE);
2707 		}
2708 	}
2709 
2710 	/*
2711 	 * Do until count satisfied or error.
2712 	 */
2713 	waitflag = WRITEWAIT;
2714 	if (stp->sd_flag & OLDNDELAY)
2715 		tempmode = uiop->uio_fmode & ~FNDELAY;
2716 	else
2717 		tempmode = uiop->uio_fmode;
2718 
2719 	if (rmax == INFPSZ)
2720 		rmax = uiop->uio_resid;
2721 
2722 	/*
2723 	 * Note that tempmode does not get used in strput/strmakedata
2724 	 * but only in strwaitq. The other routines use uio_fmode
2725 	 * unmodified.
2726 	 */
2727 
2728 	/* LINTED: constant in conditional context */
2729 	while (1) {	/* breaks when uio_resid reaches zero */
2730 		/*
2731 		 * Determine the size of the next message to be
2732 		 * packaged.  May have to break write into several
2733 		 * messages based on max packet size.
2734 		 */
2735 		iosize = MIN(uiop->uio_resid, rmax);
2736 
2737 		/*
2738 		 * Put block downstream when flow control allows it.
2739 		 */
2740 		if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2741 			b_flag = MSGDELIM;
2742 		else
2743 			b_flag = 0;
2744 
2745 		for (;;) {
2746 			int done = 0;
2747 
2748 			error = strput(stp, NULL, uiop, &iosize, b_flag,
2749 				0, 0);
2750 			if (error == 0)
2751 				break;
2752 			if (error != EWOULDBLOCK)
2753 				goto out;
2754 
2755 			mutex_enter(&stp->sd_lock);
2756 			/*
2757 			 * Check for a missed wakeup.
2758 			 * Needed since strput did not hold sd_lock across
2759 			 * the canputnext.
2760 			 */
2761 			if (canputnext(wqp)) {
2762 				/* Try again */
2763 				mutex_exit(&stp->sd_lock);
2764 				continue;
2765 			}
2766 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2767 				"strwrite wait:q %p wait", wqp);
2768 			if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2769 			    tempmode, -1, &done)) != 0 || done) {
2770 				mutex_exit(&stp->sd_lock);
2771 				if ((vp->v_type == VFIFO) &&
2772 				    (uiop->uio_fmode & FNDELAY) &&
2773 				    (error == EAGAIN))
2774 					error = 0;
2775 				goto out;
2776 			}
2777 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2778 				"strwrite wake:q %p awakes", wqp);
2779 			mutex_exit(&stp->sd_lock);
2780 			if (stp->sd_sidp != NULL &&
2781 			    stp->sd_vnode->v_type != VFIFO)
2782 				if (error = straccess(stp, JCWRITE))
2783 					goto out;
2784 		}
2785 		waitflag |= NOINTR;
2786 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2787 			"strwrite resid:q %p uiop %p", wqp, uiop);
2788 		if (uiop->uio_resid) {
2789 			/* Recheck for errors - needed for sockets */
2790 			if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2791 			    (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2792 				mutex_enter(&stp->sd_lock);
2793 				error = strwriteable(stp, B_FALSE, B_TRUE);
2794 				mutex_exit(&stp->sd_lock);
2795 				if (error != 0)
2796 					return (error);
2797 			}
2798 			continue;
2799 		}
2800 		break;
2801 	}
2802 out:
2803 	/*
2804 	 * For historical reasons, applications expect EAGAIN when a data
2805 	 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2806 	 */
2807 	if (error == ENOMEM)
2808 		error = EAGAIN;
2809 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2810 		"strwrite out:q %p out %d error %d", wqp, 2, error);
2811 	return (error);
2812 }
2813 
2814 /*
2815  * kstrwritemp() has very similar semantics as that of strwrite().
2816  * The main difference is it obtains mblks from the caller and also
2817  * does not do any copy as done in strwrite() from user buffers to
2818  * kernel buffers.
2819  *
2820  *
2821  * Currently, this routine is used by sendfile to send data allocated
2822  * within the kernel without any copying. This interface does not use the
2823  * synchronous stream interface as synch. stream interface implies
2824  * copying.
2825  */
2826 int
2827 kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
2828 {
2829 	struct stdata *stp;
2830 	struct queue *wqp;
2831 	char waitflag;
2832 	int tempmode;
2833 	int error;
2834 	int done = 0;
2835 
2836 	ASSERT(vp->v_stream);
2837 	stp = vp->v_stream;
2838 
2839 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2840 		mutex_enter(&stp->sd_lock);
2841 		error = strwriteable(stp, B_FALSE, B_TRUE);
2842 		mutex_exit(&stp->sd_lock);
2843 		if (error != 0)
2844 			return (error);
2845 	}
2846 
2847 	/*
2848 	 * First, check for flow control without grabbing the sd_lock.
2849 	 * If we would block, re-check with the lock. This is similar
2850 	 * to the logic used by strwrite().
2851 	 */
2852 	wqp = stp->sd_wrq;
2853 	if (canputnext(wqp)) {
2854 		putnext(wqp, mp);
2855 		return (0);
2856 	}
2857 
2858 	waitflag = WRITEWAIT;
2859 	if (stp->sd_flag & OLDNDELAY)
2860 		tempmode = fmode & ~FNDELAY;
2861 	else
2862 		tempmode = fmode;
2863 
2864 	mutex_enter(&stp->sd_lock);
2865 	do {
2866 		if (canputnext(wqp)) {
2867 			mutex_exit(&stp->sd_lock);
2868 			putnext(wqp, mp);
2869 			return (0);
2870 		}
2871 		error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
2872 		    &done);
2873 	} while (error == 0 && !done);
2874 
2875 	mutex_exit(&stp->sd_lock);
2876 	/*
2877 	 * EAGAIN tells the application to try again. ENOMEM
2878 	 * is returned only if the memory allocation size
2879 	 * exceeds the physical limits of the system. ENOMEM
2880 	 * can't be true here.
2881 	 */
2882 	if (error == ENOMEM)
2883 		error = EAGAIN;
2884 	return (error);
2885 }
2886 
2887 /*
2888  * Stream head write service routine.
2889  * Its job is to wake up any sleeping writers when a queue
2890  * downstream needs data (part of the flow control in putq and getq).
2891  * It also must wake anyone sleeping on a poll().
2892  * For stream head right below mux module, it must also invoke put procedure
2893  * of next downstream module.
2894  */
2895 int
2896 strwsrv(queue_t *q)
2897 {
2898 	struct stdata *stp;
2899 	queue_t *tq;
2900 	qband_t *qbp;
2901 	int i;
2902 	qband_t *myqbp;
2903 	int isevent;
2904 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
2905 
2906 	TRACE_1(TR_FAC_STREAMS_FR,
2907 		TR_STRWSRV, "strwsrv:q %p", q);
2908 	stp = (struct stdata *)q->q_ptr;
2909 	ASSERT(qclaimed(q));
2910 	mutex_enter(&stp->sd_lock);
2911 	ASSERT(!(stp->sd_flag & STPLEX));
2912 
2913 	if (stp->sd_flag & WSLEEP) {
2914 		stp->sd_flag &= ~WSLEEP;
2915 		cv_broadcast(&q->q_wait);
2916 	}
2917 	mutex_exit(&stp->sd_lock);
2918 
2919 	/* The other end of a stream pipe went away. */
2920 	if ((tq = q->q_next) == NULL) {
2921 		return (0);
2922 	}
2923 
2924 	/* Find the next module forward that has a service procedure */
2925 	claimstr(q);
2926 	tq = q->q_nfsrv;
2927 	ASSERT(tq != NULL);
2928 
2929 	if ((q->q_flag & QBACK)) {
2930 		if ((tq->q_flag & QFULL)) {
2931 			mutex_enter(QLOCK(tq));
2932 			if (!(tq->q_flag & QFULL)) {
2933 				mutex_exit(QLOCK(tq));
2934 				goto wakeup;
2935 			}
2936 			/*
2937 			 * The queue must have become full again. Set QWANTW
2938 			 * again so strwsrv will be back enabled when
2939 			 * the queue becomes non-full next time.
2940 			 */
2941 			tq->q_flag |= QWANTW;
2942 			mutex_exit(QLOCK(tq));
2943 		} else {
2944 		wakeup:
2945 			pollwakeup(&stp->sd_pollist, POLLWRNORM);
2946 			mutex_enter(&stp->sd_lock);
2947 			if (stp->sd_sigflags & S_WRNORM)
2948 				strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
2949 			mutex_exit(&stp->sd_lock);
2950 		}
2951 	}
2952 
2953 	isevent = 0;
2954 	i = 1;
2955 	bzero((caddr_t)qbf, NBAND);
2956 	mutex_enter(QLOCK(tq));
2957 	if ((myqbp = q->q_bandp) != NULL)
2958 		for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
2959 			ASSERT(myqbp);
2960 			if ((myqbp->qb_flag & QB_BACK)) {
2961 				if (qbp->qb_flag & QB_FULL) {
2962 					/*
2963 					 * The band must have become full again.
2964 					 * Set QB_WANTW again so strwsrv will
2965 					 * be back enabled when the band becomes
2966 					 * non-full next time.
2967 					 */
2968 					qbp->qb_flag |= QB_WANTW;
2969 				} else {
2970 					isevent = 1;
2971 					qbf[i] = 1;
2972 				}
2973 			}
2974 			myqbp = myqbp->qb_next;
2975 			i++;
2976 		}
2977 	mutex_exit(QLOCK(tq));
2978 
2979 	if (isevent) {
2980 	    for (i = tq->q_nband; i; i--) {
2981 		if (qbf[i]) {
2982 			pollwakeup(&stp->sd_pollist, POLLWRBAND);
2983 			mutex_enter(&stp->sd_lock);
2984 			if (stp->sd_sigflags & S_WRBAND)
2985 				strsendsig(stp->sd_siglist, S_WRBAND,
2986 					(uchar_t)i, 0);
2987 			mutex_exit(&stp->sd_lock);
2988 		}
2989 	    }
2990 	}
2991 
2992 	releasestr(q);
2993 	return (0);
2994 }
2995 
2996 /*
2997  * Special case of strcopyin/strcopyout for copying
2998  * struct strioctl that can deal with both data
2999  * models.
3000  */
3001 
3002 #ifdef	_LP64
3003 
3004 static int
3005 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3006 {
3007 	struct	strioctl32 strioc32;
3008 	struct	strioctl *striocp;
3009 
3010 	if (copyflag & U_TO_K) {
3011 		ASSERT((copyflag & K_TO_K) == 0);
3012 
3013 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
3014 			if (copyin(from, &strioc32, sizeof (strioc32)))
3015 				return (EFAULT);
3016 
3017 			striocp = (struct strioctl *)to;
3018 			striocp->ic_cmd	= strioc32.ic_cmd;
3019 			striocp->ic_timout = strioc32.ic_timout;
3020 			striocp->ic_len	= strioc32.ic_len;
3021 			striocp->ic_dp	= (char *)(uintptr_t)strioc32.ic_dp;
3022 
3023 		} else { /* NATIVE data model */
3024 			if (copyin(from, to, sizeof (struct strioctl))) {
3025 				return (EFAULT);
3026 			} else {
3027 				return (0);
3028 			}
3029 		}
3030 	} else {
3031 		ASSERT(copyflag & K_TO_K);
3032 		bcopy(from, to, sizeof (struct strioctl));
3033 	}
3034 	return (0);
3035 }
3036 
3037 static int
3038 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3039 {
3040 	struct	strioctl32 strioc32;
3041 	struct	strioctl *striocp;
3042 
3043 	if (copyflag & U_TO_K) {
3044 		ASSERT((copyflag & K_TO_K) == 0);
3045 
3046 		if ((flag & FMODELS) == DATAMODEL_ILP32) {
3047 			striocp = (struct strioctl *)from;
3048 			strioc32.ic_cmd	= striocp->ic_cmd;
3049 			strioc32.ic_timout = striocp->ic_timout;
3050 			strioc32.ic_len	= striocp->ic_len;
3051 			strioc32.ic_dp	= (caddr32_t)(uintptr_t)striocp->ic_dp;
3052 			ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
3053 			    striocp->ic_dp);
3054 
3055 			if (copyout(&strioc32, to, sizeof (strioc32)))
3056 				return (EFAULT);
3057 
3058 		} else { /* NATIVE data model */
3059 			if (copyout(from, to, sizeof (struct strioctl))) {
3060 				return (EFAULT);
3061 			} else {
3062 				return (0);
3063 			}
3064 		}
3065 	} else {
3066 		ASSERT(copyflag & K_TO_K);
3067 		bcopy(from, to, sizeof (struct strioctl));
3068 	}
3069 	return (0);
3070 }
3071 
3072 #else	/* ! _LP64 */
3073 
3074 /* ARGSUSED2 */
3075 static int
3076 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3077 {
3078 	return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3079 }
3080 
3081 /* ARGSUSED2 */
3082 static int
3083 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3084 {
3085 	return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3086 }
3087 
3088 #endif	/* _LP64 */
3089 
3090 /*
3091  * Determine type of job control semantics expected by user.  The
3092  * possibilities are:
3093  *	JCREAD	- Behaves like read() on fd; send SIGTTIN
3094  *	JCWRITE	- Behaves like write() on fd; send SIGTTOU if TOSTOP set
3095  *	JCSETP	- Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3096  *	JCGETP	- Gets a value in the stream; no signals.
3097  * See straccess in strsubr.c for usage of these values.
3098  *
3099  * This routine also returns -1 for I_STR as a special case; the
3100  * caller must call again with the real ioctl number for
3101  * classification.
3102  */
3103 static int
3104 job_control_type(int cmd)
3105 {
3106 	switch (cmd) {
3107 	case I_STR:
3108 		return (-1);
3109 
3110 	case I_RECVFD:
3111 	case I_E_RECVFD:
3112 		return (JCREAD);
3113 
3114 	case I_FDINSERT:
3115 	case I_SENDFD:
3116 		return (JCWRITE);
3117 
3118 	case TCSETA:
3119 	case TCSETAW:
3120 	case TCSETAF:
3121 	case TCSBRK:
3122 	case TCXONC:
3123 	case TCFLSH:
3124 	case TCDSET:	/* Obsolete */
3125 	case TIOCSWINSZ:
3126 	case TCSETS:
3127 	case TCSETSW:
3128 	case TCSETSF:
3129 	case TIOCSETD:
3130 	case TIOCHPCL:
3131 	case TIOCSETP:
3132 	case TIOCSETN:
3133 	case TIOCEXCL:
3134 	case TIOCNXCL:
3135 	case TIOCFLUSH:
3136 	case TIOCSETC:
3137 	case TIOCLBIS:
3138 	case TIOCLBIC:
3139 	case TIOCLSET:
3140 	case TIOCSBRK:
3141 	case TIOCCBRK:
3142 	case TIOCSDTR:
3143 	case TIOCCDTR:
3144 	case TIOCSLTC:
3145 	case TIOCSTOP:
3146 	case TIOCSTART:
3147 	case TIOCSTI:
3148 	case TIOCSPGRP:
3149 	case TIOCMSET:
3150 	case TIOCMBIS:
3151 	case TIOCMBIC:
3152 	case TIOCREMOTE:
3153 	case TIOCSIGNAL:
3154 	case LDSETT:
3155 	case LDSMAP:	/* Obsolete */
3156 	case DIOCSETP:
3157 	case I_FLUSH:
3158 	case I_SRDOPT:
3159 	case I_SETSIG:
3160 	case I_SWROPT:
3161 	case I_FLUSHBAND:
3162 	case I_SETCLTIME:
3163 	case I_SERROPT:
3164 	case I_ESETSIG:
3165 	case FIONBIO:
3166 	case FIOASYNC:
3167 	case FIOSETOWN:
3168 	case JBOOT:	/* Obsolete */
3169 	case JTERM:	/* Obsolete */
3170 	case JTIMOM:	/* Obsolete */
3171 	case JZOMBOOT:	/* Obsolete */
3172 	case JAGENT:	/* Obsolete */
3173 	case JTRUN:	/* Obsolete */
3174 	case JXTPROTO:	/* Obsolete */
3175 		return (JCSETP);
3176 	}
3177 
3178 	return (JCGETP);
3179 }
3180 
3181 /*
3182  * ioctl for streams
3183  */
3184 int
3185 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3186     cred_t *crp, int *rvalp)
3187 {
3188 	struct stdata *stp;
3189 	struct strioctl strioc;
3190 	struct uio uio;
3191 	struct iovec iov;
3192 	int access;
3193 	mblk_t *mp;
3194 	int error = 0;
3195 	int done = 0;
3196 	ssize_t	rmin, rmax;
3197 	queue_t *wrq;
3198 	queue_t *rdq;
3199 	boolean_t kioctl = B_FALSE;
3200 
3201 	if (flag & FKIOCTL) {
3202 		copyflag = K_TO_K;
3203 		kioctl = B_TRUE;
3204 	}
3205 	ASSERT(vp->v_stream);
3206 	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3207 	stp = vp->v_stream;
3208 
3209 	TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3210 		"strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3211 
3212 #ifdef C2_AUDIT
3213 	if (audit_active)
3214 		audit_strioctl(vp, cmd, arg, flag, copyflag, crp, rvalp);
3215 #endif
3216 
3217 	/*
3218 	 * If the copy is kernel to kernel, make sure that the FNATIVE
3219 	 * flag is set.  After this it would be a serious error to have
3220 	 * no model flag.
3221 	 */
3222 	if (copyflag == K_TO_K)
3223 		flag = (flag & ~FMODELS) | FNATIVE;
3224 
3225 	ASSERT((flag & FMODELS) != 0);
3226 
3227 	wrq = stp->sd_wrq;
3228 	rdq = _RD(wrq);
3229 
3230 	access = job_control_type(cmd);
3231 
3232 	/* We should never see these here, should be handled by iwscn */
3233 	if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3234 		return (EINVAL);
3235 
3236 	if (access != -1 && stp->sd_sidp != NULL &&
3237 	    stp->sd_vnode->v_type != VFIFO)
3238 		if (error = straccess(stp, access))
3239 			return (error);
3240 
3241 	/*
3242 	 * Check for sgttyb-related ioctls first, and complain as
3243 	 * necessary.
3244 	 */
3245 	switch (cmd) {
3246 	case TIOCGETP:
3247 	case TIOCSETP:
3248 	case TIOCSETN:
3249 		if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3250 			sgttyb_complaint = B_TRUE;
3251 			cmn_err(CE_NOTE,
3252 			    "application used obsolete TIOC[GS]ET");
3253 		}
3254 		if (sgttyb_handling >= 3) {
3255 			tsignal(curthread, SIGSYS);
3256 			return (EIO);
3257 		}
3258 		break;
3259 	}
3260 
3261 	mutex_enter(&stp->sd_lock);
3262 
3263 	switch (cmd) {
3264 	case I_RECVFD:
3265 	case I_E_RECVFD:
3266 	case I_PEEK:
3267 	case I_NREAD:
3268 	case FIONREAD:
3269 	case FIORDCHK:
3270 	case I_ATMARK:
3271 	case FIONBIO:
3272 	case FIOASYNC:
3273 		if (stp->sd_flag & (STRDERR|STPLEX)) {
3274 			error = strgeterr(stp, STRDERR|STPLEX, 0);
3275 			if (error != 0) {
3276 				mutex_exit(&stp->sd_lock);
3277 				return (error);
3278 			}
3279 		}
3280 		break;
3281 
3282 	default:
3283 		if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3284 			error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3285 			if (error != 0) {
3286 				mutex_exit(&stp->sd_lock);
3287 				return (error);
3288 			}
3289 		}
3290 	}
3291 
3292 	mutex_exit(&stp->sd_lock);
3293 
3294 	switch (cmd) {
3295 	default:
3296 		/*
3297 		 * The stream head has hardcoded knowledge of a
3298 		 * miscellaneous collection of terminal-, keyboard- and
3299 		 * mouse-related ioctls, enumerated below.  This hardcoded
3300 		 * knowledge allows the stream head to automatically
3301 		 * convert transparent ioctl requests made by userland
3302 		 * programs into I_STR ioctls which many old STREAMS
3303 		 * modules and drivers require.
3304 		 *
3305 		 * No new ioctls should ever be added to this list.
3306 		 * Instead, the STREAMS module or driver should be written
3307 		 * to either handle transparent ioctls or require any
3308 		 * userland programs to use I_STR ioctls (by returning
3309 		 * EINVAL to any transparent ioctl requests).
3310 		 *
3311 		 * More importantly, removing ioctls from this list should
3312 		 * be done with the utmost care, since our STREAMS modules
3313 		 * and drivers *count* on the stream head performing this
3314 		 * conversion, and thus may panic while processing
3315 		 * transparent ioctl request for one of these ioctls (keep
3316 		 * in mind that third party modules and drivers may have
3317 		 * similar problems).
3318 		 */
3319 		if (((cmd & IOCTYPE) == LDIOC) ||
3320 		    ((cmd & IOCTYPE) == tIOC) ||
3321 		    ((cmd & IOCTYPE) == TIOC) ||
3322 		    ((cmd & IOCTYPE) == KIOC) ||
3323 		    ((cmd & IOCTYPE) == MSIOC) ||
3324 		    ((cmd & IOCTYPE) == VUIOC)) {
3325 			/*
3326 			 * The ioctl is a tty ioctl - set up strioc buffer
3327 			 * and call strdoioctl() to do the work.
3328 			 */
3329 			if (stp->sd_flag & STRHUP)
3330 				return (ENXIO);
3331 			strioc.ic_cmd = cmd;
3332 			strioc.ic_timout = INFTIM;
3333 
3334 			switch (cmd) {
3335 
3336 			case TCXONC:
3337 			case TCSBRK:
3338 			case TCFLSH:
3339 			case TCDSET:
3340 				{
3341 				int native_arg = (int)arg;
3342 				strioc.ic_len = sizeof (int);
3343 				strioc.ic_dp = (char *)&native_arg;
3344 				return (strdoioctl(stp, &strioc, flag,
3345 				    K_TO_K, crp, rvalp));
3346 				}
3347 
3348 			case TCSETA:
3349 			case TCSETAW:
3350 			case TCSETAF:
3351 				strioc.ic_len = sizeof (struct termio);
3352 				strioc.ic_dp = (char *)arg;
3353 				return (strdoioctl(stp, &strioc, flag,
3354 					copyflag, crp, rvalp));
3355 
3356 			case TCSETS:
3357 			case TCSETSW:
3358 			case TCSETSF:
3359 				strioc.ic_len = sizeof (struct termios);
3360 				strioc.ic_dp = (char *)arg;
3361 				return (strdoioctl(stp, &strioc, flag,
3362 					copyflag, crp, rvalp));
3363 
3364 			case LDSETT:
3365 				strioc.ic_len = sizeof (struct termcb);
3366 				strioc.ic_dp = (char *)arg;
3367 				return (strdoioctl(stp, &strioc, flag,
3368 					copyflag, crp, rvalp));
3369 
3370 			case TIOCSETP:
3371 				strioc.ic_len = sizeof (struct sgttyb);
3372 				strioc.ic_dp = (char *)arg;
3373 				return (strdoioctl(stp, &strioc, flag,
3374 					copyflag, crp, rvalp));
3375 
3376 			case TIOCSTI:
3377 				if ((flag & FREAD) == 0 &&
3378 				    secpolicy_sti(crp) != 0) {
3379 					return (EPERM);
3380 				}
3381 				if (stp->sd_sidp !=
3382 				    ttoproc(curthread)->p_sessp->s_sidp &&
3383 				    secpolicy_sti(crp) != 0) {
3384 					return (EACCES);
3385 				}
3386 
3387 				strioc.ic_len = sizeof (char);
3388 				strioc.ic_dp = (char *)arg;
3389 				return (strdoioctl(stp, &strioc, flag,
3390 					copyflag, crp, rvalp));
3391 
3392 			case TIOCSWINSZ:
3393 				strioc.ic_len = sizeof (struct winsize);
3394 				strioc.ic_dp = (char *)arg;
3395 				return (strdoioctl(stp, &strioc, flag,
3396 					copyflag, crp, rvalp));
3397 
3398 			case TIOCSSIZE:
3399 				strioc.ic_len = sizeof (struct ttysize);
3400 				strioc.ic_dp = (char *)arg;
3401 				return (strdoioctl(stp, &strioc, flag,
3402 					copyflag, crp, rvalp));
3403 
3404 			case TIOCSSOFTCAR:
3405 			case KIOCTRANS:
3406 			case KIOCTRANSABLE:
3407 			case KIOCCMD:
3408 			case KIOCSDIRECT:
3409 			case KIOCSCOMPAT:
3410 			case KIOCSKABORTEN:
3411 			case KIOCSRPTDELAY:
3412 			case KIOCSRPTRATE:
3413 			case VUIDSFORMAT:
3414 			case TIOCSPPS:
3415 				strioc.ic_len = sizeof (int);
3416 				strioc.ic_dp = (char *)arg;
3417 				return (strdoioctl(stp, &strioc, flag,
3418 					copyflag, crp, rvalp));
3419 
3420 			case KIOCSETKEY:
3421 			case KIOCGETKEY:
3422 				strioc.ic_len = sizeof (struct kiockey);
3423 				strioc.ic_dp = (char *)arg;
3424 				return (strdoioctl(stp, &strioc, flag,
3425 					copyflag, crp, rvalp));
3426 
3427 			case KIOCSKEY:
3428 			case KIOCGKEY:
3429 				strioc.ic_len = sizeof (struct kiockeymap);
3430 				strioc.ic_dp = (char *)arg;
3431 				return (strdoioctl(stp, &strioc, flag,
3432 					copyflag, crp, rvalp));
3433 
3434 			case KIOCSLED:
3435 				/* arg is a pointer to char */
3436 				strioc.ic_len = sizeof (char);
3437 				strioc.ic_dp = (char *)arg;
3438 				return (strdoioctl(stp, &strioc, flag,
3439 					copyflag, crp, rvalp));
3440 
3441 			case MSIOSETPARMS:
3442 				strioc.ic_len = sizeof (Ms_parms);
3443 				strioc.ic_dp = (char *)arg;
3444 				return (strdoioctl(stp, &strioc, flag,
3445 					copyflag, crp, rvalp));
3446 
3447 			case VUIDSADDR:
3448 			case VUIDGADDR:
3449 				strioc.ic_len = sizeof (struct vuid_addr_probe);
3450 				strioc.ic_dp = (char *)arg;
3451 				return (strdoioctl(stp, &strioc, flag,
3452 					copyflag, crp, rvalp));
3453 
3454 			/*
3455 			 * These M_IOCTL's don't require any data to be sent
3456 			 * downstream, and the driver will allocate and link
3457 			 * on its own mblk_t upon M_IOCACK -- thus we set
3458 			 * ic_len to zero and set ic_dp to arg so we know
3459 			 * where to copyout to later.
3460 			 */
3461 			case TIOCGSOFTCAR:
3462 			case TIOCGWINSZ:
3463 			case TIOCGSIZE:
3464 			case KIOCGTRANS:
3465 			case KIOCGTRANSABLE:
3466 			case KIOCTYPE:
3467 			case KIOCGDIRECT:
3468 			case KIOCGCOMPAT:
3469 			case KIOCLAYOUT:
3470 			case KIOCGLED:
3471 			case MSIOGETPARMS:
3472 			case MSIOBUTTONS:
3473 			case VUIDGFORMAT:
3474 			case TIOCGPPS:
3475 			case TIOCGPPSEV:
3476 			case TCGETA:
3477 			case TCGETS:
3478 			case LDGETT:
3479 			case TIOCGETP:
3480 			case KIOCGRPTDELAY:
3481 			case KIOCGRPTRATE:
3482 				strioc.ic_len = 0;
3483 				strioc.ic_dp = (char *)arg;
3484 				return (strdoioctl(stp, &strioc, flag,
3485 					copyflag, crp, rvalp));
3486 			}
3487 		}
3488 
3489 		/*
3490 		 * Unknown cmd - send it down as a transparent ioctl.
3491 		 */
3492 		strioc.ic_cmd = cmd;
3493 		strioc.ic_timout = INFTIM;
3494 		strioc.ic_len = TRANSPARENT;
3495 		strioc.ic_dp = (char *)&arg;
3496 
3497 		return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
3498 
3499 	case I_STR:
3500 		/*
3501 		 * Stream ioctl.  Read in an strioctl buffer from the user
3502 		 * along with any data specified and send it downstream.
3503 		 * Strdoioctl will wait allow only one ioctl message at
3504 		 * a time, and waits for the acknowledgement.
3505 		 */
3506 
3507 		if (stp->sd_flag & STRHUP)
3508 			return (ENXIO);
3509 
3510 		error = strcopyin_strioctl((void *)arg, &strioc, flag,
3511 		    copyflag);
3512 		if (error != 0)
3513 			return (error);
3514 
3515 		if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
3516 			return (EINVAL);
3517 
3518 		access = job_control_type(strioc.ic_cmd);
3519 		if (access != -1 && stp->sd_sidp != NULL &&
3520 		    stp->sd_vnode->v_type != VFIFO &&
3521 		    (error = straccess(stp, access)) != 0)
3522 			return (error);
3523 
3524 		/*
3525 		 * The I_STR facility provides a trap door for malicious
3526 		 * code to send down bogus streamio(7I) ioctl commands to
3527 		 * unsuspecting STREAMS modules and drivers which expect to
3528 		 * only get these messages from the stream head.
3529 		 * Explicitly prohibit any streamio ioctls which can be
3530 		 * passed downstream by the stream head.  Note that we do
3531 		 * not block all streamio ioctls because the ioctl
3532 		 * numberspace is not well managed and thus it's possible
3533 		 * that a module or driver's ioctl numbers may accidentally
3534 		 * collide with them.
3535 		 */
3536 		switch (strioc.ic_cmd) {
3537 		case I_LINK:
3538 		case I_PLINK:
3539 		case I_UNLINK:
3540 		case I_PUNLINK:
3541 		case _I_GETPEERCRED:
3542 		case _I_PLINK_LH:
3543 			return (EINVAL);
3544 		}
3545 
3546 		error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
3547 		if (error == 0) {
3548 			error = strcopyout_strioctl(&strioc, (void *)arg,
3549 			    flag, copyflag);
3550 		}
3551 		return (error);
3552 
3553 	case I_NREAD:
3554 		/*
3555 		 * Return number of bytes of data in first message
3556 		 * in queue in "arg" and return the number of messages
3557 		 * in queue in return value.
3558 		 */
3559 	    {
3560 		size_t	size;
3561 		int	retval;
3562 		int	count = 0;
3563 
3564 		mutex_enter(QLOCK(rdq));
3565 
3566 		size = msgdsize(rdq->q_first);
3567 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3568 			count++;
3569 
3570 		mutex_exit(QLOCK(rdq));
3571 		if (stp->sd_struiordq) {
3572 			infod_t infod;
3573 
3574 			infod.d_cmd = INFOD_COUNT;
3575 			infod.d_count = 0;
3576 			if (count == 0) {
3577 				infod.d_cmd |= INFOD_FIRSTBYTES;
3578 				infod.d_bytes = 0;
3579 			}
3580 			infod.d_res = 0;
3581 			(void) infonext(rdq, &infod);
3582 			count += infod.d_count;
3583 			if (infod.d_res & INFOD_FIRSTBYTES)
3584 				size = infod.d_bytes;
3585 		}
3586 
3587 		/*
3588 		 * Drop down from size_t to the "int" required by the
3589 		 * interface.  Cap at INT_MAX.
3590 		 */
3591 		retval = MIN(size, INT_MAX);
3592 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
3593 		    copyflag);
3594 		if (!error)
3595 			*rvalp = count;
3596 		return (error);
3597 	    }
3598 
3599 	case FIONREAD:
3600 		/*
3601 		 * Return number of bytes of data in all data messages
3602 		 * in queue in "arg".
3603 		 */
3604 	    {
3605 		size_t	size = 0;
3606 		int	retval;
3607 
3608 		mutex_enter(QLOCK(rdq));
3609 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3610 			size += msgdsize(mp);
3611 		mutex_exit(QLOCK(rdq));
3612 
3613 		if (stp->sd_struiordq) {
3614 			infod_t infod;
3615 
3616 			infod.d_cmd = INFOD_BYTES;
3617 			infod.d_res = 0;
3618 			infod.d_bytes = 0;
3619 			(void) infonext(rdq, &infod);
3620 			size += infod.d_bytes;
3621 		}
3622 
3623 		/*
3624 		 * Drop down from size_t to the "int" required by the
3625 		 * interface.  Cap at INT_MAX.
3626 		 */
3627 		retval = MIN(size, INT_MAX);
3628 		error = strcopyout(&retval, (void *)arg, sizeof (retval),
3629 		    copyflag);
3630 
3631 		*rvalp = 0;
3632 		return (error);
3633 	    }
3634 	case FIORDCHK:
3635 		/*
3636 		 * FIORDCHK does not use arg value (like FIONREAD),
3637 		 * instead a count is returned. I_NREAD value may
3638 		 * not be accurate but safe. The real thing to do is
3639 		 * to add the msgdsizes of all data  messages until
3640 		 * a non-data message.
3641 		 */
3642 	    {
3643 		size_t size = 0;
3644 
3645 		mutex_enter(QLOCK(rdq));
3646 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3647 			size += msgdsize(mp);
3648 		mutex_exit(QLOCK(rdq));
3649 
3650 		if (stp->sd_struiordq) {
3651 			infod_t infod;
3652 
3653 			infod.d_cmd = INFOD_BYTES;
3654 			infod.d_res = 0;
3655 			infod.d_bytes = 0;
3656 			(void) infonext(rdq, &infod);
3657 			size += infod.d_bytes;
3658 		}
3659 
3660 		/*
3661 		 * Since ioctl returns an int, and memory sizes under
3662 		 * LP64 may not fit, we return INT_MAX if the count was
3663 		 * actually greater.
3664 		 */
3665 		*rvalp = MIN(size, INT_MAX);
3666 		return (0);
3667 	    }
3668 
3669 	case I_FIND:
3670 		/*
3671 		 * Get module name.
3672 		 */
3673 	    {
3674 		char mname[FMNAMESZ + 1];
3675 		queue_t *q;
3676 
3677 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3678 		    mname, FMNAMESZ + 1, NULL);
3679 		if (error)
3680 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3681 
3682 		/*
3683 		 * Return EINVAL if we're handed a bogus module name.
3684 		 */
3685 		if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
3686 			TRACE_0(TR_FAC_STREAMS_FR,
3687 				TR_I_CANT_FIND, "couldn't I_FIND");
3688 			return (EINVAL);
3689 		}
3690 
3691 		*rvalp = 0;
3692 
3693 		/* Look downstream to see if module is there. */
3694 		claimstr(stp->sd_wrq);
3695 		for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3696 			if (q->q_flag&QREADR) {
3697 				q = NULL;
3698 				break;
3699 			}
3700 			if (strcmp(mname, q->q_qinfo->qi_minfo->mi_idname) == 0)
3701 				break;
3702 		}
3703 		releasestr(stp->sd_wrq);
3704 
3705 		*rvalp = (q ? 1 : 0);
3706 		return (error);
3707 	    }
3708 
3709 	case I_PUSH:
3710 	case __I_PUSH_NOCTTY:
3711 		/*
3712 		 * Push a module.
3713 		 * For the case __I_PUSH_NOCTTY push a module but
3714 		 * do not allocate controlling tty. See bugid 4025044
3715 		 */
3716 
3717 	    {
3718 		char mname[FMNAMESZ + 1];
3719 		fmodsw_impl_t *fp;
3720 		dev_t dummydev;
3721 
3722 		if (stp->sd_flag & STRHUP)
3723 			return (ENXIO);
3724 
3725 		/*
3726 		 * Get module name and look up in fmodsw.
3727 		 */
3728 		error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3729 		    mname, FMNAMESZ + 1, NULL);
3730 		if (error)
3731 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3732 
3733 		if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
3734 		    NULL)
3735 			return (EINVAL);
3736 
3737 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
3738 		    "I_PUSH:fp %p stp %p", fp, stp);
3739 
3740 		if (error = strstartplumb(stp, flag, cmd)) {
3741 			fmodsw_rele(fp);
3742 			return (error);
3743 		}
3744 
3745 		/*
3746 		 * See if any more modules can be pushed on this stream.
3747 		 * Note that this check must be done after strstartplumb()
3748 		 * since otherwise multiple threads issuing I_PUSHes on
3749 		 * the same stream will be able to exceed nstrpush.
3750 		 */
3751 		mutex_enter(&stp->sd_lock);
3752 		if (stp->sd_pushcnt >= nstrpush) {
3753 			fmodsw_rele(fp);
3754 			strendplumb(stp);
3755 			mutex_exit(&stp->sd_lock);
3756 			return (EINVAL);
3757 		}
3758 		mutex_exit(&stp->sd_lock);
3759 
3760 		/*
3761 		 * Push new module and call its open routine
3762 		 * via qattach().  Modules don't change device
3763 		 * numbers, so just ignore dummydev here.
3764 		 */
3765 		dummydev = vp->v_rdev;
3766 		if ((error = qattach(rdq, &dummydev, 0, crp, fp,
3767 		    B_FALSE)) == 0) {
3768 			if (vp->v_type == VCHR && /* sorry, no pipes allowed */
3769 			    (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
3770 				/*
3771 				 * try to allocate it as a controlling terminal
3772 				 */
3773 				strctty(stp);
3774 			}
3775 		}
3776 
3777 		/*
3778 		 * If flow control is on, don't break it - enable
3779 		 * first back queue with svc procedure.
3780 		 */
3781 		if (rdq->q_flag & QWANTW) {
3782 			/* Note: no setqback here - use pri -1. */
3783 			backenable(_RD(wrq->q_next), -1);
3784 		}
3785 
3786 		mutex_enter(&stp->sd_lock);
3787 
3788 		/*
3789 		 * As a performance concern we are caching the values of
3790 		 * q_minpsz and q_maxpsz of the module below the stream
3791 		 * head in the stream head.
3792 		 */
3793 		mutex_enter(QLOCK(stp->sd_wrq->q_next));
3794 		rmin = stp->sd_wrq->q_next->q_minpsz;
3795 		rmax = stp->sd_wrq->q_next->q_maxpsz;
3796 		mutex_exit(QLOCK(stp->sd_wrq->q_next));
3797 
3798 		/* Do this processing here as a performance concern */
3799 		if (strmsgsz != 0) {
3800 			if (rmax == INFPSZ)
3801 				rmax = strmsgsz;
3802 			else  {
3803 				if (vp->v_type == VFIFO)
3804 					rmax = MIN(PIPE_BUF, rmax);
3805 				else	rmax = MIN(strmsgsz, rmax);
3806 			}
3807 		}
3808 
3809 		mutex_enter(QLOCK(wrq));
3810 		stp->sd_qn_minpsz = rmin;
3811 		stp->sd_qn_maxpsz = rmax;
3812 		mutex_exit(QLOCK(wrq));
3813 
3814 		strendplumb(stp);
3815 		mutex_exit(&stp->sd_lock);
3816 		return (error);
3817 	    }
3818 
3819 	case I_POP:
3820 	    {
3821 		queue_t	*q;
3822 
3823 		if (stp->sd_flag & STRHUP)
3824 			return (ENXIO);
3825 		if (!wrq->q_next)	/* for broken pipes */
3826 			return (EINVAL);
3827 
3828 		if (error = strstartplumb(stp, flag, cmd))
3829 			return (error);
3830 
3831 		/*
3832 		 * If there is an anchor on this stream and popping
3833 		 * the current module would attempt to pop through the
3834 		 * anchor, then disallow the pop unless we have sufficient
3835 		 * privileges; take the cheapest (non-locking) check
3836 		 * first.
3837 		 */
3838 		if (secpolicy_net_config(crp, B_TRUE) != 0) {
3839 			mutex_enter(&stp->sd_lock);
3840 			/*
3841 			 * Anchors only apply if there's at least one
3842 			 * module on the stream (sd_pushcnt > 0).
3843 			 */
3844 			if (stp->sd_pushcnt > 0 &&
3845 			    stp->sd_pushcnt == stp->sd_anchor &&
3846 			    stp->sd_vnode->v_type != VFIFO) {
3847 				strendplumb(stp);
3848 				mutex_exit(&stp->sd_lock);
3849 				/* Audit and report error */
3850 				return (secpolicy_net_config(crp, B_FALSE));
3851 			}
3852 			mutex_exit(&stp->sd_lock);
3853 		}
3854 
3855 		q = wrq->q_next;
3856 		TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
3857 			"I_POP:%p from %p", q, stp);
3858 		if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
3859 			error = EINVAL;
3860 		} else {
3861 			qdetach(_RD(q), 1, flag, crp, B_FALSE);
3862 			error = 0;
3863 		}
3864 		mutex_enter(&stp->sd_lock);
3865 
3866 		/*
3867 		 * As a performance concern we are caching the values of
3868 		 * q_minpsz and q_maxpsz of the module below the stream
3869 		 * head in the stream head.
3870 		 */
3871 		mutex_enter(QLOCK(wrq->q_next));
3872 		rmin = wrq->q_next->q_minpsz;
3873 		rmax = wrq->q_next->q_maxpsz;
3874 		mutex_exit(QLOCK(wrq->q_next));
3875 
3876 		/* Do this processing here as a performance concern */
3877 		if (strmsgsz != 0) {
3878 			if (rmax == INFPSZ)
3879 				rmax = strmsgsz;
3880 			else  {
3881 				if (vp->v_type == VFIFO)
3882 					rmax = MIN(PIPE_BUF, rmax);
3883 				else	rmax = MIN(strmsgsz, rmax);
3884 			}
3885 		}
3886 
3887 		mutex_enter(QLOCK(wrq));
3888 		stp->sd_qn_minpsz = rmin;
3889 		stp->sd_qn_maxpsz = rmax;
3890 		mutex_exit(QLOCK(wrq));
3891 
3892 		/* If we popped through the anchor, then reset the anchor. */
3893 		if (stp->sd_pushcnt < stp->sd_anchor)
3894 			stp->sd_anchor = 0;
3895 
3896 		strendplumb(stp);
3897 		mutex_exit(&stp->sd_lock);
3898 		return (error);
3899 	    }
3900 
3901 	case _I_MUXID2FD:
3902 	{
3903 		/*
3904 		 * Create a fd for a I_PLINK'ed lower stream with a given
3905 		 * muxid.  With the fd, application can send down ioctls,
3906 		 * like I_LIST, to the previously I_PLINK'ed stream.  Note
3907 		 * that after getting the fd, the application has to do an
3908 		 * I_PUNLINK on the muxid before it can do any operation
3909 		 * on the lower stream.  This is required by spec1170.
3910 		 *
3911 		 * The fd used to do this ioctl should point to the same
3912 		 * controlling device used to do the I_PLINK.  If it uses
3913 		 * a different stream or an invalid muxid, I_MUXID2FD will
3914 		 * fail.  The error code is set to EINVAL.
3915 		 *
3916 		 * The intended use of this interface is the following.
3917 		 * An application I_PLINK'ed a stream and exits.  The fd
3918 		 * to the lower stream is gone.  Another application
3919 		 * wants to get a fd to the lower stream, it uses I_MUXID2FD.
3920 		 */
3921 		int muxid = (int)arg;
3922 		int fd;
3923 		linkinfo_t *linkp;
3924 		struct file *fp;
3925 
3926 		/*
3927 		 * Do not allow the wildcard muxid.  This ioctl is not
3928 		 * intended to find arbitrary link.
3929 		 */
3930 		if (muxid == 0) {
3931 			return (EINVAL);
3932 		}
3933 
3934 		mutex_enter(&muxifier);
3935 		linkp = findlinks(vp->v_stream, muxid, LINKPERSIST);
3936 		if (linkp == NULL) {
3937 			mutex_exit(&muxifier);
3938 			return (EINVAL);
3939 		}
3940 
3941 		if ((fd = ufalloc(0)) == -1) {
3942 			mutex_exit(&muxifier);
3943 			return (EMFILE);
3944 		}
3945 		fp = linkp->li_fpdown;
3946 		mutex_enter(&fp->f_tlock);
3947 		fp->f_count++;
3948 		mutex_exit(&fp->f_tlock);
3949 		mutex_exit(&muxifier);
3950 		setf(fd, fp);
3951 		*rvalp = fd;
3952 		return (0);
3953 	}
3954 
3955 	case _I_INSERT:
3956 	{
3957 		/*
3958 		 * To insert a module to a given position in a stream.
3959 		 * In the first release, only allow privileged user
3960 		 * to use this ioctl.
3961 		 *
3962 		 * Note that we do not plan to support this ioctl
3963 		 * on pipes in the first release.  We want to learn more
3964 		 * about the implications of these ioctls before extending
3965 		 * their support.  And we do not think these features are
3966 		 * valuable for pipes.
3967 		 *
3968 		 * Neither do we support O/C hot stream.  Note that only
3969 		 * the upper streams of TCP/IP stack are O/C hot streams.
3970 		 * The lower IP stream is not.
3971 		 * When there is a O/C cold barrier, we only allow inserts
3972 		 * above the barrier.
3973 		 */
3974 		STRUCT_DECL(strmodconf, strmodinsert);
3975 		char mod_name[FMNAMESZ + 1];
3976 		fmodsw_impl_t *fp;
3977 		dev_t dummydev;
3978 		queue_t *tmp_wrq;
3979 		int pos;
3980 		boolean_t is_insert;
3981 
3982 		STRUCT_INIT(strmodinsert, flag);
3983 		if (stp->sd_flag & STRHUP)
3984 			return (ENXIO);
3985 		if (STRMATED(stp))
3986 			return (EINVAL);
3987 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
3988 			return (error);
3989 
3990 		error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
3991 		    STRUCT_SIZE(strmodinsert), copyflag);
3992 		if (error)
3993 			return (error);
3994 
3995 		/*
3996 		 * Get module name and look up in fmodsw.
3997 		 */
3998 		error = (copyflag & U_TO_K ? copyinstr :
3999 		    copystr)(STRUCT_FGETP(strmodinsert, mod_name),
4000 		    mod_name, FMNAMESZ + 1, NULL);
4001 		if (error)
4002 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4003 
4004 		if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
4005 		    NULL)
4006 			return (EINVAL);
4007 
4008 		if (error = strstartplumb(stp, flag, cmd)) {
4009 			fmodsw_rele(fp);
4010 			return (error);
4011 		}
4012 
4013 		/*
4014 		 * Is this _I_INSERT just like an I_PUSH?  We need to know
4015 		 * this because we do some optimizations if this is a
4016 		 * module being pushed.
4017 		 */
4018 		pos = STRUCT_FGET(strmodinsert, pos);
4019 		is_insert = (pos != 0);
4020 
4021 		/*
4022 		 * Make sure pos is valid.  Even though it is not an I_PUSH,
4023 		 * we impose the same limit on the number of modules in a
4024 		 * stream.
4025 		 */
4026 		mutex_enter(&stp->sd_lock);
4027 		if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
4028 		    pos > stp->sd_pushcnt) {
4029 			fmodsw_rele(fp);
4030 			strendplumb(stp);
4031 			mutex_exit(&stp->sd_lock);
4032 			return (EINVAL);
4033 		}
4034 		mutex_exit(&stp->sd_lock);
4035 
4036 		/*
4037 		 * First find the correct position this module to
4038 		 * be inserted.  We don't need to call claimstr()
4039 		 * as the stream should not be changing at this point.
4040 		 *
4041 		 * Insert new module and call its open routine
4042 		 * via qattach().  Modules don't change device
4043 		 * numbers, so just ignore dummydev here.
4044 		 */
4045 		for (tmp_wrq = stp->sd_wrq; pos > 0;
4046 		    tmp_wrq = tmp_wrq->q_next, pos--) {
4047 			ASSERT(SAMESTR(tmp_wrq));
4048 		}
4049 		dummydev = vp->v_rdev;
4050 		if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
4051 		    fp, is_insert)) != 0) {
4052 			mutex_enter(&stp->sd_lock);
4053 			strendplumb(stp);
4054 			mutex_exit(&stp->sd_lock);
4055 			return (error);
4056 		}
4057 		/*
4058 		 * If flow control is on, don't break it - enable
4059 		 * first back queue with svc procedure.
4060 		 */
4061 		if (_RD(tmp_wrq)->q_nfsrv->q_flag & QWANTW) {
4062 			/*
4063 			 * Note: no setqback here - use pri -1.
4064 			 * tmp_wrq->q_next is the new module.  We need
4065 			 * to backenable() the module below the new module.
4066 			 */
4067 			backenable(_RD(tmp_wrq->q_next->q_next), -1);
4068 		}
4069 
4070 		mutex_enter(&stp->sd_lock);
4071 
4072 		/*
4073 		 * As a performance concern we are caching the values of
4074 		 * q_minpsz and q_maxpsz of the module below the stream
4075 		 * head in the stream head.
4076 		 */
4077 		if (!is_insert) {
4078 			mutex_enter(QLOCK(stp->sd_wrq->q_next));
4079 			rmin = stp->sd_wrq->q_next->q_minpsz;
4080 			rmax = stp->sd_wrq->q_next->q_maxpsz;
4081 			mutex_exit(QLOCK(stp->sd_wrq->q_next));
4082 
4083 			/* Do this processing here as a performance concern */
4084 			if (strmsgsz != 0) {
4085 				if (rmax == INFPSZ) {
4086 					rmax = strmsgsz;
4087 				} else  {
4088 					rmax = MIN(strmsgsz, rmax);
4089 				}
4090 			}
4091 
4092 			mutex_enter(QLOCK(wrq));
4093 			stp->sd_qn_minpsz = rmin;
4094 			stp->sd_qn_maxpsz = rmax;
4095 			mutex_exit(QLOCK(wrq));
4096 		}
4097 
4098 		/*
4099 		 * Need to update the anchor value if this module is
4100 		 * inserted below the anchor point.
4101 		 */
4102 		if (stp->sd_anchor != 0) {
4103 			pos = STRUCT_FGET(strmodinsert, pos);
4104 			if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
4105 				stp->sd_anchor++;
4106 		}
4107 
4108 		strendplumb(stp);
4109 		mutex_exit(&stp->sd_lock);
4110 		return (0);
4111 	}
4112 
4113 	case _I_REMOVE:
4114 	{
4115 		/*
4116 		 * To remove a module with a given name in a stream.  The
4117 		 * caller of this ioctl needs to provide both the name and
4118 		 * the position of the module to be removed.  This eliminates
4119 		 * the ambiguity of removal if a module is inserted/pushed
4120 		 * multiple times in a stream.  In the first release, only
4121 		 * allow privileged user to use this ioctl.
4122 		 *
4123 		 * Note that we do not plan to support this ioctl
4124 		 * on pipes in the first release.  We want to learn more
4125 		 * about the implications of these ioctls before extending
4126 		 * their support.  And we do not think these features are
4127 		 * valuable for pipes.
4128 		 *
4129 		 * Neither do we support O/C hot stream.  Note that only
4130 		 * the upper streams of TCP/IP stack are O/C hot streams.
4131 		 * The lower IP stream is not.
4132 		 * When there is a O/C cold barrier we do not allow removal
4133 		 * below the barrier.
4134 		 *
4135 		 * Also note that _I_REMOVE cannot be used to remove a
4136 		 * driver or the stream head.
4137 		 */
4138 		STRUCT_DECL(strmodconf, strmodremove);
4139 		queue_t	*q;
4140 		int pos;
4141 		char mod_name[FMNAMESZ + 1];
4142 		boolean_t is_remove;
4143 
4144 		STRUCT_INIT(strmodremove, flag);
4145 		if (stp->sd_flag & STRHUP)
4146 			return (ENXIO);
4147 		if (STRMATED(stp))
4148 			return (EINVAL);
4149 		if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4150 			return (error);
4151 
4152 		error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
4153 		    STRUCT_SIZE(strmodremove), copyflag);
4154 		if (error)
4155 			return (error);
4156 
4157 		error = (copyflag & U_TO_K ? copyinstr :
4158 		    copystr)(STRUCT_FGETP(strmodremove, mod_name),
4159 		    mod_name, FMNAMESZ + 1, NULL);
4160 		if (error)
4161 			return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4162 
4163 		if ((error = strstartplumb(stp, flag, cmd)) != 0)
4164 			return (error);
4165 
4166 		/*
4167 		 * Match the name of given module to the name of module at
4168 		 * the given position.
4169 		 */
4170 		pos = STRUCT_FGET(strmodremove, pos);
4171 
4172 		is_remove = (pos != 0);
4173 		for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
4174 		    q = q->q_next, pos--)
4175 			;
4176 		if (pos > 0 || ! SAMESTR(q) ||
4177 		    strncmp(q->q_qinfo->qi_minfo->mi_idname, mod_name,
4178 		    strlen(q->q_qinfo->qi_minfo->mi_idname)) != 0) {
4179 			mutex_enter(&stp->sd_lock);
4180 			strendplumb(stp);
4181 			mutex_exit(&stp->sd_lock);
4182 			return (EINVAL);
4183 		}
4184 
4185 		ASSERT(!(q->q_flag & QREADR));
4186 		qdetach(_RD(q), 1, flag, crp, is_remove);
4187 
4188 		mutex_enter(&stp->sd_lock);
4189 
4190 		/*
4191 		 * As a performance concern we are caching the values of
4192 		 * q_minpsz and q_maxpsz of the module below the stream
4193 		 * head in the stream head.
4194 		 */
4195 		if (!is_remove) {
4196 			mutex_enter(QLOCK(wrq->q_next));
4197 			rmin = wrq->q_next->q_minpsz;
4198 			rmax = wrq->q_next->q_maxpsz;
4199 			mutex_exit(QLOCK(wrq->q_next));
4200 
4201 			/* Do this processing here as a performance concern */
4202 			if (strmsgsz != 0) {
4203 				if (rmax == INFPSZ)
4204 					rmax = strmsgsz;
4205 				else  {
4206 					if (vp->v_type == VFIFO)
4207 						rmax = MIN(PIPE_BUF, rmax);
4208 					else	rmax = MIN(strmsgsz, rmax);
4209 				}
4210 			}
4211 
4212 			mutex_enter(QLOCK(wrq));
4213 			stp->sd_qn_minpsz = rmin;
4214 			stp->sd_qn_maxpsz = rmax;
4215 			mutex_exit(QLOCK(wrq));
4216 		}
4217 
4218 		/*
4219 		 * Need to update the anchor value if this module is removed
4220 		 * at or below the anchor point.  If the removed module is at
4221 		 * the anchor point, remove the anchor for this stream if
4222 		 * there is no module above the anchor point.  Otherwise, if
4223 		 * the removed module is below the anchor point, decrement the
4224 		 * anchor point by 1.
4225 		 */
4226 		if (stp->sd_anchor != 0) {
4227 			pos = STRUCT_FGET(strmodremove, pos);
4228 			if (pos == 0)
4229 				stp->sd_anchor = 0;
4230 			else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
4231 				stp->sd_anchor--;
4232 		}
4233 
4234 		strendplumb(stp);
4235 		mutex_exit(&stp->sd_lock);
4236 		return (0);
4237 	}
4238 
4239 	case I_ANCHOR:
4240 		/*
4241 		 * Set the anchor position on the stream to reside at
4242 		 * the top module (in other words, the top module
4243 		 * cannot be popped).  Anchors with a FIFO make no
4244 		 * obvious sense, so they're not allowed.
4245 		 */
4246 		mutex_enter(&stp->sd_lock);
4247 
4248 		if (stp->sd_vnode->v_type == VFIFO) {
4249 			mutex_exit(&stp->sd_lock);
4250 			return (EINVAL);
4251 		}
4252 
4253 		stp->sd_anchor = stp->sd_pushcnt;
4254 
4255 		mutex_exit(&stp->sd_lock);
4256 		return (0);
4257 
4258 	case I_LOOK:
4259 		/*
4260 		 * Get name of first module downstream.
4261 		 * If no module, return an error.
4262 		 */
4263 	    {
4264 		claimstr(wrq);
4265 		if (_SAMESTR(wrq) && wrq->q_next->q_next) {
4266 			char *name = wrq->q_next->q_qinfo->qi_minfo->mi_idname;
4267 			error = strcopyout(name, (void *)arg, strlen(name) + 1,
4268 			    copyflag);
4269 			releasestr(wrq);
4270 			return (error);
4271 		}
4272 		releasestr(wrq);
4273 		return (EINVAL);
4274 	    }
4275 
4276 	case I_LINK:
4277 	case I_PLINK:
4278 		/*
4279 		 * Link a multiplexor.
4280 		 */
4281 		return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
4282 
4283 	case _I_PLINK_LH:
4284 		/*
4285 		 * Link a multiplexor: Call must originate from kernel.
4286 		 */
4287 		if (kioctl)
4288 			return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
4289 
4290 		return (EINVAL);
4291 	case I_UNLINK:
4292 	case I_PUNLINK:
4293 		/*
4294 		 * Unlink a multiplexor.
4295 		 * If arg is -1, unlink all links for which this is the
4296 		 * controlling stream.  Otherwise, arg is an index number
4297 		 * for a link to be removed.
4298 		 */
4299 	    {
4300 		struct linkinfo *linkp;
4301 		int native_arg = (int)arg;
4302 		int type;
4303 
4304 		TRACE_1(TR_FAC_STREAMS_FR,
4305 			TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
4306 		if (vp->v_type == VFIFO) {
4307 			return (EINVAL);
4308 		}
4309 		if (cmd == I_UNLINK)
4310 			type = LINKNORMAL;
4311 		else	/* I_PUNLINK */
4312 			type = LINKPERSIST;
4313 		if (native_arg == 0) {
4314 			return (EINVAL);
4315 		}
4316 		if (native_arg == MUXID_ALL)
4317 			error = munlinkall(stp, type, crp, rvalp);
4318 		else {
4319 			mutex_enter(&muxifier);
4320 			if (!(linkp = findlinks(stp, (int)arg, type))) {
4321 				/* invalid user supplied index number */
4322 				mutex_exit(&muxifier);
4323 				return (EINVAL);
4324 			}
4325 			/* munlink drops the muxifier lock */
4326 			error = munlink(stp, linkp, type, crp, rvalp);
4327 		}
4328 		return (error);
4329 	    }
4330 
4331 	case I_FLUSH:
4332 		/*
4333 		 * send a flush message downstream
4334 		 * flush message can indicate
4335 		 * FLUSHR - flush read queue
4336 		 * FLUSHW - flush write queue
4337 		 * FLUSHRW - flush read/write queue
4338 		 */
4339 		if (stp->sd_flag & STRHUP)
4340 			return (ENXIO);
4341 		if (arg & ~FLUSHRW)
4342 			return (EINVAL);
4343 
4344 		for (;;) {
4345 			if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
4346 				break;
4347 			}
4348 			if (error = strwaitbuf(1, BPRI_HI)) {
4349 				return (error);
4350 			}
4351 		}
4352 
4353 		/*
4354 		 * Send down an unsupported ioctl and wait for the nack
4355 		 * in order to allow the M_FLUSH to propagate back
4356 		 * up to the stream head.
4357 		 * Replaces if (qready()) runqueues();
4358 		 */
4359 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
4360 		strioc.ic_timout = 0;
4361 		strioc.ic_len = 0;
4362 		strioc.ic_dp = NULL;
4363 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4364 		*rvalp = 0;
4365 		return (0);
4366 
4367 	case I_FLUSHBAND:
4368 	    {
4369 		struct bandinfo binfo;
4370 
4371 		error = strcopyin((void *)arg, &binfo, sizeof (binfo),
4372 		    copyflag);
4373 		if (error)
4374 			return (error);
4375 		if (stp->sd_flag & STRHUP)
4376 			return (ENXIO);
4377 		if (binfo.bi_flag & ~FLUSHRW)
4378 			return (EINVAL);
4379 		while (!(mp = allocb(2, BPRI_HI))) {
4380 			if (error = strwaitbuf(2, BPRI_HI))
4381 				return (error);
4382 		}
4383 		mp->b_datap->db_type = M_FLUSH;
4384 		*mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
4385 		*mp->b_wptr++ = binfo.bi_pri;
4386 		putnext(stp->sd_wrq, mp);
4387 		/*
4388 		 * Send down an unsupported ioctl and wait for the nack
4389 		 * in order to allow the M_FLUSH to propagate back
4390 		 * up to the stream head.
4391 		 * Replaces if (qready()) runqueues();
4392 		 */
4393 		strioc.ic_cmd = -1;	/* The unsupported ioctl */
4394 		strioc.ic_timout = 0;
4395 		strioc.ic_len = 0;
4396 		strioc.ic_dp = NULL;
4397 		(void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4398 		*rvalp = 0;
4399 		return (0);
4400 	    }
4401 
4402 	case I_SRDOPT:
4403 		/*
4404 		 * Set read options
4405 		 *
4406 		 * RNORM - default stream mode
4407 		 * RMSGN - message no discard
4408 		 * RMSGD - message discard
4409 		 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
4410 		 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
4411 		 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
4412 		 */
4413 		if (arg & ~(RMODEMASK | RPROTMASK))
4414 			return (EINVAL);
4415 
4416 		if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
4417 			return (EINVAL);
4418 
4419 		mutex_enter(&stp->sd_lock);
4420 		switch (arg & RMODEMASK) {
4421 		case RNORM:
4422 			stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
4423 			break;
4424 		case RMSGD:
4425 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
4426 			    RD_MSGDIS;
4427 			break;
4428 		case RMSGN:
4429 			stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
4430 			    RD_MSGNODIS;
4431 			break;
4432 		}
4433 
4434 		switch (arg & RPROTMASK) {
4435 		case RPROTNORM:
4436 			stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
4437 			break;
4438 
4439 		case RPROTDAT:
4440 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
4441 			    RD_PROTDAT);
4442 			break;
4443 
4444 		case RPROTDIS:
4445 			stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
4446 			    RD_PROTDIS);
4447 			break;
4448 		}
4449 		mutex_exit(&stp->sd_lock);
4450 		return (0);
4451 
4452 	case I_GRDOPT:
4453 		/*
4454 		 * Get read option and return the value
4455 		 * to spot pointed to by arg
4456 		 */
4457 	    {
4458 		int rdopt;
4459 
4460 		rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
4461 		    ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
4462 		rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
4463 		    ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
4464 
4465 		return (strcopyout(&rdopt, (void *)arg, sizeof (int),
4466 		    copyflag));
4467 	    }
4468 
4469 	case I_SERROPT:
4470 		/*
4471 		 * Set error options
4472 		 *
4473 		 * RERRNORM - persistent read errors
4474 		 * RERRNONPERSIST - non-persistent read errors
4475 		 * WERRNORM - persistent write errors
4476 		 * WERRNONPERSIST - non-persistent write errors
4477 		 */
4478 		if (arg & ~(RERRMASK | WERRMASK))
4479 			return (EINVAL);
4480 
4481 		mutex_enter(&stp->sd_lock);
4482 		switch (arg & RERRMASK) {
4483 		case RERRNORM:
4484 			stp->sd_flag &= ~STRDERRNONPERSIST;
4485 			break;
4486 		case RERRNONPERSIST:
4487 			stp->sd_flag |= STRDERRNONPERSIST;
4488 			break;
4489 		}
4490 		switch (arg & WERRMASK) {
4491 		case WERRNORM:
4492 			stp->sd_flag &= ~STWRERRNONPERSIST;
4493 			break;
4494 		case WERRNONPERSIST:
4495 			stp->sd_flag |= STWRERRNONPERSIST;
4496 			break;
4497 		}
4498 		mutex_exit(&stp->sd_lock);
4499 		return (0);
4500 
4501 	case I_GERROPT:
4502 		/*
4503 		 * Get error option and return the value
4504 		 * to spot pointed to by arg
4505 		 */
4506 	    {
4507 		int erropt = 0;
4508 
4509 		erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
4510 			RERRNORM;
4511 		erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
4512 			WERRNORM;
4513 		return (strcopyout(&erropt, (void *)arg, sizeof (int),
4514 		    copyflag));
4515 	    }
4516 
4517 	case I_SETSIG:
4518 		/*
4519 		 * Register the calling proc to receive the SIGPOLL
4520 		 * signal based on the events given in arg.  If
4521 		 * arg is zero, remove the proc from register list.
4522 		 */
4523 	    {
4524 		strsig_t *ssp, *pssp;
4525 		struct pid *pidp;
4526 
4527 		pssp = NULL;
4528 		pidp = curproc->p_pidp;
4529 		/*
4530 		 * Hold sd_lock to prevent traversal of sd_siglist while
4531 		 * it is modified.
4532 		 */
4533 		mutex_enter(&stp->sd_lock);
4534 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
4535 			pssp = ssp, ssp = ssp->ss_next)
4536 			;
4537 
4538 		if (arg) {
4539 			if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4540 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4541 				mutex_exit(&stp->sd_lock);
4542 				return (EINVAL);
4543 			}
4544 			if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
4545 				mutex_exit(&stp->sd_lock);
4546 				return (EINVAL);
4547 			}
4548 
4549 			/*
4550 			 * If proc not already registered, add it
4551 			 * to list.
4552 			 */
4553 			if (!ssp) {
4554 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4555 				ssp->ss_pidp = pidp;
4556 				ssp->ss_pid = pidp->pid_id;
4557 				ssp->ss_next = NULL;
4558 				if (pssp)
4559 					pssp->ss_next = ssp;
4560 				else
4561 					stp->sd_siglist = ssp;
4562 				mutex_enter(&pidlock);
4563 				PID_HOLD(pidp);
4564 				mutex_exit(&pidlock);
4565 			}
4566 
4567 			/*
4568 			 * Set events.
4569 			 */
4570 			ssp->ss_events = (int)arg;
4571 		} else {
4572 			/*
4573 			 * Remove proc from register list.
4574 			 */
4575 			if (ssp) {
4576 				mutex_enter(&pidlock);
4577 				PID_RELE(pidp);
4578 				mutex_exit(&pidlock);
4579 				if (pssp)
4580 					pssp->ss_next = ssp->ss_next;
4581 				else
4582 					stp->sd_siglist = ssp->ss_next;
4583 				kmem_free(ssp, sizeof (strsig_t));
4584 			} else {
4585 				mutex_exit(&stp->sd_lock);
4586 				return (EINVAL);
4587 			}
4588 		}
4589 
4590 		/*
4591 		 * Recalculate OR of sig events.
4592 		 */
4593 		stp->sd_sigflags = 0;
4594 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4595 			stp->sd_sigflags |= ssp->ss_events;
4596 		mutex_exit(&stp->sd_lock);
4597 		return (0);
4598 	    }
4599 
4600 	case I_GETSIG:
4601 		/*
4602 		 * Return (in arg) the current registration of events
4603 		 * for which the calling proc is to be signaled.
4604 		 */
4605 	    {
4606 		struct strsig *ssp;
4607 		struct pid  *pidp;
4608 
4609 		pidp = curproc->p_pidp;
4610 		mutex_enter(&stp->sd_lock);
4611 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4612 			if (ssp->ss_pidp == pidp) {
4613 				error = strcopyout(&ssp->ss_events, (void *)arg,
4614 				    sizeof (int), copyflag);
4615 				mutex_exit(&stp->sd_lock);
4616 				return (error);
4617 			}
4618 		mutex_exit(&stp->sd_lock);
4619 		return (EINVAL);
4620 	    }
4621 
4622 	case I_ESETSIG:
4623 		/*
4624 		 * Register the ss_pid to receive the SIGPOLL
4625 		 * signal based on the events in ss_events arg.  If
4626 		 * ss_events is zero, remove the proc from register list.
4627 		 */
4628 	{
4629 		struct strsig *ssp, *pssp;
4630 		struct proc *proc;
4631 		struct pid  *pidp;
4632 		pid_t pid;
4633 		struct strsigset ss;
4634 
4635 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4636 		if (error)
4637 			return (error);
4638 
4639 		pid = ss.ss_pid;
4640 
4641 		if (ss.ss_events != 0) {
4642 			/*
4643 			 * Permissions check by sending signal 0.
4644 			 * Note that when kill fails it does a set_errno
4645 			 * causing the system call to fail.
4646 			 */
4647 			error = kill(pid, 0);
4648 			if (error) {
4649 				return (error);
4650 			}
4651 		}
4652 		mutex_enter(&pidlock);
4653 		if (pid == 0)
4654 			proc = curproc;
4655 		else if (pid < 0)
4656 			proc = pgfind(-pid);
4657 		else
4658 			proc = prfind(pid);
4659 		if (proc == NULL) {
4660 			mutex_exit(&pidlock);
4661 			return (ESRCH);
4662 		}
4663 		if (pid < 0)
4664 			pidp = proc->p_pgidp;
4665 		else
4666 			pidp = proc->p_pidp;
4667 		ASSERT(pidp);
4668 		/*
4669 		 * Get a hold on the pid structure while referencing it.
4670 		 * There is a separate PID_HOLD should it be inserted
4671 		 * in the list below.
4672 		 */
4673 		PID_HOLD(pidp);
4674 		mutex_exit(&pidlock);
4675 
4676 		pssp = NULL;
4677 		/*
4678 		 * Hold sd_lock to prevent traversal of sd_siglist while
4679 		 * it is modified.
4680 		 */
4681 		mutex_enter(&stp->sd_lock);
4682 		for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
4683 				pssp = ssp, ssp = ssp->ss_next)
4684 			;
4685 
4686 		if (ss.ss_events) {
4687 			if (ss.ss_events &
4688 			    ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4689 			    S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4690 				mutex_exit(&stp->sd_lock);
4691 				mutex_enter(&pidlock);
4692 				PID_RELE(pidp);
4693 				mutex_exit(&pidlock);
4694 				return (EINVAL);
4695 			}
4696 			if ((ss.ss_events & S_BANDURG) &&
4697 			    !(ss.ss_events & S_RDBAND)) {
4698 				mutex_exit(&stp->sd_lock);
4699 				mutex_enter(&pidlock);
4700 				PID_RELE(pidp);
4701 				mutex_exit(&pidlock);
4702 				return (EINVAL);
4703 			}
4704 
4705 			/*
4706 			 * If proc not already registered, add it
4707 			 * to list.
4708 			 */
4709 			if (!ssp) {
4710 				ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4711 				ssp->ss_pidp = pidp;
4712 				ssp->ss_pid = pid;
4713 				ssp->ss_next = NULL;
4714 				if (pssp)
4715 					pssp->ss_next = ssp;
4716 				else
4717 					stp->sd_siglist = ssp;
4718 				mutex_enter(&pidlock);
4719 				PID_HOLD(pidp);
4720 				mutex_exit(&pidlock);
4721 			}
4722 
4723 			/*
4724 			 * Set events.
4725 			 */
4726 			ssp->ss_events = ss.ss_events;
4727 		} else {
4728 			/*
4729 			 * Remove proc from register list.
4730 			 */
4731 			if (ssp) {
4732 				mutex_enter(&pidlock);
4733 				PID_RELE(pidp);
4734 				mutex_exit(&pidlock);
4735 				if (pssp)
4736 					pssp->ss_next = ssp->ss_next;
4737 				else
4738 					stp->sd_siglist = ssp->ss_next;
4739 				kmem_free(ssp, sizeof (strsig_t));
4740 			} else {
4741 				mutex_exit(&stp->sd_lock);
4742 				mutex_enter(&pidlock);
4743 				PID_RELE(pidp);
4744 				mutex_exit(&pidlock);
4745 				return (EINVAL);
4746 			}
4747 		}
4748 
4749 		/*
4750 		 * Recalculate OR of sig events.
4751 		 */
4752 		stp->sd_sigflags = 0;
4753 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4754 			stp->sd_sigflags |= ssp->ss_events;
4755 		mutex_exit(&stp->sd_lock);
4756 		mutex_enter(&pidlock);
4757 		PID_RELE(pidp);
4758 		mutex_exit(&pidlock);
4759 		return (0);
4760 	    }
4761 
4762 	case I_EGETSIG:
4763 		/*
4764 		 * Return (in arg) the current registration of events
4765 		 * for which the proc (or pgrp) given by ss_pid is signaled.
4766 		 */
4767 	    {
4768 		struct strsig *ssp;
4769 		struct proc *proc;
4770 		pid_t pid;
4771 		struct pid  *pidp;
4772 		struct strsigset ss;
4773 
4774 		error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4775 		if (error)
4776 			return (error);
4777 
4778 		pid = ss.ss_pid;
4779 		mutex_enter(&pidlock);
4780 		if (pid == 0)
4781 			proc = curproc;
4782 		else if (pid < 0)
4783 			proc = pgfind(-pid);
4784 		else
4785 			proc = prfind(pid);
4786 		if (proc == NULL) {
4787 			mutex_exit(&pidlock);
4788 			return (ESRCH);
4789 		}
4790 		if (pid < 0)
4791 			pidp = proc->p_pgidp;
4792 		else
4793 			pidp = proc->p_pidp;
4794 
4795 		/* Prevent the pidp from being reassigned */
4796 		PID_HOLD(pidp);
4797 		mutex_exit(&pidlock);
4798 
4799 		mutex_enter(&stp->sd_lock);
4800 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4801 			if (ssp->ss_pid == pid) {
4802 				ss.ss_pid = ssp->ss_pid;
4803 				ss.ss_events = ssp->ss_events;
4804 				error = strcopyout(&ss, (void *)arg,
4805 				    sizeof (struct strsigset), copyflag);
4806 				mutex_exit(&stp->sd_lock);
4807 				mutex_enter(&pidlock);
4808 				PID_RELE(pidp);
4809 				mutex_exit(&pidlock);
4810 				return (error);
4811 			}
4812 		mutex_exit(&stp->sd_lock);
4813 		mutex_enter(&pidlock);
4814 		PID_RELE(pidp);
4815 		mutex_exit(&pidlock);
4816 		return (EINVAL);
4817 	    }
4818 
4819 	case I_PEEK:
4820 	    {
4821 		STRUCT_DECL(strpeek, strpeek);
4822 		size_t n;
4823 		mblk_t *fmp, *tmp_mp = NULL;
4824 
4825 		STRUCT_INIT(strpeek, flag);
4826 
4827 		error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
4828 		    STRUCT_SIZE(strpeek), copyflag);
4829 		if (error)
4830 			return (error);
4831 
4832 		mutex_enter(QLOCK(rdq));
4833 		/*
4834 		 * Skip the invalid messages
4835 		 */
4836 		for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
4837 			if (mp->b_datap->db_type != M_SIG)
4838 				break;
4839 
4840 		/*
4841 		 * If user has requested to peek at a high priority message
4842 		 * and first message is not, return 0
4843 		 */
4844 		if (mp != NULL) {
4845 			if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
4846 			    queclass(mp) == QNORM) {
4847 				*rvalp = 0;
4848 				mutex_exit(QLOCK(rdq));
4849 				return (0);
4850 			}
4851 		} else if (stp->sd_struiordq == NULL ||
4852 		    (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
4853 			/*
4854 			 * No mblks to look at at the streamhead and
4855 			 * 1). This isn't a synch stream or
4856 			 * 2). This is a synch stream but caller wants high
4857 			 *	priority messages which is not supported by
4858 			 *	the synch stream. (it only supports QNORM)
4859 			 */
4860 			*rvalp = 0;
4861 			mutex_exit(QLOCK(rdq));
4862 			return (0);
4863 		}
4864 
4865 		fmp = mp;
4866 
4867 		if (mp && mp->b_datap->db_type == M_PASSFP) {
4868 			mutex_exit(QLOCK(rdq));
4869 			return (EBADMSG);
4870 		}
4871 
4872 		ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
4873 		    mp->b_datap->db_type == M_PROTO ||
4874 		    mp->b_datap->db_type == M_DATA);
4875 
4876 		if (mp && mp->b_datap->db_type == M_PCPROTO) {
4877 			STRUCT_FSET(strpeek, flags, RS_HIPRI);
4878 		} else {
4879 			STRUCT_FSET(strpeek, flags, 0);
4880 		}
4881 
4882 
4883 		if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
4884 			mutex_exit(QLOCK(rdq));
4885 			return (ENOSR);
4886 		}
4887 		mutex_exit(QLOCK(rdq));
4888 
4889 		/*
4890 		 * set mp = tmp_mp, so that I_PEEK processing can continue.
4891 		 * tmp_mp is used to free the dup'd message.
4892 		 */
4893 		mp = tmp_mp;
4894 
4895 		uio.uio_fmode = 0;
4896 		uio.uio_extflg = UIO_COPY_CACHED;
4897 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
4898 		    UIO_SYSSPACE;
4899 		uio.uio_limit = 0;
4900 		/*
4901 		 * First process PROTO blocks, if any.
4902 		 * If user doesn't want to get ctl info by setting maxlen <= 0,
4903 		 * then set len to -1/0 and skip control blocks part.
4904 		 */
4905 		if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
4906 			STRUCT_FSET(strpeek, ctlbuf.len, -1);
4907 		else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
4908 			STRUCT_FSET(strpeek, ctlbuf.len, 0);
4909 		else {
4910 			int	ctl_part = 0;
4911 
4912 			iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
4913 			iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
4914 			uio.uio_iov = &iov;
4915 			uio.uio_resid = iov.iov_len;
4916 			uio.uio_loffset = 0;
4917 			uio.uio_iovcnt = 1;
4918 			while (mp && mp->b_datap->db_type != M_DATA &&
4919 			    uio.uio_resid >= 0) {
4920 				ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
4921 				    mp->b_datap->db_type == M_PROTO :
4922 				    mp->b_datap->db_type == M_PCPROTO);
4923 
4924 				if ((n = MIN(uio.uio_resid,
4925 				    mp->b_wptr - mp->b_rptr)) != 0 &&
4926 				    (error = uiomove((char *)mp->b_rptr, n,
4927 				    UIO_READ, &uio)) != 0) {
4928 					freemsg(tmp_mp);
4929 					return (error);
4930 				}
4931 				ctl_part = 1;
4932 				mp = mp->b_cont;
4933 			}
4934 			/* No ctl message */
4935 			if (ctl_part == 0)
4936 				STRUCT_FSET(strpeek, ctlbuf.len, -1);
4937 			else
4938 				STRUCT_FSET(strpeek, ctlbuf.len,
4939 				    STRUCT_FGET(strpeek, ctlbuf.maxlen) -
4940 				    uio.uio_resid);
4941 		}
4942 
4943 		/*
4944 		 * Now process DATA blocks, if any.
4945 		 * If user doesn't want to get data info by setting maxlen <= 0,
4946 		 * then set len to -1/0 and skip data blocks part.
4947 		 */
4948 		if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
4949 			STRUCT_FSET(strpeek, databuf.len, -1);
4950 		else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
4951 			STRUCT_FSET(strpeek, databuf.len, 0);
4952 		else {
4953 			int	data_part = 0;
4954 
4955 			iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
4956 			iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
4957 			uio.uio_iov = &iov;
4958 			uio.uio_resid = iov.iov_len;
4959 			uio.uio_loffset = 0;
4960 			uio.uio_iovcnt = 1;
4961 			while (mp && uio.uio_resid) {
4962 				if (mp->b_datap->db_type == M_DATA) {
4963 					if ((n = MIN(uio.uio_resid,
4964 					    mp->b_wptr - mp->b_rptr)) != 0 &&
4965 					    (error = uiomove((char *)mp->b_rptr,
4966 						n, UIO_READ, &uio)) != 0) {
4967 						freemsg(tmp_mp);
4968 						return (error);
4969 					}
4970 					data_part = 1;
4971 				}
4972 				ASSERT(data_part == 0 ||
4973 				    mp->b_datap->db_type == M_DATA);
4974 				mp = mp->b_cont;
4975 			}
4976 			/* No data message */
4977 			if (data_part == 0)
4978 				STRUCT_FSET(strpeek, databuf.len, -1);
4979 			else
4980 				STRUCT_FSET(strpeek, databuf.len,
4981 				    STRUCT_FGET(strpeek, databuf.maxlen) -
4982 				    uio.uio_resid);
4983 		}
4984 		freemsg(tmp_mp);
4985 
4986 		/*
4987 		 * It is a synch stream and user wants to get
4988 		 * data (maxlen > 0).
4989 		 * uio setup is done by the code that processes DATA
4990 		 * blocks above.
4991 		 */
4992 		if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
4993 			infod_t infod;
4994 
4995 			infod.d_cmd = INFOD_COPYOUT;
4996 			infod.d_res = 0;
4997 			infod.d_uiop = &uio;
4998 			error = infonext(rdq, &infod);
4999 			if (error == EINVAL || error == EBUSY)
5000 				error = 0;
5001 			if (error)
5002 				return (error);
5003 			STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
5004 			    databuf.maxlen) - uio.uio_resid);
5005 			if (STRUCT_FGET(strpeek, databuf.len) == 0) {
5006 				/*
5007 				 * No data found by the infonext().
5008 				 */
5009 				STRUCT_FSET(strpeek, databuf.len, -1);
5010 			}
5011 		}
5012 		error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
5013 		    STRUCT_SIZE(strpeek), copyflag);
5014 		if (error) {
5015 			return (error);
5016 		}
5017 		/*
5018 		 * If there is no message retrieved, set return code to 0
5019 		 * otherwise, set it to 1.
5020 		 */
5021 		if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
5022 		    STRUCT_FGET(strpeek, databuf.len) == -1)
5023 			*rvalp = 0;
5024 		else
5025 			*rvalp = 1;
5026 		return (0);
5027 	    }
5028 
5029 	case I_FDINSERT:
5030 	    {
5031 		STRUCT_DECL(strfdinsert, strfdinsert);
5032 		struct file *resftp;
5033 		struct stdata *resstp;
5034 		t_uscalar_t	ival;
5035 		ssize_t msgsize;
5036 		struct strbuf mctl;
5037 
5038 		STRUCT_INIT(strfdinsert, flag);
5039 		if (stp->sd_flag & STRHUP)
5040 			return (ENXIO);
5041 		/*
5042 		 * STRDERR, STWRERR and STPLEX tested above.
5043 		 */
5044 		error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
5045 		    STRUCT_SIZE(strfdinsert), copyflag);
5046 		if (error)
5047 			return (error);
5048 
5049 		if (STRUCT_FGET(strfdinsert, offset) < 0 ||
5050 		    (STRUCT_FGET(strfdinsert, offset) %
5051 		    sizeof (t_uscalar_t)) != 0)
5052 			return (EINVAL);
5053 		if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
5054 			if ((resstp = resftp->f_vnode->v_stream) == NULL) {
5055 				releasef(STRUCT_FGET(strfdinsert, fildes));
5056 				return (EINVAL);
5057 			}
5058 		} else
5059 			return (EINVAL);
5060 
5061 		mutex_enter(&resstp->sd_lock);
5062 		if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
5063 			error = strgeterr(resstp,
5064 					STRDERR|STWRERR|STRHUP|STPLEX, 0);
5065 			if (error != 0) {
5066 				mutex_exit(&resstp->sd_lock);
5067 				releasef(STRUCT_FGET(strfdinsert, fildes));
5068 				return (error);
5069 			}
5070 		}
5071 		mutex_exit(&resstp->sd_lock);
5072 
5073 #ifdef	_ILP32
5074 		{
5075 			queue_t	*q;
5076 			queue_t	*mate = NULL;
5077 
5078 			/* get read queue of stream terminus */
5079 			claimstr(resstp->sd_wrq);
5080 			for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
5081 			    q = q->q_next)
5082 				if (!STRMATED(resstp) && STREAM(q) != resstp &&
5083 				    mate == NULL) {
5084 					ASSERT(q->q_qinfo->qi_srvp);
5085 					ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
5086 					claimstr(q);
5087 					mate = q;
5088 				}
5089 			q = _RD(q);
5090 			if (mate)
5091 				releasestr(mate);
5092 			releasestr(resstp->sd_wrq);
5093 			ival = (t_uscalar_t)q;
5094 		}
5095 #else
5096 		ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
5097 #endif	/* _ILP32 */
5098 
5099 		if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
5100 		    STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
5101 			releasef(STRUCT_FGET(strfdinsert, fildes));
5102 			return (EINVAL);
5103 		}
5104 
5105 		/*
5106 		 * Check for legal flag value.
5107 		 */
5108 		if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
5109 			releasef(STRUCT_FGET(strfdinsert, fildes));
5110 			return (EINVAL);
5111 		}
5112 
5113 		/* get these values from those cached in the stream head */
5114 		mutex_enter(QLOCK(stp->sd_wrq));
5115 		rmin = stp->sd_qn_minpsz;
5116 		rmax = stp->sd_qn_maxpsz;
5117 		mutex_exit(QLOCK(stp->sd_wrq));
5118 
5119 		/*
5120 		 * Make sure ctl and data sizes together fall within
5121 		 * the limits of the max and min receive packet sizes
5122 		 * and do not exceed system limit.  A negative data
5123 		 * length means that no data part is to be sent.
5124 		 */
5125 		ASSERT((rmax >= 0) || (rmax == INFPSZ));
5126 		if (rmax == 0) {
5127 			releasef(STRUCT_FGET(strfdinsert, fildes));
5128 			return (ERANGE);
5129 		}
5130 		if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
5131 			msgsize = 0;
5132 		if ((msgsize < rmin) ||
5133 		    ((msgsize > rmax) && (rmax != INFPSZ)) ||
5134 		    (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
5135 			releasef(STRUCT_FGET(strfdinsert, fildes));
5136 			return (ERANGE);
5137 		}
5138 
5139 		mutex_enter(&stp->sd_lock);
5140 		while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
5141 		    !canputnext(stp->sd_wrq)) {
5142 			if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
5143 			    flag, -1, &done)) != 0 || done) {
5144 				mutex_exit(&stp->sd_lock);
5145 				releasef(STRUCT_FGET(strfdinsert, fildes));
5146 				return (error);
5147 			}
5148 			if (stp->sd_sidp != NULL &&
5149 			    stp->sd_vnode->v_type != VFIFO) {
5150 				mutex_exit(&stp->sd_lock);
5151 				if (error = straccess(stp, access)) {
5152 					releasef(
5153 					    STRUCT_FGET(strfdinsert, fildes));
5154 					return (error);
5155 				}
5156 				mutex_enter(&stp->sd_lock);
5157 			}
5158 		}
5159 		mutex_exit(&stp->sd_lock);
5160 
5161 		/*
5162 		 * Copy strfdinsert.ctlbuf into native form of
5163 		 * ctlbuf to pass down into strmakemsg().
5164 		 */
5165 		mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
5166 		mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
5167 		mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
5168 
5169 		iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
5170 		iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
5171 		uio.uio_iov = &iov;
5172 		uio.uio_iovcnt = 1;
5173 		uio.uio_loffset = 0;
5174 		uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5175 		    UIO_SYSSPACE;
5176 		uio.uio_fmode = 0;
5177 		uio.uio_extflg = UIO_COPY_CACHED;
5178 		uio.uio_resid = iov.iov_len;
5179 		if ((error = strmakemsg(&mctl,
5180 		    &msgsize, &uio, stp,
5181 		    STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
5182 			STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5183 			releasef(STRUCT_FGET(strfdinsert, fildes));
5184 			return (error);
5185 		}
5186 
5187 		STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5188 
5189 		/*
5190 		 * Place the possibly reencoded queue pointer 'offset' bytes
5191 		 * from the start of the control portion of the message.
5192 		 */
5193 		*((t_uscalar_t *)(mp->b_rptr +
5194 		    STRUCT_FGET(strfdinsert, offset))) = ival;
5195 
5196 		/*
5197 		 * Put message downstream.
5198 		 */
5199 		stream_willservice(stp);
5200 		putnext(stp->sd_wrq, mp);
5201 		stream_runservice(stp);
5202 		releasef(STRUCT_FGET(strfdinsert, fildes));
5203 		return (error);
5204 	    }
5205 
5206 	case I_SENDFD:
5207 	    {
5208 		struct file *fp;
5209 
5210 		if ((fp = getf((int)arg)) == NULL)
5211 			return (EBADF);
5212 		error = do_sendfp(stp, fp, crp);
5213 #ifdef C2_AUDIT
5214 		if (audit_active) {
5215 			audit_fdsend((int)arg, fp, error);
5216 		}
5217 #endif
5218 		releasef((int)arg);
5219 		return (error);
5220 	    }
5221 
5222 	case I_RECVFD:
5223 	case I_E_RECVFD:
5224 	    {
5225 		struct k_strrecvfd *srf;
5226 		int i, fd;
5227 
5228 		mutex_enter(&stp->sd_lock);
5229 		while (!(mp = getq(rdq))) {
5230 			if (stp->sd_flag & (STRHUP|STREOF)) {
5231 				mutex_exit(&stp->sd_lock);
5232 				return (ENXIO);
5233 			}
5234 			if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
5235 			    flag, -1, &done)) != 0 || done) {
5236 				mutex_exit(&stp->sd_lock);
5237 				return (error);
5238 			}
5239 			if (stp->sd_sidp != NULL &&
5240 			    stp->sd_vnode->v_type != VFIFO) {
5241 				mutex_exit(&stp->sd_lock);
5242 				if (error = straccess(stp, access))
5243 					return (error);
5244 				mutex_enter(&stp->sd_lock);
5245 			}
5246 		}
5247 		if (mp->b_datap->db_type != M_PASSFP) {
5248 			putback(stp, rdq, mp, mp->b_band);
5249 			mutex_exit(&stp->sd_lock);
5250 			return (EBADMSG);
5251 		}
5252 		mutex_exit(&stp->sd_lock);
5253 
5254 		srf = (struct k_strrecvfd *)mp->b_rptr;
5255 		if ((fd = ufalloc(0)) == -1) {
5256 			mutex_enter(&stp->sd_lock);
5257 			putback(stp, rdq, mp, mp->b_band);
5258 			mutex_exit(&stp->sd_lock);
5259 			return (EMFILE);
5260 		}
5261 		if (cmd == I_RECVFD) {
5262 			struct o_strrecvfd	ostrfd;
5263 
5264 			/* check to see if uid/gid values are too large. */
5265 
5266 			if (srf->uid > (o_uid_t)USHRT_MAX ||
5267 			    srf->gid > (o_gid_t)USHRT_MAX) {
5268 				mutex_enter(&stp->sd_lock);
5269 				putback(stp, rdq, mp, mp->b_band);
5270 				mutex_exit(&stp->sd_lock);
5271 				setf(fd, NULL);	/* release fd entry */
5272 				return (EOVERFLOW);
5273 			}
5274 
5275 			ostrfd.fd = fd;
5276 			ostrfd.uid = (o_uid_t)srf->uid;
5277 			ostrfd.gid = (o_gid_t)srf->gid;
5278 
5279 			/* Null the filler bits */
5280 			for (i = 0; i < 8; i++)
5281 				ostrfd.fill[i] = 0;
5282 
5283 			error = strcopyout(&ostrfd, (void *)arg,
5284 			    sizeof (struct o_strrecvfd), copyflag);
5285 		} else {		/* I_E_RECVFD */
5286 			struct strrecvfd	strfd;
5287 
5288 			strfd.fd = fd;
5289 			strfd.uid = srf->uid;
5290 			strfd.gid = srf->gid;
5291 
5292 			/* null the filler bits */
5293 			for (i = 0; i < 8; i++)
5294 				strfd.fill[i] = 0;
5295 
5296 			error = strcopyout(&strfd, (void *)arg,
5297 			    sizeof (struct strrecvfd), copyflag);
5298 		}
5299 
5300 		if (error) {
5301 			setf(fd, NULL);	/* release fd entry */
5302 			mutex_enter(&stp->sd_lock);
5303 			putback(stp, rdq, mp, mp->b_band);
5304 			mutex_exit(&stp->sd_lock);
5305 			return (error);
5306 		}
5307 #ifdef C2_AUDIT
5308 		if (audit_active) {
5309 			audit_fdrecv(fd, srf->fp);
5310 		}
5311 #endif
5312 
5313 		/*
5314 		 * Always increment f_count since the freemsg() below will
5315 		 * always call free_passfp() which performs a closef().
5316 		 */
5317 		mutex_enter(&srf->fp->f_tlock);
5318 		srf->fp->f_count++;
5319 		mutex_exit(&srf->fp->f_tlock);
5320 		setf(fd, srf->fp);
5321 		freemsg(mp);
5322 		return (0);
5323 	    }
5324 
5325 	case I_SWROPT:
5326 		/*
5327 		 * Set/clear the write options. arg is a bit
5328 		 * mask with any of the following bits set...
5329 		 * 	SNDZERO - send zero length message
5330 		 *	SNDPIPE - send sigpipe to process if
5331 		 *		sd_werror is set and process is
5332 		 *		doing a write or putmsg.
5333 		 * The new stream head write options should reflect
5334 		 * what is in arg.
5335 		 */
5336 		if (arg & ~(SNDZERO|SNDPIPE))
5337 			return (EINVAL);
5338 
5339 		mutex_enter(&stp->sd_lock);
5340 		stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
5341 		if (arg & SNDZERO)
5342 			stp->sd_wput_opt |= SW_SNDZERO;
5343 		if (arg & SNDPIPE)
5344 			stp->sd_wput_opt |= SW_SIGPIPE;
5345 		mutex_exit(&stp->sd_lock);
5346 		return (0);
5347 
5348 	case I_GWROPT:
5349 	    {
5350 		int wropt = 0;
5351 
5352 		if (stp->sd_wput_opt & SW_SNDZERO)
5353 			wropt |= SNDZERO;
5354 		if (stp->sd_wput_opt & SW_SIGPIPE)
5355 			wropt |= SNDPIPE;
5356 		return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
5357 		    copyflag));
5358 	    }
5359 
5360 	case I_LIST:
5361 		/*
5362 		 * Returns all the modules found on this stream,
5363 		 * upto the driver. If argument is NULL, return the
5364 		 * number of modules (including driver). If argument
5365 		 * is not NULL, copy the names into the structure
5366 		 * provided.
5367 		 */
5368 
5369 	    {
5370 		queue_t *q;
5371 		int num_modules, space_allocated;
5372 		STRUCT_DECL(str_list, strlist);
5373 		struct str_mlist *mlist_ptr;
5374 
5375 		if (arg == NULL) { /* Return number of modules plus driver */
5376 			q = stp->sd_wrq;
5377 			if (stp->sd_vnode->v_type == VFIFO) {
5378 				*rvalp = stp->sd_pushcnt;
5379 			} else {
5380 				*rvalp = stp->sd_pushcnt + 1;
5381 			}
5382 		} else {
5383 			STRUCT_INIT(strlist, flag);
5384 
5385 			error = strcopyin((void *)arg, STRUCT_BUF(strlist),
5386 			    STRUCT_SIZE(strlist), copyflag);
5387 			if (error)
5388 				return (error);
5389 
5390 			space_allocated = STRUCT_FGET(strlist, sl_nmods);
5391 			if ((space_allocated) <= 0)
5392 				return (EINVAL);
5393 			claimstr(stp->sd_wrq);
5394 			q = stp->sd_wrq;
5395 			num_modules = 0;
5396 			while (_SAMESTR(q) && (space_allocated != 0)) {
5397 				char *name =
5398 				    q->q_next->q_qinfo->qi_minfo->mi_idname;
5399 
5400 				mlist_ptr = STRUCT_FGETP(strlist, sl_modlist);
5401 
5402 				error = strcopyout(name, mlist_ptr,
5403 				    strlen(name) + 1, copyflag);
5404 
5405 				if (error) {
5406 					releasestr(stp->sd_wrq);
5407 					return (error);
5408 				}
5409 				q = q->q_next;
5410 				space_allocated--;
5411 				num_modules++;
5412 				mlist_ptr =
5413 				    (struct str_mlist *)((uintptr_t)mlist_ptr +
5414 				    sizeof (struct str_mlist));
5415 				STRUCT_FSETP(strlist, sl_modlist, mlist_ptr);
5416 			}
5417 			releasestr(stp->sd_wrq);
5418 			error = strcopyout(&num_modules, (void *)arg,
5419 			    sizeof (int), copyflag);
5420 		}
5421 		return (error);
5422 	    }
5423 
5424 	case I_CKBAND:
5425 	    {
5426 		queue_t *q;
5427 		qband_t *qbp;
5428 
5429 		if ((arg < 0) || (arg >= NBAND))
5430 			return (EINVAL);
5431 		q = _RD(stp->sd_wrq);
5432 		mutex_enter(QLOCK(q));
5433 		if (arg > (int)q->q_nband) {
5434 			*rvalp = 0;
5435 		} else {
5436 			if (arg == 0) {
5437 				if (q->q_first)
5438 					*rvalp = 1;
5439 				else
5440 					*rvalp = 0;
5441 			} else {
5442 				qbp = q->q_bandp;
5443 				while (--arg > 0)
5444 					qbp = qbp->qb_next;
5445 				if (qbp->qb_first)
5446 					*rvalp = 1;
5447 				else
5448 					*rvalp = 0;
5449 			}
5450 		}
5451 		mutex_exit(QLOCK(q));
5452 		return (0);
5453 	    }
5454 
5455 	case I_GETBAND:
5456 	    {
5457 		int intpri;
5458 		queue_t *q;
5459 
5460 		q = _RD(stp->sd_wrq);
5461 		mutex_enter(QLOCK(q));
5462 		mp = q->q_first;
5463 		if (!mp) {
5464 			mutex_exit(QLOCK(q));
5465 			return (ENODATA);
5466 		}
5467 		intpri = (int)mp->b_band;
5468 		error = strcopyout(&intpri, (void *)arg, sizeof (int),
5469 		    copyflag);
5470 		mutex_exit(QLOCK(q));
5471 		return (error);
5472 	    }
5473 
5474 	case I_ATMARK:
5475 	    {
5476 		queue_t *q;
5477 
5478 		if (arg & ~(ANYMARK|LASTMARK))
5479 			return (EINVAL);
5480 		q = _RD(stp->sd_wrq);
5481 		mutex_enter(&stp->sd_lock);
5482 		if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
5483 			*rvalp = 1;
5484 		} else {
5485 			mutex_enter(QLOCK(q));
5486 			mp = q->q_first;
5487 
5488 			if (mp == NULL)
5489 				*rvalp = 0;
5490 			else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
5491 				*rvalp = 1;
5492 			else if ((arg == LASTMARK) && (mp == stp->sd_mark))
5493 				*rvalp = 1;
5494 			else
5495 				*rvalp = 0;
5496 			mutex_exit(QLOCK(q));
5497 		}
5498 		mutex_exit(&stp->sd_lock);
5499 		return (0);
5500 	    }
5501 
5502 	case I_CANPUT:
5503 	    {
5504 		char band;
5505 
5506 		if ((arg < 0) || (arg >= NBAND))
5507 			return (EINVAL);
5508 		band = (char)arg;
5509 		*rvalp = bcanputnext(stp->sd_wrq, band);
5510 		return (0);
5511 	    }
5512 
5513 	case I_SETCLTIME:
5514 	    {
5515 		int closetime;
5516 
5517 		error = strcopyin((void *)arg, &closetime, sizeof (int),
5518 		    copyflag);
5519 		if (error)
5520 			return (error);
5521 		if (closetime < 0)
5522 			return (EINVAL);
5523 
5524 		stp->sd_closetime = closetime;
5525 		return (0);
5526 	    }
5527 
5528 	case I_GETCLTIME:
5529 	    {
5530 		int closetime;
5531 
5532 		closetime = stp->sd_closetime;
5533 		return (strcopyout(&closetime, (void *)arg, sizeof (int),
5534 		    copyflag));
5535 	    }
5536 
5537 	case TIOCGSID:
5538 	{
5539 		pid_t sid;
5540 
5541 		mutex_enter(&pidlock);
5542 		if (stp->sd_sidp == NULL) {
5543 			mutex_exit(&pidlock);
5544 			return (ENOTTY);
5545 		}
5546 		sid = stp->sd_sidp->pid_id;
5547 		mutex_exit(&pidlock);
5548 		return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
5549 		    copyflag));
5550 	}
5551 
5552 	case TIOCSPGRP:
5553 	{
5554 		pid_t pgrp;
5555 		proc_t *q;
5556 		pid_t	sid, fg_pgid, bg_pgid;
5557 
5558 		if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
5559 		    copyflag))
5560 			return (error);
5561 		mutex_enter(&stp->sd_lock);
5562 		mutex_enter(&pidlock);
5563 		if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
5564 			mutex_exit(&pidlock);
5565 			mutex_exit(&stp->sd_lock);
5566 			return (ENOTTY);
5567 		}
5568 		if (pgrp == stp->sd_pgidp->pid_id) {
5569 			mutex_exit(&pidlock);
5570 			mutex_exit(&stp->sd_lock);
5571 			return (0);
5572 		}
5573 		if (pgrp <= 0 || pgrp >= maxpid) {
5574 			mutex_exit(&pidlock);
5575 			mutex_exit(&stp->sd_lock);
5576 			return (EINVAL);
5577 		}
5578 		if ((q = pgfind(pgrp)) == NULL ||
5579 		    q->p_sessp != ttoproc(curthread)->p_sessp) {
5580 			mutex_exit(&pidlock);
5581 			mutex_exit(&stp->sd_lock);
5582 			return (EPERM);
5583 		}
5584 		sid = stp->sd_sidp->pid_id;
5585 		fg_pgid = q->p_pgrp;
5586 		bg_pgid = stp->sd_pgidp->pid_id;
5587 		CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
5588 		PID_RELE(stp->sd_pgidp);
5589 		stp->sd_pgidp = q->p_pgidp;
5590 		PID_HOLD(stp->sd_pgidp);
5591 		mutex_exit(&pidlock);
5592 		mutex_exit(&stp->sd_lock);
5593 		return (0);
5594 	}
5595 
5596 	case TIOCGPGRP:
5597 	{
5598 		pid_t pgrp;
5599 
5600 		mutex_enter(&pidlock);
5601 		if (stp->sd_sidp == NULL) {
5602 			mutex_exit(&pidlock);
5603 			return (ENOTTY);
5604 		}
5605 		pgrp = stp->sd_pgidp->pid_id;
5606 		mutex_exit(&pidlock);
5607 		return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
5608 		    copyflag));
5609 	}
5610 
5611 	case FIONBIO:
5612 	case FIOASYNC:
5613 		return (0);	/* handled by the upper layer */
5614 	}
5615 }
5616 
5617 /*
5618  * Custom free routine used for M_PASSFP messages.
5619  */
5620 static void
5621 free_passfp(struct k_strrecvfd *srf)
5622 {
5623 	(void) closef(srf->fp);
5624 	kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
5625 }
5626 
5627 /* ARGSUSED */
5628 int
5629 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
5630 {
5631 	queue_t *qp, *nextqp;
5632 	struct k_strrecvfd *srf;
5633 	mblk_t *mp;
5634 	frtn_t *frtnp;
5635 	size_t bufsize;
5636 	queue_t	*mate = NULL;
5637 	syncq_t	*sq = NULL;
5638 	int retval = 0;
5639 
5640 	if (stp->sd_flag & STRHUP)
5641 		return (ENXIO);
5642 
5643 	claimstr(stp->sd_wrq);
5644 
5645 	/* Fastpath, we have a pipe, and we are already mated, use it. */
5646 	if (STRMATED(stp)) {
5647 		qp = _RD(stp->sd_mate->sd_wrq);
5648 		claimstr(qp);
5649 		mate = qp;
5650 	} else { /* Not already mated. */
5651 
5652 		/*
5653 		 * Walk the stream to the end of this one.
5654 		 * assumes that the claimstr() will prevent
5655 		 * plumbing between the stream head and the
5656 		 * driver from changing
5657 		 */
5658 		qp = stp->sd_wrq;
5659 
5660 		/*
5661 		 * Loop until we reach the end of this stream.
5662 		 * On completion, qp points to the write queue
5663 		 * at the end of the stream, or the read queue
5664 		 * at the stream head if this is a fifo.
5665 		 */
5666 		while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
5667 			;
5668 
5669 		/*
5670 		 * Just in case we get a q_next which is NULL, but
5671 		 * not at the end of the stream.  This is actually
5672 		 * broken, so we set an assert to catch it in
5673 		 * debug, and set an error and return if not debug.
5674 		 */
5675 		ASSERT(qp);
5676 		if (qp == NULL) {
5677 			releasestr(stp->sd_wrq);
5678 			return (EINVAL);
5679 		}
5680 
5681 		/*
5682 		 * Enter the syncq for the driver, so (hopefully)
5683 		 * the queue values will not change on us.
5684 		 * XXXX - This will only prevent the race IFF only
5685 		 *   the write side modifies the q_next member, and
5686 		 *   the put procedure is protected by at least
5687 		 *   MT_PERQ.
5688 		 */
5689 		if ((sq = qp->q_syncq) != NULL)
5690 			entersq(sq, SQ_PUT);
5691 
5692 		/* Now get the q_next value from this qp. */
5693 		nextqp = qp->q_next;
5694 
5695 		/*
5696 		 * If nextqp exists and the other stream is different
5697 		 * from this one claim the stream, set the mate, and
5698 		 * get the read queue at the stream head of the other
5699 		 * stream.  Assumes that nextqp was at least valid when
5700 		 * we got it.  Hopefully the entersq of the driver
5701 		 * will prevent it from changing on us.
5702 		 */
5703 		if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
5704 			ASSERT(qp->q_qinfo->qi_srvp);
5705 			ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
5706 			ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
5707 			claimstr(nextqp);
5708 
5709 			/* Make sure we still have a q_next */
5710 			if (nextqp != qp->q_next) {
5711 				releasestr(stp->sd_wrq);
5712 				releasestr(nextqp);
5713 				return (EINVAL);
5714 			}
5715 
5716 			qp = _RD(STREAM(nextqp)->sd_wrq);
5717 			mate = qp;
5718 		}
5719 		/* If we entered the synq above, leave it. */
5720 		if (sq != NULL)
5721 			leavesq(sq, SQ_PUT);
5722 	} /*  STRMATED(STP)  */
5723 
5724 	/* XXX prevents substitution of the ops vector */
5725 	if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
5726 		retval = EINVAL;
5727 		goto out;
5728 	}
5729 
5730 	if (qp->q_flag & QFULL) {
5731 		retval = EAGAIN;
5732 		goto out;
5733 	}
5734 
5735 	/*
5736 	 * Since M_PASSFP messages include a file descriptor, we use
5737 	 * esballoc() and specify a custom free routine (free_passfp()) that
5738 	 * will close the descriptor as part of freeing the message.  For
5739 	 * convenience, we stash the frtn_t right after the data block.
5740 	 */
5741 	bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
5742 	srf = kmem_alloc(bufsize, KM_NOSLEEP);
5743 	if (srf == NULL) {
5744 		retval = EAGAIN;
5745 		goto out;
5746 	}
5747 
5748 	frtnp = (frtn_t *)(srf + 1);
5749 	frtnp->free_arg = (caddr_t)srf;
5750 	frtnp->free_func = free_passfp;
5751 
5752 	mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
5753 	if (mp == NULL) {
5754 		kmem_free(srf, bufsize);
5755 		retval = EAGAIN;
5756 		goto out;
5757 	}
5758 	mp->b_wptr += sizeof (struct k_strrecvfd);
5759 	mp->b_datap->db_type = M_PASSFP;
5760 
5761 	srf->fp = fp;
5762 	srf->uid = crgetuid(curthread->t_cred);
5763 	srf->gid = crgetgid(curthread->t_cred);
5764 	mutex_enter(&fp->f_tlock);
5765 	fp->f_count++;
5766 	mutex_exit(&fp->f_tlock);
5767 
5768 	put(qp, mp);
5769 out:
5770 	releasestr(stp->sd_wrq);
5771 	if (mate)
5772 		releasestr(mate);
5773 	return (retval);
5774 }
5775 
/*
 * Send an ioctl message downstream and wait for acknowledgement.
 * flags may be set to either U_TO_K or K_TO_K and a combination
 * of STR_NOERROR or STR_NOSIG
 * STR_NOSIG: Signals are essentially ignored or held and have
 *	no effect for the duration of the call.
 * STR_NOERROR: Ignores stream head read, write and hup errors.
 *	Additionally, if an existing ioctl times out, it is assumed
 *	lost and this ioctl will continue as if the previous ioctl had
 *	finished.  ETIME may be returned if this ioctl times out (i.e.
 *	ic_timout is not INFTIM).  Non-stream head errors may be returned if
 *	the ioc_error indicates that the driver/module had problems,
 *	an EFAULT was found when accessing user data, a lack of
 * 	resources, etc.
 *
 * Arguments:
 *	stp	stream head the ioctl is issued on.
 *	strioc	request description.  ic_cmd, ic_len, ic_dp and ic_timout
 *		are read; ic_len is rewritten (to sizeof (intptr_t) for a
 *		TRANSPARENT request, and to the returned data length on a
 *		positive ack of a non-transparent request).
 *	fflags	file flags; only the FMODELS bits are used, and they are
 *		copied into ioc_flag / cp_flag of the messages sent down.
 *	flag	U_TO_K or K_TO_K, optionally with STR_NOSIG / STR_NOERROR.
 *	crp	credentials attached to the M_IOCTL.  A hold is taken here
 *		and dropped again on every return path after that point.
 *	rvalp	receives ioc_rval from a positive acknowledgement that
 *		carries no ioc_error.
 *
 * Returns 0 on success or an errno value: EINVAL for a bad ic_len, EINTR
 * or ETIME from the waits, a stream head error from strgeterr(), a copy
 * error from putiocd()/getiocd(), an allocation error, or the error
 * reported by the driver/module in the M_IOCACK/M_IOCNAK.
 */
int
strdoioctl(
	struct stdata *stp,
	struct strioctl *strioc,
	int fflags,		/* file flags with model info */
	int flag,
	cred_t *crp,
	int *rvalp)
{
	mblk_t *bp;
	struct iocblk *iocbp;
	struct copyreq *reqp;
	struct copyresp *resp;
	int id;
	int transparent = 0;
	int error = 0;
	int len = 0;
	caddr_t taddr;
	int copyflag = (flag & (U_TO_K | K_TO_K));
	int sigflag = (flag & STR_NOSIG);
	int errs;
	uint_t waitflags;

	ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
	ASSERT((fflags & FMODELS) != 0);

	TRACE_2(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL,
		"strdoioctl:stp %p strioc %p", stp, strioc);
	if (strioc->ic_len == TRANSPARENT) {	/* send arg in M_DATA block */
		transparent = 1;
		strioc->ic_len = sizeof (intptr_t);
	}

	/* Reject negative lengths and, if strmsgsz is set, oversized ones. */
	if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
		return (EINVAL);

	if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
	    crp)) == NULL)
			return (error);

	bzero(bp->b_wptr, sizeof (union ioctypes));

	iocbp = (struct iocblk *)bp->b_wptr;
	iocbp->ioc_count = strioc->ic_len;
	iocbp->ioc_cmd = strioc->ic_cmd;
	iocbp->ioc_flag = (fflags & FMODELS);

	/* This hold is released by the crfree() on each return path below. */
	crhold(crp);
	iocbp->ioc_cr = crp;
	DB_TYPE(bp) = M_IOCTL;
	DB_CPID(bp) = curproc->p_pid;
	bp->b_wptr += sizeof (struct iocblk);

	/* errs is the set of stream head conditions that abort this ioctl. */
	if (flag & STR_NOERROR)
		errs = STPLEX;
	else
		errs = STRHUP|STRDERR|STWRERR|STPLEX;

	/*
	 * If there is data to copy into ioctl block, do so.
	 */
	if (iocbp->ioc_count > 0) {
		/* 'id' is used here as the flags argument for putiocd(). */
		if (transparent)
			/*
			 * Note: STR_NOERROR does not have an effect
			 * in putiocd()
			 */
			id = K_TO_K | sigflag;
		else
			id = flag;
		if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
			freemsg(bp);
			crfree(crp);
			return (error);
		}

		/*
		 * We could have slept copying in user pages.
		 * Recheck the stream head state (the other end
		 * of a pipe could have gone away).
		 */
		if (stp->sd_flag & errs) {
			mutex_enter(&stp->sd_lock);
			error = strgeterr(stp, errs, 0);
			mutex_exit(&stp->sd_lock);
			if (error != 0) {
				freemsg(bp);
				crfree(crp);
				return (error);
			}
		}
	}
	/* ioc_count held the real length only while the data was attached. */
	if (transparent)
		iocbp->ioc_count = TRANSPARENT;

	/*
	 * Block for up to STRTIMOUT milliseconds if there is an outstanding
	 * ioctl for this stream already running.  All processes
	 * sleeping here will be awakened as a result of an ACK
	 * or NAK being received for the outstanding ioctl, or
	 * as a result of the timer expiring on the outstanding
	 * ioctl (a failure), or as a result of any waiting
	 * process's timer expiring (also a failure).
	 */

	error = 0;
	mutex_enter(&stp->sd_lock);
	while (stp->sd_flag & (IOCWAIT | IOCWAITNE)) {
		clock_t cv_rval;

		TRACE_0(TR_FAC_STREAMS_FR,
			TR_STRDOIOCTL_WAIT,
			"strdoioctl sleeps - IOCWAIT");
		cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
		    STRTIMOUT, sigflag);
		/* cv_rval == 0 is handled as interrupted, < 0 as timed out. */
		if (cv_rval <= 0) {
			if (cv_rval == 0) {
				error = EINTR;
			} else {
				if (flag & STR_NOERROR) {
					/*
					 * Terminating current ioctl in
					 * progress -- assume it got lost and
					 * wake up the other thread so that the
					 * operation completes.
					 */
					if (!(stp->sd_flag & IOCWAITNE)) {
						stp->sd_flag |= IOCWAITNE;
						cv_broadcast(&stp->sd_monitor);
					}
					/*
					 * Otherwise, there's a running
					 * STR_NOERROR -- we have no choice
					 * here but to wait forever (or until
					 * interrupted).
					 */
				} else {
					/*
					 * pending ioctl has caused
					 * us to time out
					 */
					error = ETIME;
				}
			}
		} else if ((stp->sd_flag & errs)) {
			error = strgeterr(stp, errs, 0);
		}
		if (error) {
			mutex_exit(&stp->sd_lock);
			freemsg(bp);
			crfree(crp);
			return (error);
		}
	}

	/*
	 * Have control of ioctl mechanism.
	 * Send down ioctl packet and wait for response.
	 */
	/* -1 is the duplicate-reply marker stored below, not a message. */
	if (stp->sd_iocblk != (mblk_t *)-1) {
		freemsg(stp->sd_iocblk);
	}
	stp->sd_iocblk = NULL;

	/*
	 * If this is marked with 'noerror' (internal; mostly
	 * I_{P,}{UN,}LINK), then make sure nobody else is able to get
	 * in here by setting IOCWAITNE.
	 */
	waitflags = IOCWAIT;
	if (flag & STR_NOERROR)
		waitflags |= IOCWAITNE;

	stp->sd_flag |= waitflags;

	/*
	 * Assign sequence number.
	 */
	iocbp->ioc_id = stp->sd_iocid = getiocseqno();

	mutex_exit(&stp->sd_lock);

	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
	stream_willservice(stp);
	putnext(stp->sd_wrq, bp);
	stream_runservice(stp);

	/*
	 * Timed wait for acknowledgment.  The wait time is limited by the
	 * timeout value, which must be a positive integer (number of
	 * milliseconds to wait, or 0 (use default value of STRTIMOUT
	 * milliseconds), or -1 (wait forever).  This will be awakened
	 * either by an ACK/NAK message arriving, the timer expiring, or
	 * the timer expiring on another ioctl waiting for control of the
	 * mechanism.
	 */
waitioc:
	mutex_enter(&stp->sd_lock);


	/*
	 * If the reply has already arrived, don't sleep.  If awakened from
	 * the sleep, fail only if the reply has not arrived by then.
	 * Otherwise, process the reply.
	 */
	while (!stp->sd_iocblk) {
		clock_t cv_rval;

		if (stp->sd_flag & errs) {
			error = strgeterr(stp, errs, 0);
			if (error != 0) {
				/* Give up the ioctl mechanism before failing. */
				stp->sd_flag &= ~waitflags;
				cv_broadcast(&stp->sd_iocmonitor);
				mutex_exit(&stp->sd_lock);
				crfree(crp);
				return (error);
			}
		}

		TRACE_0(TR_FAC_STREAMS_FR,
			TR_STRDOIOCTL_WAIT2,
			"strdoioctl sleeps awaiting reply");
		ASSERT(error == 0);

		/* A zero ic_timout selects STRTIMOUT; otherwise it is scaled. */
		cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
		    (strioc->ic_timout ?
		    strioc->ic_timout * 1000 : STRTIMOUT), sigflag);

		/*
		 * There are four possible cases here: interrupt, timeout,
		 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
		 * valid M_IOCTL reply).
		 *
		 * If we've been awakened by a STR_NOERROR ioctl on some other
		 * thread, then sd_iocblk will still be NULL, and IOCWAITNE
		 * will be set.  Pretend as if we just timed out.  Note that
		 * this other thread waited at least STRTIMOUT before trying to
		 * awaken our thread, so this is indistinguishable (even for
		 * INFTIM) from the case where we failed with ETIME waiting on
		 * IOCWAIT in the prior loop.
		 */
		if (cv_rval > 0 && !(flag & STR_NOERROR) &&
		    stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
			cv_rval = -1;
		}

		/*
		 * note: STR_NOERROR does not protect
		 * us here.. use ic_timout < 0
		 */
		if (cv_rval <= 0) {
			if (cv_rval == 0) {
				error = EINTR;
			} else {
				error =  ETIME;
			}
			/*
			 * A message could have come in after we were scheduled
			 * but before we were actually run.
			 */
			bp = stp->sd_iocblk;
			stp->sd_iocblk = NULL;
			if (bp != NULL) {
				/*
				 * A late copy request is answered with a
				 * failing M_IOCDATA; any other late reply
				 * is simply freed.
				 */
				if ((bp->b_datap->db_type == M_COPYIN) ||
				    (bp->b_datap->db_type == M_COPYOUT)) {
					mutex_exit(&stp->sd_lock);
					if (bp->b_cont) {
						freemsg(bp->b_cont);
						bp->b_cont = NULL;
					}
					bp->b_datap->db_type = M_IOCDATA;
					bp->b_wptr = bp->b_rptr +
						sizeof (struct copyresp);
					resp = (struct copyresp *)bp->b_rptr;
					resp->cp_rval =
					    (caddr_t)1; /* failure */
					stream_willservice(stp);
					putnext(stp->sd_wrq, bp);
					stream_runservice(stp);
					mutex_enter(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
			}
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}
	}
	bp = stp->sd_iocblk;
	/*
	 * Note: it is strictly impossible to get here with sd_iocblk set to
	 * -1.  This is because the initial loop above doesn't allow any new
	 * ioctls into the fray until all others have passed this point.
	 */
	ASSERT(bp != NULL && bp != (mblk_t *)-1);
	TRACE_1(TR_FAC_STREAMS_FR,
		TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
	if ((bp->b_datap->db_type == M_IOCACK) ||
	    (bp->b_datap->db_type == M_IOCNAK)) {
		/* for detection of duplicate ioctl replies */
		stp->sd_iocblk = (mblk_t *)-1;
		stp->sd_flag &= ~waitflags;
		cv_broadcast(&stp->sd_iocmonitor);
		mutex_exit(&stp->sd_lock);
	} else {
		/*
		 * flags not cleared here because we're still doing
		 * copy in/out for ioctl.
		 */
		stp->sd_iocblk = NULL;
		mutex_exit(&stp->sd_lock);
	}


	/*
	 * Have received acknowledgment.
	 */

	switch (bp->b_datap->db_type) {
	case M_IOCACK:
		/*
		 * Positive ack.
		 */
		iocbp = (struct iocblk *)bp->b_rptr;

		/*
		 * Set error if indicated.
		 */
		if (iocbp->ioc_error) {
			error = iocbp->ioc_error;
			break;
		}

		/*
		 * Set return value.
		 */
		*rvalp = iocbp->ioc_rval;

		/*
		 * Data may have been returned in ACK message (ioc_count > 0).
		 * If so, copy it out to the user's buffer.
		 */
		if (iocbp->ioc_count && !transparent) {
			if (error = getiocd(bp, strioc->ic_dp, copyflag))
				break;
		}
		/*
		 * Report the returned length: the size of the last M_COPYOUT
		 * if one was seen (recorded in len), else ioc_count.
		 */
		if (!transparent) {
			if (len)	/* an M_COPYOUT was used with I_STR */
				strioc->ic_len = len;
			else
				strioc->ic_len = (int)iocbp->ioc_count;
		}
		break;

	case M_IOCNAK:
		/*
		 * Negative ack.
		 *
		 * The only thing to do is set error as specified
		 * in neg ack packet.
		 */
		iocbp = (struct iocblk *)bp->b_rptr;

		/* A NAK that carries no error code is reported as EINVAL. */
		error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
		break;

	case M_COPYIN:
		/*
		 * Driver or module has requested user ioctl data.
		 */
		reqp = (struct copyreq *)bp->b_rptr;

		/*
		 * M_COPYIN should *never* have a message attached, though
		 * it's harmless if it does -- thus, panic on a DEBUG
		 * kernel and just free it on a non-DEBUG build.
		 */
		ASSERT(bp->b_cont == NULL);
		if (bp->b_cont != NULL) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}

		error = putiocd(bp, reqp->cq_addr, flag, crp);
		if (error && bp->b_cont) {
			freemsg(bp->b_cont);
			bp->b_cont = NULL;
		}

		/* Reuse the request mblk as the M_IOCDATA response. */
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		bp->b_datap->db_type = M_IOCDATA;

		mblk_setcred(bp, crp);
		DB_CPID(bp) = curproc->p_pid;
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)(uintptr_t)error;
		resp->cp_flag = (fflags & FMODELS);

		stream_willservice(stp);
		putnext(stp->sd_wrq, bp);
		stream_runservice(stp);

		/* The response was still sent above so the requester sees it. */
		if (error) {
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}

		goto waitioc;

	case M_COPYOUT:
		/*
		 * Driver or module has ioctl data for a user.
		 */
		reqp = (struct copyreq *)bp->b_rptr;
		ASSERT(bp->b_cont != NULL);

		/*
		 * Always (transparent or non-transparent )
		 * use the address specified in the request
		 */
		taddr = reqp->cq_addr;
		/* Remembered so the final M_IOCACK can report it in ic_len. */
		if (!transparent)
			len = (int)reqp->cq_size;

		/* copyout data to the provided address */
		error = getiocd(bp, taddr, copyflag);

		freemsg(bp->b_cont);
		bp->b_cont = NULL;

		/* Reuse the request mblk as the M_IOCDATA response. */
		bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
		bp->b_datap->db_type = M_IOCDATA;

		mblk_setcred(bp, crp);
		DB_CPID(bp) = curproc->p_pid;
		resp = (struct copyresp *)bp->b_rptr;
		resp->cp_rval = (caddr_t)(uintptr_t)error;
		resp->cp_flag = (fflags & FMODELS);

		stream_willservice(stp);
		putnext(stp->sd_wrq, bp);
		stream_runservice(stp);

		if (error) {
			mutex_enter(&stp->sd_lock);
			stp->sd_flag &= ~waitflags;
			cv_broadcast(&stp->sd_iocmonitor);
			mutex_exit(&stp->sd_lock);
			crfree(crp);
			return (error);
		}
		goto waitioc;

	default:
		/* Unexpected reply type: release the ioctl mechanism. */
		ASSERT(0);
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~waitflags;
		cv_broadcast(&stp->sd_iocmonitor);
		mutex_exit(&stp->sd_lock);
		break;
	}

	freemsg(bp);
	crfree(crp);
	return (error);
}
6266 
6267 /*
6268  * For the SunOS keyboard driver.
6269  * Return the next available "ioctl" sequence number.
6270  * Exported, so that streams modules can send "ioctl" messages
6271  * downstream from their open routine.
6272  */
6273 int
6274 getiocseqno(void)
6275 {
6276 	int	i;
6277 
6278 	mutex_enter(&strresources);
6279 	i = ++ioc_id;
6280 	mutex_exit(&strresources);
6281 	return (i);
6282 }
6283 
6284 /*
6285  * Get the next message from the read queue.  If the message is
6286  * priority, STRPRI will have been set by strrput().  This flag
6287  * should be reset only when the entire message at the front of the
6288  * queue has been consumed.
6289  *
6290  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6291  */
6292 int
6293 strgetmsg(
6294 	struct vnode *vp,
6295 	struct strbuf *mctl,
6296 	struct strbuf *mdata,
6297 	unsigned char *prip,
6298 	int *flagsp,
6299 	int fmode,
6300 	rval_t *rvp)
6301 {
6302 	struct stdata *stp;
6303 	mblk_t *bp, *nbp;
6304 	mblk_t *savemp = NULL;
6305 	mblk_t *savemptail = NULL;
6306 	uint_t old_sd_flag;
6307 	int flg;
6308 	int more = 0;
6309 	int error = 0;
6310 	char first = 1;
6311 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
6312 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
6313 	unsigned char pri = 0;
6314 	queue_t *q;
6315 	int	pr = 0;			/* Partial read successful */
6316 	struct uio uios;
6317 	struct uio *uiop = &uios;
6318 	struct iovec iovs;
6319 	unsigned char type;
6320 
6321 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
6322 		"strgetmsg:%p", vp);
6323 
6324 	ASSERT(vp->v_stream);
6325 	stp = vp->v_stream;
6326 	rvp->r_val1 = 0;
6327 
6328 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
6329 		if (error = straccess(stp, JCREAD))
6330 			return (error);
6331 
6332 	/* Fast check of flags before acquiring the lock */
6333 	if (stp->sd_flag & (STRDERR|STPLEX)) {
6334 		mutex_enter(&stp->sd_lock);
6335 		error = strgeterr(stp, STRDERR|STPLEX, 0);
6336 		mutex_exit(&stp->sd_lock);
6337 		if (error != 0)
6338 			return (error);
6339 	}
6340 
6341 	switch (*flagsp) {
6342 	case MSG_HIPRI:
6343 		if (*prip != 0)
6344 			return (EINVAL);
6345 		break;
6346 
6347 	case MSG_ANY:
6348 	case MSG_BAND:
6349 		break;
6350 
6351 	default:
6352 		return (EINVAL);
6353 	}
6354 	/*
6355 	 * Setup uio and iov for data part
6356 	 */
6357 	iovs.iov_base = mdata->buf;
6358 	iovs.iov_len = mdata->maxlen;
6359 	uios.uio_iov = &iovs;
6360 	uios.uio_iovcnt = 1;
6361 	uios.uio_loffset = 0;
6362 	uios.uio_segflg = UIO_USERSPACE;
6363 	uios.uio_fmode = 0;
6364 	uios.uio_extflg = UIO_COPY_CACHED;
6365 	uios.uio_resid = mdata->maxlen;
6366 	uios.uio_offset = 0;
6367 
6368 	q = _RD(stp->sd_wrq);
6369 	mutex_enter(&stp->sd_lock);
6370 	old_sd_flag = stp->sd_flag;
6371 	mark = 0;
6372 	for (;;) {
6373 		int done = 0;
6374 		mblk_t *q_first = q->q_first;
6375 
6376 		/*
6377 		 * Get the next message of appropriate priority
6378 		 * from the stream head.  If the caller is interested
6379 		 * in band or hipri messages, then they should already
6380 		 * be enqueued at the stream head.  On the other hand
6381 		 * if the caller wants normal (band 0) messages, they
6382 		 * might be deferred in a synchronous stream and they
6383 		 * will need to be pulled up.
6384 		 *
6385 		 * After we have dequeued a message, we might find that
6386 		 * it was a deferred M_SIG that was enqueued at the
6387 		 * stream head.  It must now be posted as part of the
6388 		 * read by calling strsignal_nolock().
6389 		 *
6390 		 * Also note that strrput does not enqueue an M_PCSIG,
6391 		 * and there cannot be more than one hipri message,
6392 		 * so there was no need to have the M_PCSIG case.
6393 		 *
6394 		 * At some time it might be nice to try and wrap the
6395 		 * functionality of kstrgetmsg() and strgetmsg() into
6396 		 * a common routine so to reduce the amount of replicated
6397 		 * code (since they are extremely similar).
6398 		 */
6399 		if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
6400 			/* Asking for normal, band0 data */
6401 			bp = strget(stp, q, uiop, first, &error);
6402 			ASSERT(MUTEX_HELD(&stp->sd_lock));
6403 			if (bp != NULL) {
6404 				if (bp->b_datap->db_type == M_SIG) {
6405 					strsignal_nolock(stp, *bp->b_rptr,
6406 					    (int32_t)bp->b_band);
6407 					continue;
6408 				} else {
6409 					break;
6410 				}
6411 			}
6412 			if (error != 0) {
6413 				goto getmout;
6414 			}
6415 
6416 		/*
6417 		 * We can't depend on the value of STRPRI here because
6418 		 * the stream head may be in transit. Therefore, we
6419 		 * must look at the type of the first message to
6420 		 * determine if a high priority messages is waiting
6421 		 */
6422 		} else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
6423 			    q_first->b_datap->db_type >= QPCTL &&
6424 			    (bp = getq_noenab(q)) != NULL) {
6425 			/* Asked for HIPRI and got one */
6426 			ASSERT(bp->b_datap->db_type >= QPCTL);
6427 			break;
6428 		} else if ((*flagsp & MSG_BAND) && q_first != NULL &&
6429 			    ((q_first->b_band >= *prip) ||
6430 			    q_first->b_datap->db_type >= QPCTL) &&
6431 			    (bp = getq_noenab(q)) != NULL) {
6432 			/*
6433 			 * Asked for at least band "prip" and got either at
6434 			 * least that band or a hipri message.
6435 			 */
6436 			ASSERT(bp->b_band >= *prip ||
6437 				bp->b_datap->db_type >= QPCTL);
6438 			if (bp->b_datap->db_type == M_SIG) {
6439 				strsignal_nolock(stp, *bp->b_rptr,
6440 				    (int32_t)bp->b_band);
6441 				continue;
6442 			} else {
6443 				break;
6444 			}
6445 		}
6446 
6447 		/* No data. Time to sleep? */
6448 		qbackenable(q, 0);
6449 
6450 		/*
6451 		 * If STRHUP or STREOF, return 0 length control and data.
6452 		 * If resid is 0, then a read(fd,buf,0) was done. Do not
6453 		 * sleep to satisfy this request because by default we have
6454 		 * zero bytes to return.
6455 		 */
6456 		if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
6457 		    mdata->maxlen == 0)) {
6458 			mctl->len = mdata->len = 0;
6459 			*flagsp = 0;
6460 			mutex_exit(&stp->sd_lock);
6461 			return (0);
6462 		}
6463 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
6464 			"strgetmsg calls strwaitq:%p, %p",
6465 			vp, uiop);
6466 		if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
6467 		    &done)) != 0) || done) {
6468 			TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
6469 				"strgetmsg error or done:%p, %p",
6470 				vp, uiop);
6471 			mutex_exit(&stp->sd_lock);
6472 			return (error);
6473 		}
6474 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
6475 			"strgetmsg awakes:%p, %p", vp, uiop);
6476 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
6477 			mutex_exit(&stp->sd_lock);
6478 			if (error = straccess(stp, JCREAD))
6479 				return (error);
6480 			mutex_enter(&stp->sd_lock);
6481 		}
6482 		first = 0;
6483 	}
6484 	ASSERT(bp != NULL);
6485 	/*
6486 	 * Extract any mark information. If the message is not completely
6487 	 * consumed this information will be put in the mblk
6488 	 * that is putback.
6489 	 * If MSGMARKNEXT is set and the message is completely consumed
6490 	 * the STRATMARK flag will be set below. Likewise, if
6491 	 * MSGNOTMARKNEXT is set and the message is
6492 	 * completely consumed STRNOTATMARK will be set.
6493 	 */
6494 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6495 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6496 		(MSGMARKNEXT|MSGNOTMARKNEXT));
6497 	if (mark != 0 && bp == stp->sd_mark) {
6498 		mark |= _LASTMARK;
6499 		stp->sd_mark = NULL;
6500 	}
6501 	/*
6502 	 * keep track of the original message type and priority
6503 	 */
6504 	pri = bp->b_band;
6505 	type = bp->b_datap->db_type;
6506 	if (type == M_PASSFP) {
6507 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
6508 			stp->sd_mark = bp;
6509 		bp->b_flag |= mark & ~_LASTMARK;
6510 		putback(stp, q, bp, pri);
6511 		qbackenable(q, pri);
6512 		mutex_exit(&stp->sd_lock);
6513 		return (EBADMSG);
6514 	}
6515 	ASSERT(type != M_SIG);
6516 
6517 	/*
6518 	 * Set this flag so strrput will not generate signals. Need to
6519 	 * make sure this flag is cleared before leaving this routine
6520 	 * else signals will stop being sent.
6521 	 */
6522 	stp->sd_flag |= STRGETINPROG;
6523 	mutex_exit(&stp->sd_lock);
6524 
6525 	if (STREAM_NEEDSERVICE(stp))
6526 		stream_runservice(stp);
6527 
6528 	/*
6529 	 * Set HIPRI flag if message is priority.
6530 	 */
6531 	if (type >= QPCTL)
6532 		flg = MSG_HIPRI;
6533 	else
6534 		flg = MSG_BAND;
6535 
6536 	/*
6537 	 * First process PROTO or PCPROTO blocks, if any.
6538 	 */
6539 	if (mctl->maxlen >= 0 && type != M_DATA) {
6540 		size_t	n, bcnt;
6541 		char	*ubuf;
6542 
6543 		bcnt = mctl->maxlen;
6544 		ubuf = mctl->buf;
6545 		while (bp != NULL && bp->b_datap->db_type != M_DATA) {
6546 			if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
6547 			    copyout(bp->b_rptr, ubuf, n)) {
6548 				error = EFAULT;
6549 				mutex_enter(&stp->sd_lock);
6550 				/*
6551 				 * clear stream head pri flag based on
6552 				 * first message type
6553 				 */
6554 				if (type >= QPCTL) {
6555 					ASSERT(type == M_PCPROTO);
6556 					stp->sd_flag &= ~STRPRI;
6557 				}
6558 				more = 0;
6559 				freemsg(bp);
6560 				goto getmout;
6561 			}
6562 			ubuf += n;
6563 			bp->b_rptr += n;
6564 			if (bp->b_rptr >= bp->b_wptr) {
6565 				nbp = bp;
6566 				bp = bp->b_cont;
6567 				freeb(nbp);
6568 			}
6569 			ASSERT(n <= bcnt);
6570 			bcnt -= n;
6571 			if (bcnt == 0)
6572 				break;
6573 		}
6574 		mctl->len = mctl->maxlen - bcnt;
6575 	} else
6576 		mctl->len = -1;
6577 
6578 	if (bp && bp->b_datap->db_type != M_DATA) {
6579 		/*
6580 		 * More PROTO blocks in msg.
6581 		 */
6582 		more |= MORECTL;
6583 		savemp = bp;
6584 		while (bp && bp->b_datap->db_type != M_DATA) {
6585 			savemptail = bp;
6586 			bp = bp->b_cont;
6587 		}
6588 		savemptail->b_cont = NULL;
6589 	}
6590 
6591 	/*
6592 	 * Now process DATA blocks, if any.
6593 	 */
6594 	if (mdata->maxlen >= 0 && bp) {
6595 		/*
6596 		 * struiocopyout will consume a potential zero-length
6597 		 * M_DATA even if uio_resid is zero.
6598 		 */
6599 		size_t oldresid = uiop->uio_resid;
6600 
6601 		bp = struiocopyout(bp, uiop, &error);
6602 		if (error != 0) {
6603 			mutex_enter(&stp->sd_lock);
6604 			/*
6605 			 * clear stream head hi pri flag based on
6606 			 * first message
6607 			 */
6608 			if (type >= QPCTL) {
6609 				ASSERT(type == M_PCPROTO);
6610 				stp->sd_flag &= ~STRPRI;
6611 			}
6612 			more = 0;
6613 			freemsg(savemp);
6614 			goto getmout;
6615 		}
6616 		/*
6617 		 * (pr == 1) indicates a partial read.
6618 		 */
6619 		if (oldresid > uiop->uio_resid)
6620 			pr = 1;
6621 		mdata->len = mdata->maxlen - uiop->uio_resid;
6622 	} else
6623 		mdata->len = -1;
6624 
6625 	if (bp) {			/* more data blocks in msg */
6626 		more |= MOREDATA;
6627 		if (savemp)
6628 			savemptail->b_cont = bp;
6629 		else
6630 			savemp = bp;
6631 	}
6632 
6633 	mutex_enter(&stp->sd_lock);
6634 	if (savemp) {
6635 		if (pr && (savemp->b_datap->db_type == M_DATA) &&
6636 		    msgnodata(savemp)) {
6637 			/*
6638 			 * Avoid queuing a zero-length tail part of
6639 			 * a message. pr=1 indicates that we read some of
6640 			 * the message.
6641 			 */
6642 			freemsg(savemp);
6643 			more &= ~MOREDATA;
6644 			/*
6645 			 * clear stream head hi pri flag based on
6646 			 * first message
6647 			 */
6648 			if (type >= QPCTL) {
6649 				ASSERT(type == M_PCPROTO);
6650 				stp->sd_flag &= ~STRPRI;
6651 			}
6652 		} else {
6653 			savemp->b_band = pri;
6654 			/*
6655 			 * If the first message was HIPRI and the one we're
6656 			 * putting back isn't, then clear STRPRI, otherwise
6657 			 * set STRPRI again.  Note that we must set STRPRI
6658 			 * again since the flush logic in strrput_nondata()
6659 			 * may have cleared it while we had sd_lock dropped.
6660 			 */
6661 			if (type >= QPCTL) {
6662 				ASSERT(type == M_PCPROTO);
6663 				if (queclass(savemp) < QPCTL)
6664 					stp->sd_flag &= ~STRPRI;
6665 				else
6666 					stp->sd_flag |= STRPRI;
6667 			} else if (queclass(savemp) >= QPCTL) {
6668 				/*
6669 				 * The first message was not a HIPRI message,
6670 				 * but the one we are about to putback is.
6671 				 * For simplicitly, we do not allow for HIPRI
6672 				 * For simplicity, we do not allow for HIPRI
6673 				 * body, so just force it to same type as
6674 				 * first message.
6675 				 */
6676 				ASSERT(type == M_DATA || type == M_PROTO);
6677 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
6678 				savemp->b_datap->db_type = type;
6679 			}
6680 			if (mark != 0) {
6681 				savemp->b_flag |= mark & ~_LASTMARK;
6682 				if ((mark & _LASTMARK) &&
6683 				    (stp->sd_mark == NULL)) {
6684 					/*
6685 					 * If another marked message arrived
6686 					 * while sd_lock was not held sd_mark
6687 					 * would be non-NULL.
6688 					 */
6689 					stp->sd_mark = savemp;
6690 				}
6691 			}
6692 			putback(stp, q, savemp, pri);
6693 		}
6694 	} else {
6695 		/*
6696 		 * The complete message was consumed.
6697 		 *
6698 		 * If another M_PCPROTO arrived while sd_lock was not held
6699 		 * it would have been discarded since STRPRI was still set.
6700 		 *
6701 		 * Move the MSG*MARKNEXT information
6702 		 * to the stream head just in case
6703 		 * the read queue becomes empty.
6704 		 * clear stream head hi pri flag based on
6705 		 * first message
6706 		 *
6707 		 * If the stream head was at the mark
6708 		 * (STRATMARK) before we dropped sd_lock above
6709 		 * and some data was consumed then we have
6710 		 * moved past the mark thus STRATMARK is
6711 		 * cleared. However, if a message arrived in
6712 		 * strrput during the copyout above causing
6713 		 * STRATMARK to be set we can not clear that
6714 		 * flag.
6715 		 */
6716 		if (type >= QPCTL) {
6717 			ASSERT(type == M_PCPROTO);
6718 			stp->sd_flag &= ~STRPRI;
6719 		}
6720 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
6721 			if (mark & MSGMARKNEXT) {
6722 				stp->sd_flag &= ~STRNOTATMARK;
6723 				stp->sd_flag |= STRATMARK;
6724 			} else if (mark & MSGNOTMARKNEXT) {
6725 				stp->sd_flag &= ~STRATMARK;
6726 				stp->sd_flag |= STRNOTATMARK;
6727 			} else {
6728 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
6729 			}
6730 		} else if (pr && (old_sd_flag & STRATMARK)) {
6731 			stp->sd_flag &= ~STRATMARK;
6732 		}
6733 	}
6734 
6735 	*flagsp = flg;
6736 	*prip = pri;
6737 
6738 	/*
6739 	 * Getmsg cleanup processing - if the state of the queue has changed
6740 	 * some signals may need to be sent and/or poll awakened.
6741 	 */
6742 getmout:
6743 	qbackenable(q, pri);
6744 
6745 	/*
6746 	 * We dropped the stream head lock above. Send all M_SIG messages
6747 	 * before processing stream head for SIGPOLL messages.
6748 	 */
6749 	ASSERT(MUTEX_HELD(&stp->sd_lock));
6750 	while ((bp = q->q_first) != NULL &&
6751 	    (bp->b_datap->db_type == M_SIG)) {
6752 		/*
6753 		 * sd_lock is held so the content of the read queue can not
6754 		 * change.
6755 		 */
6756 		bp = getq(q);
6757 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
6758 
6759 		strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band);
6760 		mutex_exit(&stp->sd_lock);
6761 		freemsg(bp);
6762 		if (STREAM_NEEDSERVICE(stp))
6763 			stream_runservice(stp);
6764 		mutex_enter(&stp->sd_lock);
6765 	}
6766 
6767 	/*
6768 	 * stream head cannot change while we make the determination
6769 	 * whether or not to send a signal. Drop the flag to allow strrput
6770 	 * to send firstmsgsigs again.
6771 	 */
6772 	stp->sd_flag &= ~STRGETINPROG;
6773 
6774 	/*
6775 	 * If the type of message at the front of the queue changed
6776 	 * due to the receive the appropriate signals and pollwakeup events
6777 	 * are generated. The type of changes are:
6778 	 *	Processed a hipri message, q_first is not hipri.
6779 	 *	Processed a band X message, and q_first is band Y.
6780 	 * The generated signals and pollwakeups are identical to what
6781 	 * strrput() generates should the message that is now on q_first
6782 	 * arrive to an empty read queue.
6783 	 *
6784 	 * Note: only strrput will send a signal for a hipri message.
6785 	 */
6786 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
6787 		strsigset_t signals = 0;
6788 		strpollset_t pollwakeups = 0;
6789 
6790 		if (flg & MSG_HIPRI) {
6791 			/*
6792 			 * Removed a hipri message. Regular data at
6793 			 * the front of  the queue.
6794 			 */
6795 			if (bp->b_band == 0) {
6796 				signals = S_INPUT | S_RDNORM;
6797 				pollwakeups = POLLIN | POLLRDNORM;
6798 			} else {
6799 				signals = S_INPUT | S_RDBAND;
6800 				pollwakeups = POLLIN | POLLRDBAND;
6801 			}
6802 		} else if (pri != bp->b_band) {
6803 			/*
6804 			 * The band is different for the new q_first.
6805 			 */
6806 			if (bp->b_band == 0) {
6807 				signals = S_RDNORM;
6808 				pollwakeups = POLLIN | POLLRDNORM;
6809 			} else {
6810 				signals = S_RDBAND;
6811 				pollwakeups = POLLIN | POLLRDBAND;
6812 			}
6813 		}
6814 
6815 		if (pollwakeups != 0) {
6816 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
6817 				if (!(stp->sd_rput_opt & SR_POLLIN))
6818 					goto no_pollwake;
6819 				stp->sd_rput_opt &= ~SR_POLLIN;
6820 			}
6821 			mutex_exit(&stp->sd_lock);
6822 			pollwakeup(&stp->sd_pollist, pollwakeups);
6823 			mutex_enter(&stp->sd_lock);
6824 		}
6825 no_pollwake:
6826 
6827 		if (stp->sd_sigflags & signals)
6828 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
6829 	}
6830 	mutex_exit(&stp->sd_lock);
6831 
6832 	rvp->r_val1 = more;
6833 	return (error);
6834 #undef	_LASTMARK
6835 }
6836 
6837 /*
6838  * Get the next message from the read queue.  If the message is
6839  * priority, STRPRI will have been set by strrput().  This flag
6840  * should be reset only when the entire message at the front of the
6841  * queue as been consumed.
6842  *
6843  * If uiop is NULL all data is returned in mctlp.
6844  * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
6845  * not enabled.
6846  * The timeout parameter is in milliseconds; -1 for infinity.
6847  * This routine handles the consolidation private flags:
6848  *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
6849  *	MSG_DELAYERROR	Defer the error check until the queue is empty.
6850  *	MSG_HOLDSIG	Hold signals while waiting for data.
6851  *	MSG_IPEEK	Only peek at messages.
6852  *	MSG_DISCARDTAIL	Discard the tail M_DATA part of the message
6853  *			that doesn't fit.
6854  *	MSG_NOMARK	If the message is marked leave it on the queue.
6855  *
6856  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6857  */
6858 int
6859 kstrgetmsg(
6860 	struct vnode *vp,
6861 	mblk_t **mctlp,
6862 	struct uio *uiop,
6863 	unsigned char *prip,
6864 	int *flagsp,
6865 	clock_t timout,
6866 	rval_t *rvp)
6867 {
6868 	struct stdata *stp;
6869 	mblk_t *bp, *nbp;
6870 	mblk_t *savemp = NULL;
6871 	mblk_t *savemptail = NULL;
6872 	int flags;
6873 	uint_t old_sd_flag;
6874 	int flg;
6875 	int more = 0;
6876 	int error = 0;
6877 	char first = 1;
6878 	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
6879 #define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
6880 	unsigned char pri = 0;
6881 	queue_t *q;
6882 	int	pr = 0;			/* Partial read successful */
6883 	unsigned char type;
6884 
6885 	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
6886 		"kstrgetmsg:%p", vp);
6887 
6888 	ASSERT(vp->v_stream);
6889 	stp = vp->v_stream;
6890 	rvp->r_val1 = 0;
6891 
6892 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
6893 		if (error = straccess(stp, JCREAD))
6894 			return (error);
6895 
6896 	flags = *flagsp;
6897 	/* Fast check of flags before acquiring the lock */
6898 	if (stp->sd_flag & (STRDERR|STPLEX)) {
6899 		if ((stp->sd_flag & STPLEX) ||
6900 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
6901 			mutex_enter(&stp->sd_lock);
6902 			error = strgeterr(stp, STRDERR|STPLEX,
6903 					(flags & MSG_IPEEK));
6904 			mutex_exit(&stp->sd_lock);
6905 			if (error != 0)
6906 				return (error);
6907 		}
6908 	}
6909 
6910 	switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
6911 	case MSG_HIPRI:
6912 		if (*prip != 0)
6913 			return (EINVAL);
6914 		break;
6915 
6916 	case MSG_ANY:
6917 	case MSG_BAND:
6918 		break;
6919 
6920 	default:
6921 		return (EINVAL);
6922 	}
6923 
6924 retry:
6925 	q = _RD(stp->sd_wrq);
6926 	mutex_enter(&stp->sd_lock);
6927 	old_sd_flag = stp->sd_flag;
6928 	mark = 0;
6929 	for (;;) {
6930 		int done = 0;
6931 		int waitflag;
6932 		int fmode;
6933 		mblk_t *q_first = q->q_first;
6934 
6935 		/*
6936 		 * This section of the code operates just like the code
6937 		 * in strgetmsg().  There is a comment there about what
6938 		 * is going on here.
6939 		 */
6940 		if (!(flags & (MSG_HIPRI|MSG_BAND))) {
6941 			/* Asking for normal, band0 data */
6942 			bp = strget(stp, q, uiop, first, &error);
6943 			ASSERT(MUTEX_HELD(&stp->sd_lock));
6944 			if (bp != NULL) {
6945 				if (bp->b_datap->db_type == M_SIG) {
6946 					strsignal_nolock(stp, *bp->b_rptr,
6947 					    (int32_t)bp->b_band);
6948 					continue;
6949 				} else {
6950 					break;
6951 				}
6952 			}
6953 			if (error != 0) {
6954 				goto getmout;
6955 			}
6956 		/*
6957 		 * We can't depend on the value of STRPRI here because
6958 		 * the stream head may be in transit. Therefore, we
6959 		 * must look at the type of the first message to
6960 		 * determine if a high priority messages is waiting
6961 		 */
6962 		} else if ((flags & MSG_HIPRI) && q_first != NULL &&
6963 			    q_first->b_datap->db_type >= QPCTL &&
6964 			    (bp = getq_noenab(q)) != NULL) {
6965 			ASSERT(bp->b_datap->db_type >= QPCTL);
6966 			break;
6967 		} else if ((flags & MSG_BAND) && q_first != NULL &&
6968 			    ((q_first->b_band >= *prip) ||
6969 			    q_first->b_datap->db_type >= QPCTL) &&
6970 			    (bp = getq_noenab(q)) != NULL) {
6971 			/*
6972 			 * Asked for at least band "prip" and got either at
6973 			 * least that band or a hipri message.
6974 			 */
6975 			ASSERT(bp->b_band >= *prip ||
6976 				bp->b_datap->db_type >= QPCTL);
6977 			if (bp->b_datap->db_type == M_SIG) {
6978 				strsignal_nolock(stp, *bp->b_rptr,
6979 				    (int32_t)bp->b_band);
6980 				continue;
6981 			} else {
6982 				break;
6983 			}
6984 		}
6985 
6986 		/* No data. Time to sleep? */
6987 		qbackenable(q, 0);
6988 
6989 		/*
6990 		 * Delayed error notification?
6991 		 */
6992 		if ((stp->sd_flag & (STRDERR|STPLEX)) &&
6993 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
6994 			error = strgeterr(stp, STRDERR|STPLEX,
6995 					(flags & MSG_IPEEK));
6996 			if (error != 0) {
6997 				mutex_exit(&stp->sd_lock);
6998 				return (error);
6999 			}
7000 		}
7001 
7002 		/*
7003 		 * If STRHUP or STREOF, return 0 length control and data.
7004 		 * If a read(fd,buf,0) has been done, do not sleep, just
7005 		 * return.
7006 		 *
7007 		 * If mctlp == NULL and uiop == NULL, then the code will
7008 		 * do the strwaitq. This is an understood way of saying
7009 		 * sleep "polling" until a message is received.
7010 		 */
7011 		if ((stp->sd_flag & (STRHUP|STREOF)) ||
7012 		    (uiop != NULL && uiop->uio_resid == 0)) {
7013 			if (mctlp != NULL)
7014 				*mctlp = NULL;
7015 			*flagsp = 0;
7016 			mutex_exit(&stp->sd_lock);
7017 			return (0);
7018 		}
7019 
7020 		waitflag = GETWAIT;
7021 		if (flags &
7022 		    (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
7023 			if (flags & MSG_HOLDSIG)
7024 				waitflag |= STR_NOSIG;
7025 			if (flags & MSG_IGNERROR)
7026 				waitflag |= STR_NOERROR;
7027 			if (flags & MSG_IPEEK)
7028 				waitflag |= STR_PEEK;
7029 			if (flags & MSG_DELAYERROR)
7030 				waitflag |= STR_DELAYERR;
7031 		}
7032 		if (uiop != NULL)
7033 			fmode = uiop->uio_fmode;
7034 		else
7035 			fmode = 0;
7036 
7037 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
7038 			"kstrgetmsg calls strwaitq:%p, %p",
7039 			vp, uiop);
7040 		if (((error = strwaitq(stp, waitflag, (ssize_t)0,
7041 		    fmode, timout, &done)) != 0) || done) {
7042 			TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
7043 				"kstrgetmsg error or done:%p, %p",
7044 				vp, uiop);
7045 			mutex_exit(&stp->sd_lock);
7046 			return (error);
7047 		}
7048 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
7049 			"kstrgetmsg awakes:%p, %p", vp, uiop);
7050 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
7051 			mutex_exit(&stp->sd_lock);
7052 			if (error = straccess(stp, JCREAD))
7053 				return (error);
7054 			mutex_enter(&stp->sd_lock);
7055 		}
7056 		first = 0;
7057 	}
7058 	ASSERT(bp != NULL);
7059 	/*
7060 	 * Extract any mark information. If the message is not completely
7061 	 * consumed this information will be put in the mblk
7062 	 * that is putback.
7063 	 * If MSGMARKNEXT is set and the message is completely consumed
7064 	 * the STRATMARK flag will be set below. Likewise, if
7065 	 * MSGNOTMARKNEXT is set and the message is
7066 	 * completely consumed STRNOTATMARK will be set.
7067 	 */
7068 	mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
7069 	ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
7070 		(MSGMARKNEXT|MSGNOTMARKNEXT));
7071 	pri = bp->b_band;
7072 	if (mark != 0) {
7073 		/*
7074 		 * If the caller doesn't want the mark return.
7075 		 * Used to implement MSG_WAITALL in sockets.
7076 		 */
7077 		if (flags & MSG_NOMARK) {
7078 			putback(stp, q, bp, pri);
7079 			qbackenable(q, pri);
7080 			mutex_exit(&stp->sd_lock);
7081 			return (EWOULDBLOCK);
7082 		}
7083 		if (bp == stp->sd_mark) {
7084 			mark |= _LASTMARK;
7085 			stp->sd_mark = NULL;
7086 		}
7087 	}
7088 
7089 	/*
7090 	 * keep track of the first message type
7091 	 */
7092 	type = bp->b_datap->db_type;
7093 
7094 	if (bp->b_datap->db_type == M_PASSFP) {
7095 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7096 			stp->sd_mark = bp;
7097 		bp->b_flag |= mark & ~_LASTMARK;
7098 		putback(stp, q, bp, pri);
7099 		qbackenable(q, pri);
7100 		mutex_exit(&stp->sd_lock);
7101 		return (EBADMSG);
7102 	}
7103 	ASSERT(type != M_SIG);
7104 
7105 	if (flags & MSG_IPEEK) {
7106 		/*
7107 		 * Clear any struioflag - we do the uiomove over again
7108 		 * when peeking since it simplifies the code.
7109 		 *
7110 		 * Dup the message and put the original back on the queue.
7111 		 * If dupmsg() fails, try again with copymsg() to see if
7112 		 * there is indeed a shortage of memory.  dupmsg() may fail
7113 		 * if db_ref in any of the messages reaches its limit.
7114 		 */
7115 		if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
7116 			/*
7117 			 * Restore the state of the stream head since we
7118 			 * need to drop sd_lock (strwaitbuf is sleeping).
7119 			 */
7120 			size_t size = msgdsize(bp);
7121 
7122 			if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7123 				stp->sd_mark = bp;
7124 			bp->b_flag |= mark & ~_LASTMARK;
7125 			putback(stp, q, bp, pri);
7126 			mutex_exit(&stp->sd_lock);
7127 			error = strwaitbuf(size, BPRI_HI);
7128 			if (error) {
7129 				/*
7130 				 * There is no net change to the queue thus
7131 				 * no need to qbackenable.
7132 				 */
7133 				return (error);
7134 			}
7135 			goto retry;
7136 		}
7137 
7138 		if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7139 			stp->sd_mark = bp;
7140 		bp->b_flag |= mark & ~_LASTMARK;
7141 		putback(stp, q, bp, pri);
7142 		bp = nbp;
7143 	}
7144 
7145 	/*
7146 	 * Set this flag so strrput will not generate signals. Need to
7147 	 * make sure this flag is cleared before leaving this routine
7148 	 * else signals will stop being sent.
7149 	 */
7150 	stp->sd_flag |= STRGETINPROG;
7151 	mutex_exit(&stp->sd_lock);
7152 
7153 	if (STREAM_NEEDSERVICE(stp))
7154 		stream_runservice(stp);
7155 
7156 	/*
7157 	 * Set HIPRI flag if message is priority.
7158 	 */
7159 	if (type >= QPCTL)
7160 		flg = MSG_HIPRI;
7161 	else
7162 		flg = MSG_BAND;
7163 
7164 	/*
7165 	 * First process PROTO or PCPROTO blocks, if any.
7166 	 */
7167 	if (mctlp != NULL && type != M_DATA) {
7168 		mblk_t *nbp;
7169 
7170 		*mctlp = bp;
7171 		while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
7172 			bp = bp->b_cont;
7173 		nbp = bp->b_cont;
7174 		bp->b_cont = NULL;
7175 		bp = nbp;
7176 	}
7177 
7178 	if (bp && bp->b_datap->db_type != M_DATA) {
7179 		/*
7180 		 * More PROTO blocks in msg. Will only happen if mctlp is NULL.
7181 		 */
7182 		more |= MORECTL;
7183 		savemp = bp;
7184 		while (bp && bp->b_datap->db_type != M_DATA) {
7185 			savemptail = bp;
7186 			bp = bp->b_cont;
7187 		}
7188 		savemptail->b_cont = NULL;
7189 	}
7190 
7191 	/*
7192 	 * Now process DATA blocks, if any.
7193 	 */
7194 	if (uiop == NULL) {
7195 		/* Append data to tail of mctlp */
7196 		if (mctlp != NULL) {
7197 			mblk_t **mpp = mctlp;
7198 
7199 			while (*mpp != NULL)
7200 				mpp = &((*mpp)->b_cont);
7201 			*mpp = bp;
7202 			bp = NULL;
7203 		}
7204 	} else if (uiop->uio_resid >= 0 && bp) {
7205 		size_t oldresid = uiop->uio_resid;
7206 
7207 		/*
7208 		 * If a streams message is likely to consist
7209 		 * of many small mblks, it is pulled up into
7210 		 * one continuous chunk of memory.
7211 		 * see longer comment at top of page
7212 		 * by mblk_pull_len declaration.
7213 		 */
7214 
7215 		if (MBLKL(bp) < mblk_pull_len) {
7216 			(void) pullupmsg(bp, -1);
7217 		}
7218 
7219 		bp = struiocopyout(bp, uiop, &error);
7220 		if (error != 0) {
7221 			if (mctlp != NULL) {
7222 				freemsg(*mctlp);
7223 				*mctlp = NULL;
7224 			} else
7225 				freemsg(savemp);
7226 			mutex_enter(&stp->sd_lock);
7227 			/*
7228 			 * clear stream head hi pri flag based on
7229 			 * first message
7230 			 */
7231 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7232 				ASSERT(type == M_PCPROTO);
7233 				stp->sd_flag &= ~STRPRI;
7234 			}
7235 			more = 0;
7236 			goto getmout;
7237 		}
7238 		/*
7239 		 * (pr == 1) indicates a partial read.
7240 		 */
7241 		if (oldresid > uiop->uio_resid)
7242 			pr = 1;
7243 	}
7244 
7245 	if (bp) {			/* more data blocks in msg */
7246 		more |= MOREDATA;
7247 		if (savemp)
7248 			savemptail->b_cont = bp;
7249 		else
7250 			savemp = bp;
7251 	}
7252 
7253 	mutex_enter(&stp->sd_lock);
7254 	if (savemp) {
7255 		if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
7256 			/*
7257 			 * When MSG_DISCARDTAIL is set or
7258 			 * when peeking discard any tail. When peeking this
7259 			 * is the tail of the dup that was copied out - the
7260 			 * message has already been putback on the queue.
7261 			 * Return MOREDATA to the caller even though the data
7262 			 * is discarded. This is used by sockets (to
7263 			 * set MSG_TRUNC).
7264 			 */
7265 			freemsg(savemp);
7266 			if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7267 				ASSERT(type == M_PCPROTO);
7268 				stp->sd_flag &= ~STRPRI;
7269 			}
7270 		} else if (pr && (savemp->b_datap->db_type == M_DATA) &&
7271 			    msgnodata(savemp)) {
7272 			/*
7273 			 * Avoid queuing a zero-length tail part of
7274 			 * a message. pr=1 indicates that we read some of
7275 			 * the message.
7276 			 */
7277 			freemsg(savemp);
7278 			more &= ~MOREDATA;
7279 			if (type >= QPCTL) {
7280 				ASSERT(type == M_PCPROTO);
7281 				stp->sd_flag &= ~STRPRI;
7282 			}
7283 		} else {
7284 			savemp->b_band = pri;
7285 			/*
7286 			 * If the first message was HIPRI and the one we're
7287 			 * putting back isn't, then clear STRPRI, otherwise
7288 			 * set STRPRI again.  Note that we must set STRPRI
7289 			 * again since the flush logic in strrput_nondata()
7290 			 * may have cleared it while we had sd_lock dropped.
7291 			 */
7292 			if (type >= QPCTL) {
7293 				ASSERT(type == M_PCPROTO);
7294 				if (queclass(savemp) < QPCTL)
7295 					stp->sd_flag &= ~STRPRI;
7296 				else
7297 					stp->sd_flag |= STRPRI;
7298 			} else if (queclass(savemp) >= QPCTL) {
7299 				/*
7300 				 * The first message was not a HIPRI message,
7301 				 * but the one we are about to putback is.
7302 				 * For simplicitly, we do not allow for HIPRI
7303 				 * messages to be embedded in the message
7304 				 * body, so just force it to same type as
7305 				 * first message.
7306 				 */
7307 				ASSERT(type == M_DATA || type == M_PROTO);
7308 				ASSERT(savemp->b_datap->db_type == M_PCPROTO);
7309 				savemp->b_datap->db_type = type;
7310 			}
7311 			if (mark != 0) {
7312 				if ((mark & _LASTMARK) &&
7313 				    (stp->sd_mark == NULL)) {
7314 					/*
7315 					 * If another marked message arrived
7316 					 * while sd_lock was not held sd_mark
7317 					 * would be non-NULL.
7318 					 */
7319 					stp->sd_mark = savemp;
7320 				}
7321 				savemp->b_flag |= mark & ~_LASTMARK;
7322 			}
7323 			putback(stp, q, savemp, pri);
7324 		}
7325 	} else if (!(flags & MSG_IPEEK)) {
7326 		/*
7327 		 * The complete message was consumed.
7328 		 *
7329 		 * If another M_PCPROTO arrived while sd_lock was not held
7330 		 * it would have been discarded since STRPRI was still set.
7331 		 *
7332 		 * Move the MSG*MARKNEXT information
7333 		 * to the stream head just in case
7334 		 * the read queue becomes empty.
7335 		 * clear stream head hi pri flag based on
7336 		 * first message
7337 		 *
7338 		 * If the stream head was at the mark
7339 		 * (STRATMARK) before we dropped sd_lock above
7340 		 * and some data was consumed then we have
7341 		 * moved past the mark thus STRATMARK is
7342 		 * cleared. However, if a message arrived in
7343 		 * strrput during the copyout above causing
7344 		 * STRATMARK to be set we can not clear that
7345 		 * flag.
7346 		 * XXX A "perimeter" would help by single-threading strrput,
7347 		 * strread, strgetmsg and kstrgetmsg.
7348 		 */
7349 		if (type >= QPCTL) {
7350 			ASSERT(type == M_PCPROTO);
7351 			stp->sd_flag &= ~STRPRI;
7352 		}
7353 		if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7354 			if (mark & MSGMARKNEXT) {
7355 				stp->sd_flag &= ~STRNOTATMARK;
7356 				stp->sd_flag |= STRATMARK;
7357 			} else if (mark & MSGNOTMARKNEXT) {
7358 				stp->sd_flag &= ~STRATMARK;
7359 				stp->sd_flag |= STRNOTATMARK;
7360 			} else {
7361 				stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7362 			}
7363 		} else if (pr && (old_sd_flag & STRATMARK)) {
7364 			stp->sd_flag &= ~STRATMARK;
7365 		}
7366 	}
7367 
7368 	*flagsp = flg;
7369 	*prip = pri;
7370 
7371 	/*
7372 	 * Getmsg cleanup processing - if the state of the queue has changed
7373 	 * some signals may need to be sent and/or poll awakened.
7374 	 */
7375 getmout:
7376 	qbackenable(q, pri);
7377 
7378 	/*
7379 	 * We dropped the stream head lock above. Send all M_SIG messages
7380 	 * before processing stream head for SIGPOLL messages.
7381 	 */
7382 	ASSERT(MUTEX_HELD(&stp->sd_lock));
7383 	while ((bp = q->q_first) != NULL &&
7384 	    (bp->b_datap->db_type == M_SIG)) {
7385 		/*
7386 		 * sd_lock is held so the content of the read queue can not
7387 		 * change.
7388 		 */
7389 		bp = getq(q);
7390 		ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7391 
7392 		strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band);
7393 		mutex_exit(&stp->sd_lock);
7394 		freemsg(bp);
7395 		if (STREAM_NEEDSERVICE(stp))
7396 			stream_runservice(stp);
7397 		mutex_enter(&stp->sd_lock);
7398 	}
7399 
7400 	/*
7401 	 * stream head cannot change while we make the determination
7402 	 * whether or not to send a signal. Drop the flag to allow strrput
7403 	 * to send firstmsgsigs again.
7404 	 */
7405 	stp->sd_flag &= ~STRGETINPROG;
7406 
7407 	/*
7408 	 * If the type of message at the front of the queue changed
7409 	 * due to the receive the appropriate signals and pollwakeup events
7410 	 * are generated. The type of changes are:
7411 	 *	Processed a hipri message, q_first is not hipri.
7412 	 *	Processed a band X message, and q_first is band Y.
7413 	 * The generated signals and pollwakeups are identical to what
7414 	 * strrput() generates should the message that is now on q_first
7415 	 * arrive to an empty read queue.
7416 	 *
7417 	 * Note: only strrput will send a signal for a hipri message.
7418 	 */
7419 	if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7420 		strsigset_t signals = 0;
7421 		strpollset_t pollwakeups = 0;
7422 
7423 		if (flg & MSG_HIPRI) {
7424 			/*
7425 			 * Removed a hipri message. Regular data at
7426 			 * the front of  the queue.
7427 			 */
7428 			if (bp->b_band == 0) {
7429 				signals = S_INPUT | S_RDNORM;
7430 				pollwakeups = POLLIN | POLLRDNORM;
7431 			} else {
7432 				signals = S_INPUT | S_RDBAND;
7433 				pollwakeups = POLLIN | POLLRDBAND;
7434 			}
7435 		} else if (pri != bp->b_band) {
7436 			/*
7437 			 * The band is different for the new q_first.
7438 			 */
7439 			if (bp->b_band == 0) {
7440 				signals = S_RDNORM;
7441 				pollwakeups = POLLIN | POLLRDNORM;
7442 			} else {
7443 				signals = S_RDBAND;
7444 				pollwakeups = POLLIN | POLLRDBAND;
7445 			}
7446 		}
7447 
7448 		if (pollwakeups != 0) {
7449 			if (pollwakeups == (POLLIN | POLLRDNORM)) {
7450 				if (!(stp->sd_rput_opt & SR_POLLIN))
7451 					goto no_pollwake;
7452 				stp->sd_rput_opt &= ~SR_POLLIN;
7453 			}
7454 			mutex_exit(&stp->sd_lock);
7455 			pollwakeup(&stp->sd_pollist, pollwakeups);
7456 			mutex_enter(&stp->sd_lock);
7457 		}
7458 no_pollwake:
7459 
7460 		if (stp->sd_sigflags & signals)
7461 			strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7462 	}
7463 	mutex_exit(&stp->sd_lock);
7464 
7465 	rvp->r_val1 = more;
7466 	return (error);
7467 #undef	_LASTMARK
7468 }
7469 
7470 /*
7471  * Put a message downstream.
7472  *
7473  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7474  */
7475 int
7476 strputmsg(
7477 	struct vnode *vp,
7478 	struct strbuf *mctl,
7479 	struct strbuf *mdata,
7480 	unsigned char pri,
7481 	int flag,
7482 	int fmode)
7483 {
7484 	struct stdata *stp;
7485 	queue_t *wqp;
7486 	mblk_t *mp;
7487 	ssize_t msgsize;
7488 	ssize_t rmin, rmax;
7489 	int error;
7490 	struct uio uios;
7491 	struct uio *uiop = &uios;
7492 	struct iovec iovs;
7493 	int xpg4 = 0;
7494 
7495 	ASSERT(vp->v_stream);
7496 	stp = vp->v_stream;
7497 	wqp = stp->sd_wrq;
7498 
7499 	/*
7500 	 * If it is an XPG4 application, we need to send
7501 	 * SIGPIPE below
7502 	 */
7503 
7504 	xpg4 = (flag & MSG_XPG4) ? 1 : 0;
7505 	flag &= ~MSG_XPG4;
7506 
7507 #ifdef C2_AUDIT
7508 	if (audit_active)
7509 		audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
7510 #endif
7511 
7512 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
7513 		if (error = straccess(stp, JCWRITE))
7514 			return (error);
7515 
7516 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7517 		mutex_enter(&stp->sd_lock);
7518 		error = strwriteable(stp, B_FALSE, xpg4);
7519 		mutex_exit(&stp->sd_lock);
7520 		if (error != 0)
7521 			return (error);
7522 	}
7523 
7524 	/*
7525 	 * Check for legal flag value.
7526 	 */
7527 	switch (flag) {
7528 	case MSG_HIPRI:
7529 		if ((mctl->len < 0) || (pri != 0))
7530 			return (EINVAL);
7531 		break;
7532 	case MSG_BAND:
7533 		break;
7534 
7535 	default:
7536 		return (EINVAL);
7537 	}
7538 
7539 	TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
7540 		"strputmsg in:stp %p", stp);
7541 
7542 	/* get these values from those cached in the stream head */
7543 	rmin = stp->sd_qn_minpsz;
7544 	rmax = stp->sd_qn_maxpsz;
7545 
7546 	/*
7547 	 * Make sure ctl and data sizes together fall within the
7548 	 * limits of the max and min receive packet sizes and do
7549 	 * not exceed system limit.
7550 	 */
7551 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
7552 	if (rmax == 0) {
7553 		return (ERANGE);
7554 	}
7555 	/*
7556 	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7557 	 * Needed to prevent partial failures in the strmakedata loop.
7558 	 */
7559 	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7560 		rmax = stp->sd_maxblk;
7561 
7562 	if ((msgsize = mdata->len) < 0) {
7563 		msgsize = 0;
7564 		rmin = 0;	/* no range check for NULL data part */
7565 	}
7566 	if ((msgsize < rmin) ||
7567 	    ((msgsize > rmax) && (rmax != INFPSZ)) ||
7568 	    (mctl->len > strctlsz)) {
7569 		return (ERANGE);
7570 	}
7571 
7572 	/*
7573 	 * Setup uio and iov for data part
7574 	 */
7575 	iovs.iov_base = mdata->buf;
7576 	iovs.iov_len = msgsize;
7577 	uios.uio_iov = &iovs;
7578 	uios.uio_iovcnt = 1;
7579 	uios.uio_loffset = 0;
7580 	uios.uio_segflg = UIO_USERSPACE;
7581 	uios.uio_fmode = fmode;
7582 	uios.uio_extflg = UIO_COPY_DEFAULT;
7583 	uios.uio_resid = msgsize;
7584 	uios.uio_offset = 0;
7585 
7586 	/* Ignore flow control in strput for HIPRI */
7587 	if (flag & MSG_HIPRI)
7588 		flag |= MSG_IGNFLOW;
7589 
7590 	for (;;) {
7591 		int done = 0;
7592 
7593 		/*
7594 		 * strput will always free the ctl mblk - even when strput
7595 		 * fails.
7596 		 */
7597 		if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
7598 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7599 				"strputmsg out:stp %p out %d error %d",
7600 				stp, 1, error);
7601 			return (error);
7602 		}
7603 		/*
7604 		 * Verify that the whole message can be transferred by
7605 		 * strput.
7606 		 */
7607 		ASSERT(stp->sd_maxblk == INFPSZ ||
7608 			stp->sd_maxblk >= mdata->len);
7609 
7610 		msgsize = mdata->len;
7611 		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7612 		mdata->len = msgsize;
7613 
7614 		if (error == 0)
7615 			break;
7616 
7617 		if (error != EWOULDBLOCK)
7618 			goto out;
7619 
7620 		mutex_enter(&stp->sd_lock);
7621 		/*
7622 		 * Check for a missed wakeup.
7623 		 * Needed since strput did not hold sd_lock across
7624 		 * the canputnext.
7625 		 */
7626 		if (bcanputnext(wqp, pri)) {
7627 			/* Try again */
7628 			mutex_exit(&stp->sd_lock);
7629 			continue;
7630 		}
7631 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
7632 			"strputmsg wait:stp %p waits pri %d", stp, pri);
7633 		if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
7634 		    &done)) != 0) || done) {
7635 			mutex_exit(&stp->sd_lock);
7636 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7637 				"strputmsg out:q %p out %d error %d",
7638 				stp, 0, error);
7639 			return (error);
7640 		}
7641 		TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
7642 			"strputmsg wake:stp %p wakes", stp);
7643 		mutex_exit(&stp->sd_lock);
7644 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
7645 			if (error = straccess(stp, JCWRITE))
7646 				return (error);
7647 	}
7648 out:
7649 	/*
7650 	 * For historic reasons, applications expect EAGAIN
7651 	 * when data mblk could not be allocated. so change
7652 	 * ENOMEM back to EAGAIN
7653 	 */
7654 	if (error == ENOMEM)
7655 		error = EAGAIN;
7656 	TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7657 		"strputmsg out:stp %p out %d error %d", stp, 2, error);
7658 	return (error);
7659 }
7660 
7661 /*
7662  * Put a message downstream.
7663  * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
7664  * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
7665  * and the fmode parameter.
7666  *
7667  * This routine handles the consolidation private flags:
7668  *	MSG_IGNERROR	Ignore any stream head error except STPLEX.
7669  *	MSG_HOLDSIG	Hold signals while waiting for data.
7670  *	MSG_IGNFLOW	Don't check streams flow control.
7671  *
7672  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7673  */
7674 int
7675 kstrputmsg(
7676 	struct vnode *vp,
7677 	mblk_t *mctl,
7678 	struct uio *uiop,
7679 	ssize_t msgsize,
7680 	unsigned char pri,
7681 	int flag,
7682 	int fmode)
7683 {
7684 	struct stdata *stp;
7685 	queue_t *wqp;
7686 	ssize_t rmin, rmax;
7687 	int error;
7688 
7689 	ASSERT(vp->v_stream);
7690 	stp = vp->v_stream;
7691 	wqp = stp->sd_wrq;
7692 #ifdef C2_AUDIT
7693 	if (audit_active)
7694 		audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
7695 #endif
7696 
7697 	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
7698 		if (error = straccess(stp, JCWRITE)) {
7699 			freemsg(mctl);
7700 			return (error);
7701 		}
7702 	}
7703 
7704 	if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
7705 		if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7706 			mutex_enter(&stp->sd_lock);
7707 			error = strwriteable(stp, B_FALSE, B_TRUE);
7708 			mutex_exit(&stp->sd_lock);
7709 			if (error != 0) {
7710 				freemsg(mctl);
7711 				return (error);
7712 			}
7713 		}
7714 	}
7715 
7716 	/*
7717 	 * Check for legal flag value.
7718 	 */
7719 	switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
7720 	case MSG_HIPRI:
7721 		if ((mctl == NULL) || (pri != 0)) {
7722 			freemsg(mctl);
7723 			return (EINVAL);
7724 		}
7725 		break;
7726 	case MSG_BAND:
7727 		break;
7728 
7729 	default:
7730 		freemsg(mctl);
7731 		return (EINVAL);
7732 	}
7733 
7734 	TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
7735 		"kstrputmsg in:stp %p", stp);
7736 
7737 	/* get these values from those cached in the stream head */
7738 	rmin = stp->sd_qn_minpsz;
7739 	rmax = stp->sd_qn_maxpsz;
7740 
7741 	/*
7742 	 * Make sure ctl and data sizes together fall within the
7743 	 * limits of the max and min receive packet sizes and do
7744 	 * not exceed system limit.
7745 	 */
7746 	ASSERT((rmax >= 0) || (rmax == INFPSZ));
7747 	if (rmax == 0) {
7748 		freemsg(mctl);
7749 		return (ERANGE);
7750 	}
7751 	/*
7752 	 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7753 	 * Needed to prevent partial failures in the strmakedata loop.
7754 	 */
7755 	if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7756 		rmax = stp->sd_maxblk;
7757 
7758 	if (uiop == NULL) {
7759 		msgsize = -1;
7760 		rmin = -1;	/* no range check for NULL data part */
7761 	} else {
7762 		/* Use uio flags as well as the fmode parameter flags */
7763 		fmode |= uiop->uio_fmode;
7764 
7765 		if ((msgsize < rmin) ||
7766 		    ((msgsize > rmax) && (rmax != INFPSZ))) {
7767 			freemsg(mctl);
7768 			return (ERANGE);
7769 		}
7770 	}
7771 
7772 	/* Ignore flow control in strput for HIPRI */
7773 	if (flag & MSG_HIPRI)
7774 		flag |= MSG_IGNFLOW;
7775 
7776 	for (;;) {
7777 		int done = 0;
7778 		int waitflag;
7779 		mblk_t *mp;
7780 
7781 		/*
7782 		 * strput will always free the ctl mblk - even when strput
7783 		 * fails. If MSG_IGNFLOW is set then any error returned
7784 		 * will cause us to break the loop, so we don't need a copy
7785 		 * of the message. If MSG_IGNFLOW is not set, then we can
7786 		 * get hit by flow control and be forced to try again. In
7787 		 * this case we need to have a copy of the message. We
7788 		 * do this using copymsg since the message may get modified
7789 		 * by something below us.
7790 		 *
7791 		 * We've observed that many TPI providers do not check db_ref
7792 		 * on the control messages but blindly reuse them for the
7793 		 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
7794 		 * friendly to such providers than using dupmsg. Also, note
7795 		 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
7796 		 * Only data messages are subject to flow control, hence
7797 		 * subject to this copymsg.
7798 		 */
7799 		if (flag & MSG_IGNFLOW) {
7800 			mp = mctl;
7801 			mctl = NULL;
7802 		} else {
7803 			do {
7804 				/*
7805 				 * If a message has a free pointer, the message
7806 				 * must be dupmsg to maintain this pointer.
7807 				 * Code using this facility must be sure
7808 				 * that modules below will not change the
7809 				 * contents of the dblk without checking db_ref
7810 				 * first. If db_ref is > 1, then the module
7811 				 * needs to do a copymsg first. Otherwise,
7812 				 * the contents of the dblk may become
7813 				 * inconsistent because the freesmg/freeb below
7814 				 * may end up calling atomic_add_32_nv.
7815 				 * The atomic_add_32_nv in freeb (accessing
7816 				 * all of db_ref, db_type, db_flags, and
7817 				 * db_struioflag) does not prevent other threads
7818 				 * from concurrently trying to modify e.g.
7819 				 * db_type.
7820 				 */
7821 				if (mctl->b_datap->db_frtnp != NULL)
7822 					mp = dupmsg(mctl);
7823 				else
7824 					mp = copymsg(mctl);
7825 
7826 				if (mp != NULL || mctl == NULL)
7827 					break;
7828 
7829 				error = strwaitbuf(msgdsize(mctl), BPRI_MED);
7830 				if (error) {
7831 					freemsg(mctl);
7832 					return (error);
7833 				}
7834 			} while (mp == NULL);
7835 		}
7836 		/*
7837 		 * Verify that all of msgsize can be transferred by
7838 		 * strput.
7839 		 */
7840 		ASSERT(stp->sd_maxblk == INFPSZ ||
7841 			stp->sd_maxblk >= msgsize);
7842 		error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7843 		if (error == 0)
7844 			break;
7845 
7846 		if (error != EWOULDBLOCK)
7847 			goto out;
7848 
7849 		/*
7850 		 * IF MSG_IGNFLOW is set we should have broken out of loop
7851 		 * above.
7852 		 */
7853 		ASSERT(!(flag & MSG_IGNFLOW));
7854 		mutex_enter(&stp->sd_lock);
7855 		/*
7856 		 * Check for a missed wakeup.
7857 		 * Needed since strput did not hold sd_lock across
7858 		 * the canputnext.
7859 		 */
7860 		if (bcanputnext(wqp, pri)) {
7861 			/* Try again */
7862 			mutex_exit(&stp->sd_lock);
7863 			continue;
7864 		}
7865 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
7866 			"kstrputmsg wait:stp %p waits pri %d", stp, pri);
7867 
7868 		waitflag = WRITEWAIT;
7869 		if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
7870 			if (flag & MSG_HOLDSIG)
7871 				waitflag |= STR_NOSIG;
7872 			if (flag & MSG_IGNERROR)
7873 				waitflag |= STR_NOERROR;
7874 		}
7875 		if (((error = strwaitq(stp, waitflag,
7876 		    (ssize_t)0, fmode, -1, &done)) != 0) || done) {
7877 			mutex_exit(&stp->sd_lock);
7878 			TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
7879 				"kstrputmsg out:stp %p out %d error %d",
7880 				stp, 0, error);
7881 			freemsg(mctl);
7882 			return (error);
7883 		}
7884 		TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
7885 			"kstrputmsg wake:stp %p wakes", stp);
7886 		mutex_exit(&stp->sd_lock);
7887 		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
7888 			if (error = straccess(stp, JCWRITE)) {
7889 				freemsg(mctl);
7890 				return (error);
7891 			}
7892 		}
7893 	}
7894 out:
7895 	freemsg(mctl);
7896 	/*
7897 	 * For historic reasons, applications expect EAGAIN
7898 	 * when data mblk could not be allocated. so change
7899 	 * ENOMEM back to EAGAIN
7900 	 */
7901 	if (error == ENOMEM)
7902 		error = EAGAIN;
7903 	TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
7904 		"kstrputmsg out:stp %p out %d error %d", stp, 2, error);
7905 	return (error);
7906 }
7907 
7908 /*
7909  * Determines whether the necessary conditions are set on a stream
7910  * for it to be readable, writeable, or have exceptions.
7911  *
7912  * strpoll handles the consolidation private events:
7913  *	POLLNOERR	Do not return POLLERR even if there are stream
7914  *			head errors.
7915  *			Used by sockfs.
7916  *	POLLRDDATA	Do not return POLLIN unless at least one message on
7917  *			the queue contains one or more M_DATA mblks. Thus
7918  *			when this flag is set a queue with only
7919  *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
7920  *			Used by sockfs to ignore T_EXDATA_IND messages.
7921  *
7922  * Note: POLLRDDATA assumes that synch streams only return messages with
7923  * an M_DATA attached (i.e. not messages consisting of only
7924  * an M_PROTO/M_PCPROTO part).
7925  */
7926 int
7927 strpoll(
7928 	struct stdata *stp,
7929 	short events_arg,
7930 	int anyyet,
7931 	short *reventsp,
7932 	struct pollhead **phpp)
7933 {
7934 	int events = (ushort_t)events_arg;
7935 	int retevents = 0;
7936 	mblk_t *mp;
7937 	qband_t *qbp;
7938 	long sd_flags = stp->sd_flag;
7939 	int headlocked = 0;
7940 
7941 	/*
7942 	 * For performance, a single 'if' tests for most possible edge
7943 	 * conditions in one shot
7944 	 */
7945 	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
7946 		if (sd_flags & STPLEX) {
7947 			*reventsp = POLLNVAL;
7948 			return (EINVAL);
7949 		}
7950 		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
7951 		    (sd_flags & STRDERR)) ||
7952 		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
7953 		    (sd_flags & STWRERR))) {
7954 			if (!(events & POLLNOERR)) {
7955 				*reventsp = POLLERR;
7956 				return (0);
7957 			}
7958 		}
7959 	}
7960 	if (sd_flags & STRHUP) {
7961 		retevents |= POLLHUP;
7962 	} else if (events & (POLLWRNORM | POLLWRBAND)) {
7963 		queue_t *tq;
7964 		queue_t	*qp = stp->sd_wrq;
7965 
7966 		claimstr(qp);
7967 		/* Find next module forward that has a service procedure */
7968 		tq = qp->q_next->q_nfsrv;
7969 		ASSERT(tq != NULL);
7970 
7971 		polllock(&stp->sd_pollist, QLOCK(tq));
7972 		if (events & POLLWRNORM) {
7973 			queue_t *sqp;
7974 
7975 			if (tq->q_flag & QFULL)
7976 				/* ensure backq svc procedure runs */
7977 				tq->q_flag |= QWANTW;
7978 			else if ((sqp = stp->sd_struiowrq) != NULL) {
7979 				/* Check sync stream barrier write q */
7980 				mutex_exit(QLOCK(tq));
7981 				polllock(&stp->sd_pollist, QLOCK(sqp));
7982 				if (sqp->q_flag & QFULL)
7983 					/* ensure pollwakeup() is done */
7984 					sqp->q_flag |= QWANTWSYNC;
7985 				else
7986 					retevents |= POLLOUT;
7987 				/* More write events to process ??? */
7988 				if (! (events & POLLWRBAND)) {
7989 					mutex_exit(QLOCK(sqp));
7990 					releasestr(qp);
7991 					goto chkrd;
7992 				}
7993 				mutex_exit(QLOCK(sqp));
7994 				polllock(&stp->sd_pollist, QLOCK(tq));
7995 			} else
7996 				retevents |= POLLOUT;
7997 		}
7998 		if (events & POLLWRBAND) {
7999 			qbp = tq->q_bandp;
8000 			if (qbp) {
8001 				while (qbp) {
8002 					if (qbp->qb_flag & QB_FULL)
8003 						qbp->qb_flag |= QB_WANTW;
8004 					else
8005 						retevents |= POLLWRBAND;
8006 					qbp = qbp->qb_next;
8007 				}
8008 			} else {
8009 				retevents |= POLLWRBAND;
8010 			}
8011 		}
8012 		mutex_exit(QLOCK(tq));
8013 		releasestr(qp);
8014 	}
8015 chkrd:
8016 	if (sd_flags & STRPRI) {
8017 		retevents |= (events & POLLPRI);
8018 	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
8019 		queue_t	*qp = _RD(stp->sd_wrq);
8020 		int normevents = (events & (POLLIN | POLLRDNORM));
8021 
8022 		/*
8023 		 * Note: Need to do polllock() here since ps_lock may be
8024 		 * held. See bug 4191544.
8025 		 */
8026 		polllock(&stp->sd_pollist, &stp->sd_lock);
8027 		headlocked = 1;
8028 		mp = qp->q_first;
8029 		while (mp) {
8030 			/*
8031 			 * For POLLRDDATA we scan b_cont and b_next until we
8032 			 * find an M_DATA.
8033 			 */
8034 			if ((events & POLLRDDATA) &&
8035 			    mp->b_datap->db_type != M_DATA) {
8036 				mblk_t *nmp = mp->b_cont;
8037 
8038 				while (nmp != NULL &&
8039 				    nmp->b_datap->db_type != M_DATA)
8040 					nmp = nmp->b_cont;
8041 				if (nmp == NULL) {
8042 					mp = mp->b_next;
8043 					continue;
8044 				}
8045 			}
8046 			if (mp->b_band == 0)
8047 				retevents |= normevents;
8048 			else
8049 				retevents |= (events & (POLLIN | POLLRDBAND));
8050 			break;
8051 		}
8052 		if (! (retevents & normevents) &&
8053 		    (stp->sd_wakeq & RSLEEP)) {
8054 			/*
8055 			 * Sync stream barrier read queue has data.
8056 			 */
8057 			retevents |= normevents;
8058 		}
8059 		/* Treat eof as normal data */
8060 		if (sd_flags & STREOF)
8061 			retevents |= normevents;
8062 	}
8063 
8064 	*reventsp = (short)retevents;
8065 	if (retevents) {
8066 		if (headlocked)
8067 			mutex_exit(&stp->sd_lock);
8068 		return (0);
8069 	}
8070 
8071 	/*
8072 	 * If poll() has not found any events yet, set up event cell
8073 	 * to wake up the poll if a requested event occurs on this
8074 	 * stream.  Check for collisions with outstanding poll requests.
8075 	 */
8076 	if (!anyyet) {
8077 		*phpp = &stp->sd_pollist;
8078 		if (headlocked == 0) {
8079 			polllock(&stp->sd_pollist, &stp->sd_lock);
8080 			headlocked = 1;
8081 		}
8082 		stp->sd_rput_opt |= SR_POLLIN;
8083 	}
8084 	if (headlocked)
8085 		mutex_exit(&stp->sd_lock);
8086 	return (0);
8087 }
8088 
8089 /*
8090  * The purpose of putback() is to assure sleeping polls/reads
8091  * are awakened when there are no new messages arriving at the,
8092  * stream head, and a message is placed back on the read queue.
8093  *
8094  * sd_lock must be held when messages are placed back on stream
8095  * head.  (getq() holds sd_lock when it removes messages from
8096  * the queue)
8097  */
8098 
8099 static void
8100 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
8101 {
8102 	ASSERT(MUTEX_HELD(&stp->sd_lock));
8103 	(void) putbq(q, bp);
8104 	/*
8105 	 * A message may have come in when the sd_lock was dropped in the
8106 	 * calling routine. If this is the case and STR*ATMARK info was
8107 	 * received, need to move that from the stream head to the q_last
8108 	 * so that SIOCATMARK can return the proper value.
8109 	 */
8110 	if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
8111 		unsigned short *flagp = &q->q_last->b_flag;
8112 		uint_t b_flag = (uint_t)*flagp;
8113 
8114 		if (stp->sd_flag & STRATMARK) {
8115 			b_flag &= ~MSGNOTMARKNEXT;
8116 			b_flag |= MSGMARKNEXT;
8117 			stp->sd_flag &= ~STRATMARK;
8118 		} else {
8119 			b_flag &= ~MSGMARKNEXT;
8120 			b_flag |= MSGNOTMARKNEXT;
8121 			stp->sd_flag &= ~STRNOTATMARK;
8122 		}
8123 		*flagp = (unsigned short) b_flag;
8124 	}
8125 
8126 #ifdef	DEBUG
8127 	/*
8128 	 * Make sure that the flags are not messed up.
8129 	 */
8130 	{
8131 		mblk_t *mp;
8132 		mp = q->q_last;
8133 		while (mp != NULL) {
8134 			ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
8135 			    (MSGMARKNEXT|MSGNOTMARKNEXT));
8136 			mp = mp->b_cont;
8137 		}
8138 	}
8139 #endif
8140 	if (q->q_first == bp) {
8141 		short pollevents;
8142 
8143 		if (stp->sd_flag & RSLEEP) {
8144 			stp->sd_flag &= ~RSLEEP;
8145 			cv_broadcast(&q->q_wait);
8146 		}
8147 		if (stp->sd_flag & STRPRI) {
8148 			pollevents = POLLPRI;
8149 		} else {
8150 			if (band == 0) {
8151 				if (!(stp->sd_rput_opt & SR_POLLIN))
8152 					return;
8153 				stp->sd_rput_opt &= ~SR_POLLIN;
8154 				pollevents = POLLIN | POLLRDNORM;
8155 			} else {
8156 				pollevents = POLLIN | POLLRDBAND;
8157 			}
8158 		}
8159 		mutex_exit(&stp->sd_lock);
8160 		pollwakeup(&stp->sd_pollist, pollevents);
8161 		mutex_enter(&stp->sd_lock);
8162 	}
8163 }
8164 
8165 /*
8166  * Return the held vnode attached to the stream head of a
8167  * given queue
8168  * It is the responsibility of the calling routine to ensure
8169  * that the queue does not go away (e.g. pop).
8170  */
8171 vnode_t *
8172 strq2vp(queue_t *qp)
8173 {
8174 	vnode_t *vp;
8175 	vp = STREAM(qp)->sd_vnode;
8176 	ASSERT(vp != NULL);
8177 	VN_HOLD(vp);
8178 	return (vp);
8179 }
8180 
8181 /*
8182  * return the stream head write queue for the given vp
8183  * It is the responsibility of the calling routine to ensure
8184  * that the stream or vnode do not close.
8185  */
8186 queue_t *
8187 strvp2wq(vnode_t *vp)
8188 {
8189 	ASSERT(vp->v_stream != NULL);
8190 	return (vp->v_stream->sd_wrq);
8191 }
8192 
8193 /*
8194  * pollwakeup stream head
8195  * It is the responsibility of the calling routine to ensure
8196  * that the stream or vnode do not close.
8197  */
8198 void
8199 strpollwakeup(vnode_t *vp, short event)
8200 {
8201 	ASSERT(vp->v_stream);
8202 	pollwakeup(&vp->v_stream->sd_pollist, event);
8203 }
8204 
8205 /*
8206  * Mate the stream heads of two vnodes together. If the two vnodes are the
8207  * same, we just make the write-side point at the read-side -- otherwise,
8208  * we do a full mate.  Only works on vnodes associated with streams that are
8209  * still being built and thus have only a stream head.
8210  */
8211 void
8212 strmate(vnode_t *vp1, vnode_t *vp2)
8213 {
8214 	queue_t *wrq1 = strvp2wq(vp1);
8215 	queue_t *wrq2 = strvp2wq(vp2);
8216 
8217 	/*
8218 	 * Verify that there are no modules on the stream yet.  We also
8219 	 * rely on the stream head always having a service procedure to
8220 	 * avoid tweaking q_nfsrv.
8221 	 */
8222 	ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL);
8223 	ASSERT(wrq1->q_qinfo->qi_srvp != NULL);
8224 	ASSERT(wrq2->q_qinfo->qi_srvp != NULL);
8225 
8226 	/*
8227 	 * If the queues are the same, just twist; otherwise do a full mate.
8228 	 */
8229 	if (wrq1 == wrq2) {
8230 		wrq1->q_next = _RD(wrq1);
8231 	} else {
8232 		wrq1->q_next = _RD(wrq2);
8233 		wrq2->q_next = _RD(wrq1);
8234 		STREAM(wrq1)->sd_mate = STREAM(wrq2);
8235 		STREAM(wrq1)->sd_flag |= STRMATE;
8236 		STREAM(wrq2)->sd_mate = STREAM(wrq1);
8237 		STREAM(wrq2)->sd_flag |= STRMATE;
8238 	}
8239 }
8240 
8241 /*
8242  * XXX will go away when console is correctly fixed.
8243  * Clean up the console PIDS, from previous I_SETSIG,
8244  * called only for cnopen which never calls strclean().
8245  */
8246 void
8247 str_cn_clean(struct vnode *vp)
8248 {
8249 	strsig_t *ssp, *pssp, *tssp;
8250 	struct stdata *stp;
8251 	struct pid  *pidp;
8252 	int update = 0;
8253 
8254 	ASSERT(vp->v_stream);
8255 	stp = vp->v_stream;
8256 	pssp = NULL;
8257 	mutex_enter(&stp->sd_lock);
8258 	ssp = stp->sd_siglist;
8259 	while (ssp) {
8260 		mutex_enter(&pidlock);
8261 		pidp = ssp->ss_pidp;
8262 		/*
8263 		 * Get rid of PID if the proc is gone.
8264 		 */
8265 		if (pidp->pid_prinactive) {
8266 			tssp = ssp->ss_next;
8267 			if (pssp)
8268 				pssp->ss_next = tssp;
8269 			else
8270 				stp->sd_siglist = tssp;
8271 			ASSERT(pidp->pid_ref <= 1);
8272 			PID_RELE(ssp->ss_pidp);
8273 			mutex_exit(&pidlock);
8274 			kmem_free(ssp, sizeof (strsig_t));
8275 			update = 1;
8276 			ssp = tssp;
8277 			continue;
8278 		} else
8279 			mutex_exit(&pidlock);
8280 		pssp = ssp;
8281 		ssp = ssp->ss_next;
8282 	}
8283 	if (update) {
8284 		stp->sd_sigflags = 0;
8285 		for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
8286 			stp->sd_sigflags |= ssp->ss_events;
8287 	}
8288 	mutex_exit(&stp->sd_lock);
8289 }
8290 
8291 /*
8292  * Return B_TRUE if there is data in the message, B_FALSE otherwise.
8293  */
8294 static boolean_t
8295 msghasdata(mblk_t *bp)
8296 {
8297 	for (; bp; bp = bp->b_cont)
8298 		if (bp->b_datap->db_type == M_DATA) {
8299 			ASSERT(bp->b_wptr >= bp->b_rptr);
8300 			if (bp->b_wptr > bp->b_rptr)
8301 				return (B_TRUE);
8302 		}
8303 	return (B_FALSE);
8304 }
8305