xref: /titanic_53/usr/src/uts/common/io/stream.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/


/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/thread.h>
#include <sys/sysmacros.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/errno.h>
#include <sys/vtrace.h>
#include <sys/ftrace.h>
#include <sys/ontrap.h>
#include <sys/multidata.h>
#include <sys/multidata_impl.h>
#include <sys/sdt.h>

#ifdef DEBUG
#include <sys/kmem_impl.h>
#endif

/*
 * This file contains all the STREAMS utility routines that may
 * be used by modules and drivers.
 */

/*
 * STREAMS message allocator: principles of operation
 *
 * The streams message allocator consists of all the routines that
 * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
 * dupb(), freeb() and freemsg().  What follows is a high-level view
 * of how the allocator works.
 *
 * Every streams message consists of one or more mblks, a dblk, and data.
 * All mblks for all types of messages come from a common mblk_cache.
 * The dblk and data come in several flavors, depending on how the
 * message is allocated:
 *
 * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
 *     fixed-size dblk/data caches. For message sizes that are multiples of
 *     PAGESIZE, dblks are allocated separately from the buffer.
 *     The associated buffer is allocated by the constructor using kmem_alloc().
 *     For all other message sizes, dblk and its associated data is allocated
 *     as a single contiguous chunk of memory.
 *     Objects in these caches consist of a dblk plus its associated data.
 *     allocb() determines the nearest-size cache by table lookup:
 *     the dblk_cache[] array provides the mapping from size to dblk cache.
 *
 * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
 *     kmem_alloc()'ing a buffer for the data and supplying that
 *     buffer to gesballoc(), described below.
 *
 * (3) The four flavors of [d]esballoc[a] are all implemented by a
 *     common routine, gesballoc() ("generic esballoc").  gesballoc()
 *     allocates a dblk from the global dblk_esb_cache and sets db_base,
 *     db_lim and db_frtnp to describe the caller-supplied buffer.
 *
 * While there are several routines to allocate messages, there is only
 * one routine to free messages: freeb().  freeb() simply invokes the
 * dblk's free method, dbp->db_free(), which is set at allocation time.
 *
 * dupb() creates a new reference to a message by allocating a new mblk,
 * incrementing the dblk reference count and setting the dblk's free
 * method to dblk_decref().  The dblk's original free method is retained
 * in db_lastfree.  dblk_decref() decrements the reference count on each
 * freeb().  If this is not the last reference it just frees the mblk;
 * if this *is* the last reference, it restores db_free to db_lastfree,
 * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
 *
 * The implementation makes aggressive use of kmem object caching for
 * maximum performance.  This makes the code simple and compact, but
 * also a bit abstruse in some places.  The invariants that constitute a
 * message's constructed state, described below, are more subtle than usual.
 *
 * Every dblk has an "attached mblk" as part of its constructed state.
 * The mblk is allocated by the dblk's constructor and remains attached
 * until the message is either dup'ed or pulled up.  In the dupb() case
 * the mblk association doesn't matter until the last free, at which time
 * dblk_decref() attaches the last mblk to the dblk.  pullupmsg() affects
 * the mblk association because it swaps the leading mblks of two messages,
 * so it is responsible for swapping their db_mblk pointers accordingly.
 * From a constructed-state viewpoint it doesn't matter that a dblk's
 * attached mblk can change while the message is allocated; all that
 * matters is that the dblk has *some* attached mblk when it's freed.
 *
 * The sizes of the allocb() small-message caches are not magical.
 * They represent a good trade-off between internal and external
 * fragmentation for current workloads.  They should be reevaluated
 * periodically, especially if allocations larger than DBLK_MAX_CACHE
 * become common.  We use 64-byte alignment so that dblks don't
 * straddle cache lines unnecessarily.
 */
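/*
 * Illustrative sketch (not part of the allocator): the fragment below shows
 * the lifecycle described above from a module's point of view.  The buffer
 * 'payload', its length 'len' and the queue 'q' are hypothetical.
 *
 *	mblk_t *mp, *dup;
 *
 *	if ((mp = allocb(len, BPRI_MED)) == NULL)
 *		return (ENOMEM);
 *	bcopy(payload, mp->b_wptr, len);
 *	mp->b_wptr += len;
 *
 *	if ((dup = dupb(mp)) != NULL)		(db_ref becomes 2)
 *		putnext(q, dup);
 *
 *	freemsg(mp);
 *
 * If dupb() succeeded, freemsg(mp) does not drop the last reference:
 * dblk_decref() frees only the mblk, and the dblk (and hence the data)
 * goes back to its cache when the dup'ed copy is eventually freed.
 */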
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64
#define	DBLK_MIN_SIZE		8
#define	DBLK_SIZE_SHIFT		3

#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
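
/*
 * For example (illustrative only): DBLK_RTFU(1, M_DATA, 0, 0) expands, for
 * constant arguments, to a single 32-bit value whose bytes line up with
 * db_ref, db_type, db_flags and db_struioflag, so
 *
 *	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
 *
 * initializes db_ref = 1, db_type = M_DATA, db_flags = 0 and
 * db_struioflag = 0 in one store.  Note that (ref - 1) is folded into the
 * flags byte; dblk_decref() later reads it back (masked with DBLK_REFMIN)
 * as the floor value at which the dblk itself must be freed.
 */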

static size_t dblk_sizes[] = {
#ifdef _LP64
	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3920,
	8192, 12112, 16384, 20304, 24576, 28496, 32768, 36688,
	40960, 44880, 49152, 53072, 57344, 61264, 65536, 69456,
#else
	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3968,
	8192, 12160, 16384, 20352, 24576, 28544, 32768, 36736,
	40960, 44928, 49152, 53120, 57344, 61312, 65536, 69504,
#endif
	DBLK_MAX_CACHE, 0
};

static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
static struct kmem_cache *mblk_cache;
static struct kmem_cache *dblk_esb_cache;
static struct kmem_cache *fthdr_cache;
static struct kmem_cache *ftblk_cache;

static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
static mblk_t *allocb_oversize(size_t size, int flags);
static int allocb_tryhard_fails;
static void frnop_func(void *arg);
frtn_t frnop = { frnop_func };
static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);

static boolean_t rwnext_enter(queue_t *qp);
static void rwnext_exit(queue_t *qp);

/*
 * Patchable mblk/dblk kmem_cache flags.
 */
int dblk_kmem_flags = 0;
int mblk_kmem_flags = 0;


static int
dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;
	size_t index;

	ASSERT(msg_size != 0);

	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;

	ASSERT(index <= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	if ((msg_size & PAGEOFFSET) == 0) {
		dbp->db_base = kmem_alloc(msg_size, kmflags);
		if (dbp->db_base == NULL) {
			kmem_cache_free(mblk_cache, dbp->db_mblk);
			return (-1);
		}
	} else {
		dbp->db_base = (unsigned char *)&dbp[1];
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_cache[index];
	dbp->db_lim = dbp->db_base + msg_size;
	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

/*ARGSUSED*/
static int
dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);
	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = dblk_esb_cache;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

static int
bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
{
	dblk_t *dbp = buf;
	bcache_t *bcp = (bcache_t *)cdrarg;

	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
		return (-1);

	if ((dbp->db_base = (unsigned char *)kmem_cache_alloc(bcp->buffer_cache,
	    kmflags)) == NULL) {
		kmem_cache_free(mblk_cache, dbp->db_mblk);
		return (-1);
	}

	dbp->db_mblk->b_datap = dbp;
	dbp->db_cache = (void *)bcp;
	dbp->db_lim = dbp->db_base + bcp->size;
	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
	dbp->db_frtnp = NULL;
	dbp->db_fthdr = NULL;
	dbp->db_credp = NULL;
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;
	return (0);
}

/*ARGSUSED*/
static void
dblk_destructor(void *buf, void *cdrarg)
{
	dblk_t *dbp = buf;
	ssize_t msg_size = (ssize_t)cdrarg;

	ASSERT(dbp->db_mblk->b_datap == dbp);

	ASSERT(msg_size != 0);

	ASSERT(dbp->db_struioflag == 0);
	ASSERT(dbp->db_struioun.cksum.flags == 0);

	if ((msg_size & PAGEOFFSET) == 0) {
		kmem_free(dbp->db_base, msg_size);
	}

	kmem_cache_free(mblk_cache, dbp->db_mblk);
}

static void
bcache_dblk_destructor(void *buf, void *cdrarg)
{
	dblk_t *dbp = buf;
	bcache_t *bcp = (bcache_t *)cdrarg;

	kmem_cache_free(bcp->buffer_cache, dbp->db_base);

	ASSERT(dbp->db_mblk->b_datap == dbp);

	ASSERT(dbp->db_struioflag == 0);
	ASSERT(dbp->db_struioun.cksum.flags == 0);

	kmem_cache_free(mblk_cache, dbp->db_mblk);
}

void
streams_msg_init(void)
{
	char name[40];
	size_t size;
	size_t lastsize = DBLK_MIN_SIZE;
	size_t *sizep;
	struct kmem_cache *cp;
	size_t tot_size;
	int offset;

	mblk_cache = kmem_cache_create("streams_mblk",
		sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
		mblk_kmem_flags);

	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {

		if ((offset = (size & PAGEOFFSET)) != 0) {
			/*
			 * The buffer size is not a multiple of the page
			 * size, so the dblk should be allocated on the
			 * same page as its buffer.
			 */
			tot_size = size + sizeof (dblk_t);
			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
								< PAGESIZE);
			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);

		} else {

			/*
			 * The buffer size is a multiple of the page size,
			 * so the dblk and buffer are allocated separately.
			 */

			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
			tot_size = sizeof (dblk_t);
		}

		(void) sprintf(name, "streams_dblk_%ld", size);
		cp = kmem_cache_create(name, tot_size,
			DBLK_CACHE_ALIGN, dblk_constructor,
			dblk_destructor, NULL,
			(void *)(size), NULL, dblk_kmem_flags);

		while (lastsize <= size) {
			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
			lastsize += DBLK_MIN_SIZE;
		}
	}

	dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
			sizeof (dblk_t), DBLK_CACHE_ALIGN,
			dblk_esb_constructor, dblk_destructor, NULL,
			(void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
	fthdr_cache = kmem_cache_create("streams_fthdr",
		sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
	ftblk_cache = kmem_cache_create("streams_ftblk",
		sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);

	/* Initialize Multidata caches */
	mmd_init();
}

/*ARGSUSED*/
mblk_t *
allocb(size_t size, uint_t pri)
{
	dblk_t *dbp;
	mblk_t *mp;
	size_t index;

	index =  (size - 1)  >> DBLK_SIZE_SHIFT;

	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
		if (size != 0) {
			mp = allocb_oversize(size, KM_NOSLEEP);
			goto out;
		}
		index = 0;
	}

	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
		mp = NULL;
		goto out;
	}

	mp = dbp->db_mblk;
	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = dbp->db_base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;
	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
out:
	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);

	return (mp);
}
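
/*
 * Worked example (illustrative, using the _LP64 dblk_sizes[] above):
 * allocb(100, BPRI_MED) computes index = (100 - 1) >> DBLK_SIZE_SHIFT = 12.
 * streams_msg_init() pointed dblk_cache[12] at the "streams_dblk_144" cache,
 * the smallest listed size that can hold 100 bytes, so the caller gets an
 * mblk whose dblk provides 144 bytes between db_base and db_lim, with
 * b_rptr and b_wptr both starting at db_base.
 */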

mblk_t *
allocb_tmpl(size_t size, const mblk_t *tmpl)
{
	mblk_t *mp = allocb(size, 0);

	if (mp != NULL) {
		cred_t *cr = DB_CRED(tmpl);
		if (cr != NULL)
			crhold(mp->b_datap->db_credp = cr);
		DB_CPID(mp) = DB_CPID(tmpl);
		DB_TYPE(mp) = DB_TYPE(tmpl);
	}
	return (mp);
}

mblk_t *
allocb_cred(size_t size, cred_t *cr)
{
	mblk_t *mp = allocb(size, 0);

	if (mp != NULL && cr != NULL)
		crhold(mp->b_datap->db_credp = cr);

	return (mp);
}

mblk_t *
allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr)
{
	mblk_t *mp = allocb_wait(size, 0, flags, error);

	if (mp != NULL && cr != NULL)
		crhold(mp->b_datap->db_credp = cr);

	return (mp);
}

void
freeb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;

	ASSERT(dbp->db_ref > 0);
	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);

	dbp->db_free(mp, dbp);
}

void
freemsg(mblk_t *mp)
{
	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
	while (mp) {
		dblk_t *dbp = mp->b_datap;
		mblk_t *mp_cont = mp->b_cont;

		ASSERT(dbp->db_ref > 0);
		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);

		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);

		dbp->db_free(mp, dbp);
		mp = mp_cont;
	}
}

/*
 * Reallocate a block for another use.  Try hard to use the old block.
 * If the old data is wanted (copy), leave b_wptr at the end of the data,
 * otherwise return b_wptr = b_rptr.
 *
 * This routine is private and unstable.
 */
mblk_t	*
reallocb(mblk_t *mp, size_t size, uint_t copy)
{
	mblk_t		*mp1;
	unsigned char	*old_rptr;
	ptrdiff_t	cur_size;

	if (mp == NULL)
		return (allocb(size, BPRI_HI));

	cur_size = mp->b_wptr - mp->b_rptr;
	old_rptr = mp->b_rptr;

	ASSERT(mp->b_datap->db_ref != 0);

	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
		/*
		 * If the data is wanted and it will fit where it is, no
		 * work is required.
		 */
		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
			return (mp);

		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
		mp1 = mp;
	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
		/* XXX other mp state could be copied too, db_flags ... ? */
		mp1->b_cont = mp->b_cont;
	} else {
		return (NULL);
	}

	if (copy) {
		bcopy(old_rptr, mp1->b_rptr, cur_size);
		mp1->b_wptr = mp1->b_rptr + cur_size;
	}

	if (mp != mp1)
		freeb(mp);

	return (mp1);
}

static void
dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;

	/* Reset the struioflag and the checksum flag fields */
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	kmem_cache_free(dbp->db_cache, dbp);
}

static void
dblk_decref(mblk_t *mp, dblk_t *dbp)
{
	if (dbp->db_ref != 1) {
		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
		/*
		 * atomic_add_32_nv() just decremented db_ref, so we no longer
		 * have a reference to the dblk, which means another thread
		 * could free it.  Therefore we cannot examine the dblk to
		 * determine whether ours was the last reference.  Instead,
		 * we extract the new and minimum reference counts from rtfu.
		 * Note that all we're really saying is "if (ref != refmin)".
		 */
		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
			kmem_cache_free(mblk_cache, mp);
			return;
		}
	}
	dbp->db_mblk = mp;
	dbp->db_free = dbp->db_lastfree;
	dbp->db_lastfree(mp, dbp);
}

mblk_t *
dupb(mblk_t *mp)
{
	dblk_t *dbp = mp->b_datap;
	mblk_t *new_mp;
	uint32_t oldrtfu, newrtfu;

	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
		goto out;

	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
	new_mp->b_rptr = mp->b_rptr;
	new_mp->b_wptr = mp->b_wptr;
	new_mp->b_datap = dbp;
	new_mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);

	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);

	/*
	 * First-dup optimization.  The enabling assumption is that there
	 * can never be a race (in correct code) to dup the first copy
	 * of a message.  Therefore we don't need to do it atomically.
	 */
	if (dbp->db_free != dblk_decref) {
		dbp->db_free = dblk_decref;
		dbp->db_ref++;
		goto out;
	}

	do {
		ASSERT(dbp->db_ref > 0);
		oldrtfu = DBLK_RTFU_WORD(dbp);
		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
		/*
		 * If db_ref is maxed out we can't dup this message anymore.
		 */
		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
			kmem_cache_free(mblk_cache, new_mp);
			new_mp = NULL;
			goto out;
		}
	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);

out:
	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
	return (new_mp);
}

static void
dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
{
	frtn_t *frp = dbp->db_frtnp;

	ASSERT(dbp->db_mblk == mp);
	frp->free_func(frp->free_arg);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	kmem_cache_free(dbp->db_cache, dbp);
}

/*ARGSUSED*/
static void
frnop_func(void *arg)
{
}

/*
 * Generic esballoc used to implement the four flavors: [d]esballoc[a].
 */
static mblk_t *
gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
{
	dblk_t *dbp;
	mblk_t *mp;

	ASSERT(base != NULL && frp != NULL);

	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
		mp = NULL;
		goto out;
	}

	mp = dbp->db_mblk;
	dbp->db_base = base;
	dbp->db_lim = base + size;
	dbp->db_free = dbp->db_lastfree = lastfree;
	dbp->db_frtnp = frp;
	DBLK_RTFU_WORD(dbp) = db_rtfu;
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;

out:
	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
	return (mp);
}
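
/*
 * Illustrative sketch: the esballoc() family below, built on gesballoc(),
 * is typically used by a driver to loan out a buffer it already owns (for
 * example a receive DMA buffer) without copying.  All names here are
 * hypothetical.
 *
 *	static void
 *	xx_rbuf_free(void *arg)
 *	{
 *		xx_rbuf_t *rbp = arg;
 *
 *		xx_post_rbuf(rbp);		(recycle to the hardware)
 *	}
 *
 *	...
 *	rbp->rb_frtn.free_func = xx_rbuf_free;
 *	rbp->rb_frtn.free_arg = (caddr_t)rbp;
 *	mp = desballoc(rbp->rb_kaddr, rbp->rb_len, BPRI_MED, &rbp->rb_frtn);
 *
 * When the last reference to such a message is freed, dblk_lastfree_desb()
 * above invokes free_func(free_arg) directly, while esballoc() passes
 * freebs_enqueue instead so that the callback is deferred.
 */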

/*ARGSUSED*/
mblk_t *
esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
		    frp, freebs_enqueue, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
	    frp, freebs_enqueue, KM_NOSLEEP));
}

/*
 * Same as esballoc() but sleeps waiting for memory.
 */
/*ARGSUSED*/
mblk_t *
esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
		    frp, freebs_enqueue, KM_SLEEP);

		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
	    frp, freebs_enqueue, KM_SLEEP));
}

/*ARGSUSED*/
mblk_t *
desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
			frp, dblk_lastfree_desb, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
	    frp, dblk_lastfree_desb, KM_NOSLEEP));
}

/*ARGSUSED*/
mblk_t *
esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
		    frp, freebs_enqueue, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
	    frp, freebs_enqueue, KM_NOSLEEP));
}

/*ARGSUSED*/
mblk_t *
desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
{
	mblk_t *mp;

	/*
	 * Note that this is structured to allow the common case (i.e.
	 * STREAMS flowtracing disabled) to call gesballoc() with tail
	 * call optimization.
	 */
	if (!str_ftnever) {
		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
		    frp, dblk_lastfree_desb, KM_NOSLEEP);

		if (mp != NULL)
			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
		return (mp);
	}

	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
	    frp, dblk_lastfree_desb, KM_NOSLEEP));
}

static void
bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
{
	bcache_t *bcp = dbp->db_cache;

	ASSERT(dbp->db_mblk == mp);
	if (dbp->db_fthdr != NULL)
		str_ftfree(dbp);

	/* set credp and projid to be 'unspecified' before returning to cache */
	if (dbp->db_credp != NULL) {
		crfree(dbp->db_credp);
		dbp->db_credp = NULL;
	}
	dbp->db_cpid = -1;
	dbp->db_struioflag = 0;
	dbp->db_struioun.cksum.flags = 0;

	mutex_enter(&bcp->mutex);
	kmem_cache_free(bcp->dblk_cache, dbp);
	bcp->alloc--;

	if (bcp->alloc == 0 && bcp->destroy != 0) {
		kmem_cache_destroy(bcp->dblk_cache);
		kmem_cache_destroy(bcp->buffer_cache);
		mutex_exit(&bcp->mutex);
		mutex_destroy(&bcp->mutex);
		kmem_free(bcp, sizeof (bcache_t));
	} else {
		mutex_exit(&bcp->mutex);
	}
}

bcache_t *
bcache_create(char *name, size_t size, uint_t align)
{
	bcache_t *bcp;
	char buffer[255];

	ASSERT((align & (align - 1)) == 0);

	if ((bcp = (bcache_t *)kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) ==
	    NULL) {
		return (NULL);
	}

	bcp->size = size;
	bcp->align = align;
	bcp->alloc = 0;
	bcp->destroy = 0;

	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);

	(void) sprintf(buffer, "%s_buffer_cache", name);
	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
	    NULL, NULL, NULL, 0);
	(void) sprintf(buffer, "%s_dblk_cache", name);
	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
						NULL, (void *)bcp, NULL, 0);

	return (bcp);
}

void
bcache_destroy(bcache_t *bcp)
{
	ASSERT(bcp != NULL);

	mutex_enter(&bcp->mutex);
	if (bcp->alloc == 0) {
		kmem_cache_destroy(bcp->dblk_cache);
		kmem_cache_destroy(bcp->buffer_cache);
		mutex_exit(&bcp->mutex);
		mutex_destroy(&bcp->mutex);
		kmem_free(bcp, sizeof (bcache_t));
	} else {
		bcp->destroy++;
		mutex_exit(&bcp->mutex);
	}
}

/*ARGSUSED*/
mblk_t *
bcache_allocb(bcache_t *bcp, uint_t pri)
{
	dblk_t *dbp;
	mblk_t *mp = NULL;

	ASSERT(bcp != NULL);

	mutex_enter(&bcp->mutex);
	if (bcp->destroy != 0) {
		mutex_exit(&bcp->mutex);
		goto out;
	}

	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
		mutex_exit(&bcp->mutex);
		goto out;
	}
	bcp->alloc++;
	mutex_exit(&bcp->mutex);

	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);

	mp = dbp->db_mblk;
	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	mp->b_rptr = mp->b_wptr = dbp->db_base;
	mp->b_queue = NULL;
	MBLK_BAND_FLAG_WORD(mp) = 0;
	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
out:
	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);

	return (mp);
}
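
/*
 * Illustrative sketch of the bcache interface above (all names are
 * hypothetical): a driver that wants fixed-size, aligned message buffers
 * backed by its own pair of kmem caches can do
 *
 *	bcp = bcache_create("xxdrv", 2048, 64);
 *	...
 *	mp = bcache_allocb(bcp, BPRI_MED);
 *	...
 *	bcache_destroy(bcp);
 *
 * Blocks allocated this way are returned through bcache_dblk_lastfree(),
 * which also completes the teardown if bcache_destroy() was called while
 * allocations were still outstanding.
 */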
923*7c478bd9Sstevel@tonic-gate 
924*7c478bd9Sstevel@tonic-gate static void
925*7c478bd9Sstevel@tonic-gate dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
926*7c478bd9Sstevel@tonic-gate {
927*7c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
928*7c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
929*7c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
930*7c478bd9Sstevel@tonic-gate 
931*7c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
932*7c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
933*7c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
934*7c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
935*7c478bd9Sstevel@tonic-gate 	}
936*7c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
937*7c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
938*7c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
939*7c478bd9Sstevel@tonic-gate 
940*7c478bd9Sstevel@tonic-gate 	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
941*7c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
942*7c478bd9Sstevel@tonic-gate }
943*7c478bd9Sstevel@tonic-gate 
944*7c478bd9Sstevel@tonic-gate static mblk_t *
945*7c478bd9Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags)
946*7c478bd9Sstevel@tonic-gate {
947*7c478bd9Sstevel@tonic-gate 	mblk_t *mp;
948*7c478bd9Sstevel@tonic-gate 	void *buf;
949*7c478bd9Sstevel@tonic-gate 
950*7c478bd9Sstevel@tonic-gate 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
951*7c478bd9Sstevel@tonic-gate 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
952*7c478bd9Sstevel@tonic-gate 		return (NULL);
953*7c478bd9Sstevel@tonic-gate 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
954*7c478bd9Sstevel@tonic-gate 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
955*7c478bd9Sstevel@tonic-gate 		kmem_free(buf, size);
956*7c478bd9Sstevel@tonic-gate 
957*7c478bd9Sstevel@tonic-gate 	if (mp != NULL)
958*7c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
959*7c478bd9Sstevel@tonic-gate 
960*7c478bd9Sstevel@tonic-gate 	return (mp);
961*7c478bd9Sstevel@tonic-gate }
962*7c478bd9Sstevel@tonic-gate 
963*7c478bd9Sstevel@tonic-gate mblk_t *
964*7c478bd9Sstevel@tonic-gate allocb_tryhard(size_t target_size)
965*7c478bd9Sstevel@tonic-gate {
966*7c478bd9Sstevel@tonic-gate 	size_t size;
967*7c478bd9Sstevel@tonic-gate 	mblk_t *bp;
968*7c478bd9Sstevel@tonic-gate 
969*7c478bd9Sstevel@tonic-gate 	for (size = target_size; size < target_size + 512;
970*7c478bd9Sstevel@tonic-gate 	    size += DBLK_CACHE_ALIGN)
971*7c478bd9Sstevel@tonic-gate 		if ((bp = allocb(size, BPRI_HI)) != NULL)
972*7c478bd9Sstevel@tonic-gate 			return (bp);
973*7c478bd9Sstevel@tonic-gate 	allocb_tryhard_fails++;
974*7c478bd9Sstevel@tonic-gate 	return (NULL);
975*7c478bd9Sstevel@tonic-gate }
976*7c478bd9Sstevel@tonic-gate 
977*7c478bd9Sstevel@tonic-gate /*
978*7c478bd9Sstevel@tonic-gate  * This routine is consolidation private for STREAMS internal use
979*7c478bd9Sstevel@tonic-gate  * This routine may only be called from sync routines (i.e., not
980*7c478bd9Sstevel@tonic-gate  * from put or service procedures).  It is located here (rather
981*7c478bd9Sstevel@tonic-gate  * than strsubr.c) so that we don't have to expose all of the
982*7c478bd9Sstevel@tonic-gate  * allocb() implementation details in header files.
983*7c478bd9Sstevel@tonic-gate  */
984*7c478bd9Sstevel@tonic-gate mblk_t *
985*7c478bd9Sstevel@tonic-gate allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
986*7c478bd9Sstevel@tonic-gate {
987*7c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
988*7c478bd9Sstevel@tonic-gate 	mblk_t *mp;
989*7c478bd9Sstevel@tonic-gate 	size_t index;
990*7c478bd9Sstevel@tonic-gate 
991*7c478bd9Sstevel@tonic-gate 	index = (size -1) >> DBLK_SIZE_SHIFT;
992*7c478bd9Sstevel@tonic-gate 
993*7c478bd9Sstevel@tonic-gate 	if (flags & STR_NOSIG) {
994*7c478bd9Sstevel@tonic-gate 		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
995*7c478bd9Sstevel@tonic-gate 			if (size != 0) {
996*7c478bd9Sstevel@tonic-gate 				mp = allocb_oversize(size, KM_SLEEP);
997*7c478bd9Sstevel@tonic-gate 				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
998*7c478bd9Sstevel@tonic-gate 				    (uintptr_t)mp);
999*7c478bd9Sstevel@tonic-gate 				return (mp);
1000*7c478bd9Sstevel@tonic-gate 			}
1001*7c478bd9Sstevel@tonic-gate 			index = 0;
1002*7c478bd9Sstevel@tonic-gate 		}
1003*7c478bd9Sstevel@tonic-gate 
1004*7c478bd9Sstevel@tonic-gate 		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
1005*7c478bd9Sstevel@tonic-gate 		mp = dbp->db_mblk;
1006*7c478bd9Sstevel@tonic-gate 		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
1007*7c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = mp->b_cont = NULL;
1008*7c478bd9Sstevel@tonic-gate 		mp->b_rptr = mp->b_wptr = dbp->db_base;
1009*7c478bd9Sstevel@tonic-gate 		mp->b_queue = NULL;
1010*7c478bd9Sstevel@tonic-gate 		MBLK_BAND_FLAG_WORD(mp) = 0;
1011*7c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
1012*7c478bd9Sstevel@tonic-gate 
1013*7c478bd9Sstevel@tonic-gate 		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
1014*7c478bd9Sstevel@tonic-gate 
1015*7c478bd9Sstevel@tonic-gate 	} else {
1016*7c478bd9Sstevel@tonic-gate 		while ((mp = allocb(size, pri)) == NULL) {
1017*7c478bd9Sstevel@tonic-gate 			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
1018*7c478bd9Sstevel@tonic-gate 				return (NULL);
1019*7c478bd9Sstevel@tonic-gate 		}
1020*7c478bd9Sstevel@tonic-gate 	}
1021*7c478bd9Sstevel@tonic-gate 
1022*7c478bd9Sstevel@tonic-gate 	return (mp);
1023*7c478bd9Sstevel@tonic-gate }
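
/*
 * Illustrative sketch (editor's note, not from the original source): how a
 * consolidation-private synchronous routine might call allocb_wait().  With
 * STR_NOSIG the allocation sleeps (KM_SLEEP) and always returns a message;
 * without STR_NOSIG the call can return NULL with *errorp set when
 * strwaitbuf() gives up (e.g. when interrupted by a signal).  The function
 * name below is hypothetical.
 *
 *	static mblk_t *
 *	foo_get_msg(size_t len)
 *	{
 *		int error = 0;
 *
 *		return (allocb_wait(len, BPRI_MED, STR_NOSIG, &error));
 *	}
 */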
1024*7c478bd9Sstevel@tonic-gate 
1025*7c478bd9Sstevel@tonic-gate /*
1026*7c478bd9Sstevel@tonic-gate  * Call function 'func' with 'arg' when a class zero block can
1027*7c478bd9Sstevel@tonic-gate  * be allocated with priority 'pri'.
1028*7c478bd9Sstevel@tonic-gate  */
1029*7c478bd9Sstevel@tonic-gate bufcall_id_t
1030*7c478bd9Sstevel@tonic-gate esbbcall(uint_t pri, void (*func)(void *), void *arg)
1031*7c478bd9Sstevel@tonic-gate {
1032*7c478bd9Sstevel@tonic-gate 	return (bufcall(1, pri, func, arg));
1033*7c478bd9Sstevel@tonic-gate }
1034*7c478bd9Sstevel@tonic-gate 
1035*7c478bd9Sstevel@tonic-gate /*
1036*7c478bd9Sstevel@tonic-gate  * Allocates an iocblk (M_IOCTL) block.  Properly sets the credentials,
1037*7c478bd9Sstevel@tonic-gate  * ioc_id, rval and error fields of the struct iocblk to set up an ioctl call.
1038*7c478bd9Sstevel@tonic-gate  * This provides consistency for all internal allocators of ioctls.
1039*7c478bd9Sstevel@tonic-gate  */
1040*7c478bd9Sstevel@tonic-gate mblk_t *
1041*7c478bd9Sstevel@tonic-gate mkiocb(uint_t cmd)
1042*7c478bd9Sstevel@tonic-gate {
1043*7c478bd9Sstevel@tonic-gate 	struct iocblk	*ioc;
1044*7c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
1045*7c478bd9Sstevel@tonic-gate 
1046*7c478bd9Sstevel@tonic-gate 	/*
1047*7c478bd9Sstevel@tonic-gate 	 * Allocate enough space for any of the ioctl related messages.
1048*7c478bd9Sstevel@tonic-gate 	 */
1049*7c478bd9Sstevel@tonic-gate 	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
1050*7c478bd9Sstevel@tonic-gate 		return (NULL);
1051*7c478bd9Sstevel@tonic-gate 
1052*7c478bd9Sstevel@tonic-gate 	bzero(mp->b_rptr, sizeof (union ioctypes));
1053*7c478bd9Sstevel@tonic-gate 
1054*7c478bd9Sstevel@tonic-gate 	/*
1055*7c478bd9Sstevel@tonic-gate 	 * Set the mblk_t information and ptrs correctly.
1056*7c478bd9Sstevel@tonic-gate 	 */
1057*7c478bd9Sstevel@tonic-gate 	mp->b_wptr += sizeof (struct iocblk);
1058*7c478bd9Sstevel@tonic-gate 	mp->b_datap->db_type = M_IOCTL;
1059*7c478bd9Sstevel@tonic-gate 
1060*7c478bd9Sstevel@tonic-gate 	/*
1061*7c478bd9Sstevel@tonic-gate 	 * Fill in the fields.
1062*7c478bd9Sstevel@tonic-gate 	 */
1063*7c478bd9Sstevel@tonic-gate 	ioc		= (struct iocblk *)mp->b_rptr;
1064*7c478bd9Sstevel@tonic-gate 	ioc->ioc_cmd	= cmd;
1065*7c478bd9Sstevel@tonic-gate 	ioc->ioc_cr	= kcred;
1066*7c478bd9Sstevel@tonic-gate 	ioc->ioc_id	= getiocseqno();
1067*7c478bd9Sstevel@tonic-gate 	ioc->ioc_flag	= IOC_NATIVE;
1068*7c478bd9Sstevel@tonic-gate 	return (mp);
1069*7c478bd9Sstevel@tonic-gate }
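
/*
 * Illustrative sketch (editor's note, not part of the original file): a
 * hypothetical in-kernel caller building an M_IOCTL with mkiocb(), attaching
 * a data block and sending it downstream.  The reply normally comes back
 * later as an M_IOCACK or M_IOCNAK message.  FOO_CMD and foo_arg_t are
 * made-up names.
 *
 *	static int
 *	foo_send_ioctl(queue_t *wq)
 *	{
 *		mblk_t *mp, *dp;
 *		struct iocblk *ioc;
 *
 *		if ((mp = mkiocb(FOO_CMD)) == NULL)
 *			return (ENOMEM);
 *		if ((dp = allocb(sizeof (foo_arg_t), BPRI_MED)) == NULL) {
 *			freemsg(mp);
 *			return (ENOMEM);
 *		}
 *		dp->b_wptr += sizeof (foo_arg_t);
 *		mp->b_cont = dp;
 *		ioc = (struct iocblk *)mp->b_rptr;
 *		ioc->ioc_count = sizeof (foo_arg_t);
 *		putnext(wq, mp);
 *		return (0);
 *	}
 */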
1070*7c478bd9Sstevel@tonic-gate 
1071*7c478bd9Sstevel@tonic-gate /*
1072*7c478bd9Sstevel@tonic-gate  * test if block of given size can be allocated with a request of
1073*7c478bd9Sstevel@tonic-gate  * the given priority.
1074*7c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
1075*7c478bd9Sstevel@tonic-gate  */
1076*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
1077*7c478bd9Sstevel@tonic-gate int
1078*7c478bd9Sstevel@tonic-gate testb(size_t size, uint_t pri)
1079*7c478bd9Sstevel@tonic-gate {
1080*7c478bd9Sstevel@tonic-gate 	return ((size + sizeof (dblk_t)) <= kmem_avail());
1081*7c478bd9Sstevel@tonic-gate }
1082*7c478bd9Sstevel@tonic-gate 
1083*7c478bd9Sstevel@tonic-gate /*
1084*7c478bd9Sstevel@tonic-gate  * Call function 'func' with argument 'arg' when there is a reasonably
1085*7c478bd9Sstevel@tonic-gate  * good chance that a block of size 'size' can be allocated.
1086*7c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
1087*7c478bd9Sstevel@tonic-gate  */
1088*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
1089*7c478bd9Sstevel@tonic-gate bufcall_id_t
1090*7c478bd9Sstevel@tonic-gate bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
1091*7c478bd9Sstevel@tonic-gate {
1092*7c478bd9Sstevel@tonic-gate 	static long bid = 1;	/* always odd to save checking for zero */
1093*7c478bd9Sstevel@tonic-gate 	bufcall_id_t bc_id;
1094*7c478bd9Sstevel@tonic-gate 	struct strbufcall *bcp;
1095*7c478bd9Sstevel@tonic-gate 
1096*7c478bd9Sstevel@tonic-gate 	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
1097*7c478bd9Sstevel@tonic-gate 		return (0);
1098*7c478bd9Sstevel@tonic-gate 
1099*7c478bd9Sstevel@tonic-gate 	bcp->bc_func = func;
1100*7c478bd9Sstevel@tonic-gate 	bcp->bc_arg = arg;
1101*7c478bd9Sstevel@tonic-gate 	bcp->bc_size = size;
1102*7c478bd9Sstevel@tonic-gate 	bcp->bc_next = NULL;
1103*7c478bd9Sstevel@tonic-gate 	bcp->bc_executor = NULL;
1104*7c478bd9Sstevel@tonic-gate 
1105*7c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
1106*7c478bd9Sstevel@tonic-gate 	/*
1107*7c478bd9Sstevel@tonic-gate 	 * Once bcp is linked into strbcalls and strbcall_lock is dropped, there
1108*7c478bd9Sstevel@tonic-gate 	 * must be no further references to bcp, since it may be freed by
1109*7c478bd9Sstevel@tonic-gate 	 * runbufcalls().  Because the bc_id field is what we return to the
1110*7c478bd9Sstevel@tonic-gate 	 * caller, save its value in a local variable first.
1111*7c478bd9Sstevel@tonic-gate 	 */
1112*7c478bd9Sstevel@tonic-gate 	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */
1113*7c478bd9Sstevel@tonic-gate 
1114*7c478bd9Sstevel@tonic-gate 	/*
1115*7c478bd9Sstevel@tonic-gate 	 * add newly allocated stream event to existing
1116*7c478bd9Sstevel@tonic-gate 	 * linked list of events.
1117*7c478bd9Sstevel@tonic-gate 	 */
1118*7c478bd9Sstevel@tonic-gate 	if (strbcalls.bc_head == NULL) {
1119*7c478bd9Sstevel@tonic-gate 		strbcalls.bc_head = strbcalls.bc_tail = bcp;
1120*7c478bd9Sstevel@tonic-gate 	} else {
1121*7c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail->bc_next = bcp;
1122*7c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail = bcp;
1123*7c478bd9Sstevel@tonic-gate 	}
1124*7c478bd9Sstevel@tonic-gate 
1125*7c478bd9Sstevel@tonic-gate 	cv_signal(&strbcall_cv);
1126*7c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
1127*7c478bd9Sstevel@tonic-gate 	return (bc_id);
1128*7c478bd9Sstevel@tonic-gate }
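
/*
 * Illustrative sketch (editor's note, hypothetical driver code, not from this
 * file): the classic recovery pattern when allocb() fails in a service
 * procedure.  bufcall() schedules a callback for when an allocation of the
 * given size is likely to succeed; the callback simply re-enables the queue.
 * A robust driver also handles bufcall() returning 0 (no memory for the
 * request itself), typically by falling back to a timeout, and cancels any
 * outstanding id with unbufcall() in its close routine.  foo_t and its
 * members are made-up names.
 *
 *	static void
 *	foo_reenable(void *arg)
 *	{
 *		qenable((queue_t *)arg);
 *	}
 *
 *	// in the service procedure, on allocation failure:
 *	if ((bp = allocb(len, BPRI_MED)) == NULL) {
 *		foop->foo_bufcall_id = bufcall(len, BPRI_MED,
 *		    foo_reenable, q);
 *		(void) putbq(q, mp);
 *		return (0);
 *	}
 *
 *	// in close:
 *	if (foop->foo_bufcall_id != 0)
 *		unbufcall(foop->foo_bufcall_id);
 */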
1129*7c478bd9Sstevel@tonic-gate 
1130*7c478bd9Sstevel@tonic-gate /*
1131*7c478bd9Sstevel@tonic-gate  * Cancel a bufcall request.
1132*7c478bd9Sstevel@tonic-gate  */
1133*7c478bd9Sstevel@tonic-gate void
1134*7c478bd9Sstevel@tonic-gate unbufcall(bufcall_id_t id)
1135*7c478bd9Sstevel@tonic-gate {
1136*7c478bd9Sstevel@tonic-gate 	strbufcall_t *bcp, *pbcp;
1137*7c478bd9Sstevel@tonic-gate 
1138*7c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
1139*7c478bd9Sstevel@tonic-gate again:
1140*7c478bd9Sstevel@tonic-gate 	pbcp = NULL;
1141*7c478bd9Sstevel@tonic-gate 	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
1142*7c478bd9Sstevel@tonic-gate 		if (id == bcp->bc_id)
1143*7c478bd9Sstevel@tonic-gate 			break;
1144*7c478bd9Sstevel@tonic-gate 		pbcp = bcp;
1145*7c478bd9Sstevel@tonic-gate 	}
1146*7c478bd9Sstevel@tonic-gate 	if (bcp) {
1147*7c478bd9Sstevel@tonic-gate 		if (bcp->bc_executor != NULL) {
1148*7c478bd9Sstevel@tonic-gate 			if (bcp->bc_executor != curthread) {
1149*7c478bd9Sstevel@tonic-gate 				cv_wait(&bcall_cv, &strbcall_lock);
1150*7c478bd9Sstevel@tonic-gate 				goto again;
1151*7c478bd9Sstevel@tonic-gate 			}
1152*7c478bd9Sstevel@tonic-gate 		} else {
1153*7c478bd9Sstevel@tonic-gate 			if (pbcp)
1154*7c478bd9Sstevel@tonic-gate 				pbcp->bc_next = bcp->bc_next;
1155*7c478bd9Sstevel@tonic-gate 			else
1156*7c478bd9Sstevel@tonic-gate 				strbcalls.bc_head = bcp->bc_next;
1157*7c478bd9Sstevel@tonic-gate 			if (bcp == strbcalls.bc_tail)
1158*7c478bd9Sstevel@tonic-gate 				strbcalls.bc_tail = pbcp;
1159*7c478bd9Sstevel@tonic-gate 			kmem_free(bcp, sizeof (strbufcall_t));
1160*7c478bd9Sstevel@tonic-gate 		}
1161*7c478bd9Sstevel@tonic-gate 	}
1162*7c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
1163*7c478bd9Sstevel@tonic-gate }
1164*7c478bd9Sstevel@tonic-gate 
1165*7c478bd9Sstevel@tonic-gate /*
1166*7c478bd9Sstevel@tonic-gate  * Duplicate a message block by block (uses dupb), returning
1167*7c478bd9Sstevel@tonic-gate  * a pointer to the duplicate message.
1168*7c478bd9Sstevel@tonic-gate  * Returns a non-NULL value only if the entire message
1169*7c478bd9Sstevel@tonic-gate  * was dup'd.
1170*7c478bd9Sstevel@tonic-gate  */
1171*7c478bd9Sstevel@tonic-gate mblk_t *
1172*7c478bd9Sstevel@tonic-gate dupmsg(mblk_t *bp)
1173*7c478bd9Sstevel@tonic-gate {
1174*7c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
1175*7c478bd9Sstevel@tonic-gate 
1176*7c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = dupb(bp)))
1177*7c478bd9Sstevel@tonic-gate 		return (NULL);
1178*7c478bd9Sstevel@tonic-gate 
1179*7c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
1180*7c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
1181*7c478bd9Sstevel@tonic-gate 			freemsg(head);
1182*7c478bd9Sstevel@tonic-gate 			return (NULL);
1183*7c478bd9Sstevel@tonic-gate 		}
1184*7c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
1185*7c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
1186*7c478bd9Sstevel@tonic-gate 	}
1187*7c478bd9Sstevel@tonic-gate 	return (head);
1188*7c478bd9Sstevel@tonic-gate }
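
/*
 * Illustrative sketch (editor's note, not from the original source): keeping
 * a reference to a message while passing a duplicate downstream, e.g. for
 * possible retransmission.  dupb()/dupmsg() share the underlying data blocks
 * (db_ref is incremented), so both holders must treat the data as read-only.
 * foo_stash_for_retransmit() is a hypothetical helper.
 *
 *	mblk_t *copy;
 *
 *	if ((copy = dupmsg(mp)) == NULL) {
 *		// allocation failed; just send the original
 *		putnext(q, mp);
 *	} else {
 *		foo_stash_for_retransmit(mp);
 *		putnext(q, copy);
 *	}
 */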
1189*7c478bd9Sstevel@tonic-gate 
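/*
 * dupmsg_noloan() is like dupmsg() except that any M_DATA block marked
 * STRUIO_ZC (i.e. backed by a zero-copy "loaned" user buffer) is copied
 * with copyb() rather than dup'd, so the resulting message never shares
 * loaned buffers with the original.
 */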
1190*7c478bd9Sstevel@tonic-gate #define	DUPB_NOLOAN(bp) \
1191*7c478bd9Sstevel@tonic-gate 	((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
1192*7c478bd9Sstevel@tonic-gate 	copyb((bp)) : dupb((bp)))
1193*7c478bd9Sstevel@tonic-gate 
1194*7c478bd9Sstevel@tonic-gate mblk_t *
1195*7c478bd9Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp)
1196*7c478bd9Sstevel@tonic-gate {
1197*7c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
1198*7c478bd9Sstevel@tonic-gate 
1199*7c478bd9Sstevel@tonic-gate 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
1200*7c478bd9Sstevel@tonic-gate 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
1201*7c478bd9Sstevel@tonic-gate 		return (NULL);
1202*7c478bd9Sstevel@tonic-gate 
1203*7c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
1204*7c478bd9Sstevel@tonic-gate 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
1205*7c478bd9Sstevel@tonic-gate 			freemsg(head);
1206*7c478bd9Sstevel@tonic-gate 			return (NULL);
1207*7c478bd9Sstevel@tonic-gate 		}
1208*7c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
1209*7c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
1210*7c478bd9Sstevel@tonic-gate 	}
1211*7c478bd9Sstevel@tonic-gate 	return (head);
1212*7c478bd9Sstevel@tonic-gate }
1213*7c478bd9Sstevel@tonic-gate 
1214*7c478bd9Sstevel@tonic-gate /*
1215*7c478bd9Sstevel@tonic-gate  * Copy data from message and data block to newly allocated message and
1216*7c478bd9Sstevel@tonic-gate  * data block. Returns new message block pointer, or NULL if error.
1217*7c478bd9Sstevel@tonic-gate  * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
1218*7c478bd9Sstevel@tonic-gate  * as in the original even when db_base is not word aligned. (bug 1052877)
1219*7c478bd9Sstevel@tonic-gate  */
1220*7c478bd9Sstevel@tonic-gate mblk_t *
1221*7c478bd9Sstevel@tonic-gate copyb(mblk_t *bp)
1222*7c478bd9Sstevel@tonic-gate {
1223*7c478bd9Sstevel@tonic-gate 	mblk_t	*nbp;
1224*7c478bd9Sstevel@tonic-gate 	dblk_t	*dp, *ndp;
1225*7c478bd9Sstevel@tonic-gate 	uchar_t *base;
1226*7c478bd9Sstevel@tonic-gate 	size_t	size;
1227*7c478bd9Sstevel@tonic-gate 	size_t	unaligned;
1228*7c478bd9Sstevel@tonic-gate 
1229*7c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_wptr >= bp->b_rptr);
1230*7c478bd9Sstevel@tonic-gate 
1231*7c478bd9Sstevel@tonic-gate 	dp = bp->b_datap;
1232*7c478bd9Sstevel@tonic-gate 	if (dp->db_fthdr != NULL)
1233*7c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
1234*7c478bd9Sstevel@tonic-gate 
1235*7c478bd9Sstevel@tonic-gate 	/*
1236*7c478bd9Sstevel@tonic-gate 	 * Special handling for Multidata message; this should be
1237*7c478bd9Sstevel@tonic-gate 	 * removed once a copy-callback routine is made available.
1238*7c478bd9Sstevel@tonic-gate 	 */
1239*7c478bd9Sstevel@tonic-gate 	if (dp->db_type == M_MULTIDATA) {
1240*7c478bd9Sstevel@tonic-gate 		cred_t *cr;
1241*7c478bd9Sstevel@tonic-gate 
1242*7c478bd9Sstevel@tonic-gate 		if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
1243*7c478bd9Sstevel@tonic-gate 			return (NULL);
1244*7c478bd9Sstevel@tonic-gate 
1245*7c478bd9Sstevel@tonic-gate 		nbp->b_flag = bp->b_flag;
1246*7c478bd9Sstevel@tonic-gate 		nbp->b_band = bp->b_band;
1247*7c478bd9Sstevel@tonic-gate 		ndp = nbp->b_datap;
1248*7c478bd9Sstevel@tonic-gate 
1249*7c478bd9Sstevel@tonic-gate 		/* See comments below on potential issues. */
1250*7c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
1251*7c478bd9Sstevel@tonic-gate 
1252*7c478bd9Sstevel@tonic-gate 		ASSERT(ndp->db_type == dp->db_type);
1253*7c478bd9Sstevel@tonic-gate 		cr = dp->db_credp;
1254*7c478bd9Sstevel@tonic-gate 		if (cr != NULL)
1255*7c478bd9Sstevel@tonic-gate 			crhold(ndp->db_credp = cr);
1256*7c478bd9Sstevel@tonic-gate 		ndp->db_cpid = dp->db_cpid;
1257*7c478bd9Sstevel@tonic-gate 		return (nbp);
1258*7c478bd9Sstevel@tonic-gate 	}
1259*7c478bd9Sstevel@tonic-gate 
1260*7c478bd9Sstevel@tonic-gate 	size = dp->db_lim - dp->db_base;
1261*7c478bd9Sstevel@tonic-gate 	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
1262*7c478bd9Sstevel@tonic-gate 	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
1263*7c478bd9Sstevel@tonic-gate 		return (NULL);
1264*7c478bd9Sstevel@tonic-gate 	nbp->b_flag = bp->b_flag;
1265*7c478bd9Sstevel@tonic-gate 	nbp->b_band = bp->b_band;
1266*7c478bd9Sstevel@tonic-gate 	ndp = nbp->b_datap;
1267*7c478bd9Sstevel@tonic-gate 
1268*7c478bd9Sstevel@tonic-gate 	/*
1269*7c478bd9Sstevel@tonic-gate 	 * Well, here is a potential issue.  If we are trying to
1270*7c478bd9Sstevel@tonic-gate 	 * trace a flow, and we copy the message, we might lose
1271*7c478bd9Sstevel@tonic-gate 	 * information about where this message might have been.
1272*7c478bd9Sstevel@tonic-gate 	 * So we should inherit the FT data.  On the other hand,
1273*7c478bd9Sstevel@tonic-gate 	 * a user might be interested only in alloc to free data.
1274*7c478bd9Sstevel@tonic-gate 	 * So I guess the real answer is to provide a tunable.
1275*7c478bd9Sstevel@tonic-gate 	 */
1276*7c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
1277*7c478bd9Sstevel@tonic-gate 
1278*7c478bd9Sstevel@tonic-gate 	base = ndp->db_base + unaligned;
1279*7c478bd9Sstevel@tonic-gate 	bcopy(dp->db_base, ndp->db_base + unaligned, size);
1280*7c478bd9Sstevel@tonic-gate 
1281*7c478bd9Sstevel@tonic-gate 	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
1282*7c478bd9Sstevel@tonic-gate 	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
1283*7c478bd9Sstevel@tonic-gate 
1284*7c478bd9Sstevel@tonic-gate 	return (nbp);
1285*7c478bd9Sstevel@tonic-gate }
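
/*
 * Illustrative sketch (editor's note, not part of the original file): the
 * common copy-before-write pattern.  A block whose dblk is shared
 * (db_ref > 1) must not be modified in place; copyb() produces a private
 * copy with the same rptr alignment.  FOO_FLAG is a made-up value.
 *
 *	if (mp->b_datap->db_ref > 1) {
 *		mblk_t *nmp;
 *
 *		if ((nmp = copyb(mp)) == NULL) {
 *			freemsg(mp);
 *			return (NULL);
 *		}
 *		nmp->b_cont = mp->b_cont;	// keep the rest of the chain
 *		mp->b_cont = NULL;
 *		freeb(mp);
 *		mp = nmp;
 *	}
 *	*mp->b_rptr = FOO_FLAG;			// now safe to write
 */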
1286*7c478bd9Sstevel@tonic-gate 
1287*7c478bd9Sstevel@tonic-gate /*
1288*7c478bd9Sstevel@tonic-gate  * Copy data from message to newly allocated message using new
1289*7c478bd9Sstevel@tonic-gate  * data blocks.  Returns a pointer to the new message, or NULL if error.
1290*7c478bd9Sstevel@tonic-gate  */
1291*7c478bd9Sstevel@tonic-gate mblk_t *
1292*7c478bd9Sstevel@tonic-gate copymsg(mblk_t *bp)
1293*7c478bd9Sstevel@tonic-gate {
1294*7c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
1295*7c478bd9Sstevel@tonic-gate 
1296*7c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = copyb(bp)))
1297*7c478bd9Sstevel@tonic-gate 		return (NULL);
1298*7c478bd9Sstevel@tonic-gate 
1299*7c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
1300*7c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
1301*7c478bd9Sstevel@tonic-gate 			freemsg(head);
1302*7c478bd9Sstevel@tonic-gate 			return (NULL);
1303*7c478bd9Sstevel@tonic-gate 		}
1304*7c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
1305*7c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
1306*7c478bd9Sstevel@tonic-gate 	}
1307*7c478bd9Sstevel@tonic-gate 	return (head);
1308*7c478bd9Sstevel@tonic-gate }
1309*7c478bd9Sstevel@tonic-gate 
1310*7c478bd9Sstevel@tonic-gate /*
1311*7c478bd9Sstevel@tonic-gate  * link a message block to tail of message
1312*7c478bd9Sstevel@tonic-gate  */
1313*7c478bd9Sstevel@tonic-gate void
1314*7c478bd9Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp)
1315*7c478bd9Sstevel@tonic-gate {
1316*7c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
1317*7c478bd9Sstevel@tonic-gate 
1318*7c478bd9Sstevel@tonic-gate 	for (; mp->b_cont; mp = mp->b_cont)
1319*7c478bd9Sstevel@tonic-gate 		;
1320*7c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;
1321*7c478bd9Sstevel@tonic-gate }
1322*7c478bd9Sstevel@tonic-gate 
1323*7c478bd9Sstevel@tonic-gate /*
1324*7c478bd9Sstevel@tonic-gate  * unlink a message block from head of message
1325*7c478bd9Sstevel@tonic-gate  * return pointer to new message.
1326*7c478bd9Sstevel@tonic-gate  * NULL if message becomes empty.
1327*7c478bd9Sstevel@tonic-gate  */
1328*7c478bd9Sstevel@tonic-gate mblk_t *
1329*7c478bd9Sstevel@tonic-gate unlinkb(mblk_t *bp)
1330*7c478bd9Sstevel@tonic-gate {
1331*7c478bd9Sstevel@tonic-gate 	mblk_t *bp1;
1332*7c478bd9Sstevel@tonic-gate 
1333*7c478bd9Sstevel@tonic-gate 	bp1 = bp->b_cont;
1334*7c478bd9Sstevel@tonic-gate 	bp->b_cont = NULL;
1335*7c478bd9Sstevel@tonic-gate 	return (bp1);
1336*7c478bd9Sstevel@tonic-gate }
1337*7c478bd9Sstevel@tonic-gate 
1338*7c478bd9Sstevel@tonic-gate /*
1339*7c478bd9Sstevel@tonic-gate  * remove a message block "bp" from message "mp"
1340*7c478bd9Sstevel@tonic-gate  *
1341*7c478bd9Sstevel@tonic-gate  * Return pointer to new message or NULL if no message remains.
1342*7c478bd9Sstevel@tonic-gate  * Return -1 if bp is not found in message.
1343*7c478bd9Sstevel@tonic-gate  */
1344*7c478bd9Sstevel@tonic-gate mblk_t *
1345*7c478bd9Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp)
1346*7c478bd9Sstevel@tonic-gate {
1347*7c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
1348*7c478bd9Sstevel@tonic-gate 	mblk_t *lastp = NULL;
1349*7c478bd9Sstevel@tonic-gate 
1350*7c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
1351*7c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
1352*7c478bd9Sstevel@tonic-gate 		if (tmp == bp) {
1353*7c478bd9Sstevel@tonic-gate 			if (lastp)
1354*7c478bd9Sstevel@tonic-gate 				lastp->b_cont = tmp->b_cont;
1355*7c478bd9Sstevel@tonic-gate 			else
1356*7c478bd9Sstevel@tonic-gate 				mp = tmp->b_cont;
1357*7c478bd9Sstevel@tonic-gate 			tmp->b_cont = NULL;
1358*7c478bd9Sstevel@tonic-gate 			return (mp);
1359*7c478bd9Sstevel@tonic-gate 		}
1360*7c478bd9Sstevel@tonic-gate 		lastp = tmp;
1361*7c478bd9Sstevel@tonic-gate 	}
1362*7c478bd9Sstevel@tonic-gate 	return ((mblk_t *)-1);
1363*7c478bd9Sstevel@tonic-gate }
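
/*
 * Illustrative sketch (editor's note, hypothetical code, not from this file):
 * prepending a header block to a data message with linkb() and stripping it
 * again with unlinkb().  struct foo_hdr is a made-up type.
 *
 *	// sender: prepend a header block; hmp now heads the message
 *	if ((hmp = allocb(sizeof (struct foo_hdr), BPRI_MED)) == NULL)
 *		return (ENOMEM);
 *	hmp->b_wptr += sizeof (struct foo_hdr);
 *	linkb(hmp, mp);
 *	putnext(wq, hmp);
 *
 *	// receiver: detach and free the header block, keep the payload
 *	payload = unlinkb(mp);
 *	freeb(mp);
 */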
1364*7c478bd9Sstevel@tonic-gate 
1365*7c478bd9Sstevel@tonic-gate /*
1366*7c478bd9Sstevel@tonic-gate  * Concatenate and align first len bytes of common
1367*7c478bd9Sstevel@tonic-gate  * message type.  Len == -1 means concat everything.
1368*7c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
1369*7c478bd9Sstevel@tonic-gate  * After the pullup, mp points to the pulled up data.
1370*7c478bd9Sstevel@tonic-gate  */
1371*7c478bd9Sstevel@tonic-gate int
1372*7c478bd9Sstevel@tonic-gate pullupmsg(mblk_t *mp, ssize_t len)
1373*7c478bd9Sstevel@tonic-gate {
1374*7c478bd9Sstevel@tonic-gate 	mblk_t *bp, *b_cont;
1375*7c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
1376*7c478bd9Sstevel@tonic-gate 	ssize_t n;
1377*7c478bd9Sstevel@tonic-gate 
1378*7c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref > 0);
1379*7c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
1380*7c478bd9Sstevel@tonic-gate 
1381*7c478bd9Sstevel@tonic-gate 	/*
1382*7c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
1383*7c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
1384*7c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
1385*7c478bd9Sstevel@tonic-gate 	 */
1386*7c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
1387*7c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
1388*7c478bd9Sstevel@tonic-gate 		return (0);
1389*7c478bd9Sstevel@tonic-gate 
1390*7c478bd9Sstevel@tonic-gate 	if (len == -1) {
1391*7c478bd9Sstevel@tonic-gate 		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
1392*7c478bd9Sstevel@tonic-gate 			return (1);
1393*7c478bd9Sstevel@tonic-gate 		len = xmsgsize(mp);
1394*7c478bd9Sstevel@tonic-gate 	} else {
1395*7c478bd9Sstevel@tonic-gate 		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
1396*7c478bd9Sstevel@tonic-gate 		ASSERT(first_mblk_len >= 0);
1397*7c478bd9Sstevel@tonic-gate 		/*
1398*7c478bd9Sstevel@tonic-gate 		 * If the length is less than that of the first mblk,
1399*7c478bd9Sstevel@tonic-gate 		 * we want to pull up the message into an aligned mblk.
1400*7c478bd9Sstevel@tonic-gate 		 * Though not part of the spec, some callers assume it.
1401*7c478bd9Sstevel@tonic-gate 		 */
1402*7c478bd9Sstevel@tonic-gate 		if (len <= first_mblk_len) {
1403*7c478bd9Sstevel@tonic-gate 			if (str_aligned(mp->b_rptr))
1404*7c478bd9Sstevel@tonic-gate 				return (1);
1405*7c478bd9Sstevel@tonic-gate 			len = first_mblk_len;
1406*7c478bd9Sstevel@tonic-gate 		} else if (xmsgsize(mp) < len)
1407*7c478bd9Sstevel@tonic-gate 			return (0);
1408*7c478bd9Sstevel@tonic-gate 	}
1409*7c478bd9Sstevel@tonic-gate 
1410*7c478bd9Sstevel@tonic-gate 	if ((bp = allocb_tmpl(len, mp)) == NULL)
1411*7c478bd9Sstevel@tonic-gate 		return (0);
1412*7c478bd9Sstevel@tonic-gate 
1413*7c478bd9Sstevel@tonic-gate 	dbp = bp->b_datap;
1414*7c478bd9Sstevel@tonic-gate 	*bp = *mp;		/* swap mblks so bp heads the old msg... */
1415*7c478bd9Sstevel@tonic-gate 	mp->b_datap = dbp;	/* ... and mp heads the new message */
1416*7c478bd9Sstevel@tonic-gate 	mp->b_datap->db_mblk = mp;
1417*7c478bd9Sstevel@tonic-gate 	bp->b_datap->db_mblk = bp;
1418*7c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
1419*7c478bd9Sstevel@tonic-gate 
1420*7c478bd9Sstevel@tonic-gate 	do {
1421*7c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_datap->db_ref > 0);
1422*7c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_wptr >= bp->b_rptr);
1423*7c478bd9Sstevel@tonic-gate 		n = MIN(bp->b_wptr - bp->b_rptr, len);
1424*7c478bd9Sstevel@tonic-gate 		bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
1425*7c478bd9Sstevel@tonic-gate 		mp->b_wptr += n;
1426*7c478bd9Sstevel@tonic-gate 		bp->b_rptr += n;
1427*7c478bd9Sstevel@tonic-gate 		len -= n;
1428*7c478bd9Sstevel@tonic-gate 		if (bp->b_rptr != bp->b_wptr)
1429*7c478bd9Sstevel@tonic-gate 			break;
1430*7c478bd9Sstevel@tonic-gate 		b_cont = bp->b_cont;
1431*7c478bd9Sstevel@tonic-gate 		freeb(bp);
1432*7c478bd9Sstevel@tonic-gate 		bp = b_cont;
1433*7c478bd9Sstevel@tonic-gate 	} while (len && bp);
1434*7c478bd9Sstevel@tonic-gate 
1435*7c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */
1436*7c478bd9Sstevel@tonic-gate 
1437*7c478bd9Sstevel@tonic-gate 	return (1);
1438*7c478bd9Sstevel@tonic-gate }
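
/*
 * Illustrative sketch (editor's note, not part of the original file): the
 * usual way a module makes sure a fixed-size header is contiguous and
 * aligned before casting b_rptr.  struct foo_hdr is a made-up type.
 *
 *	struct foo_hdr *fh;
 *
 *	if (MBLKL(mp) < sizeof (struct foo_hdr)) {
 *		if (!pullupmsg(mp, sizeof (struct foo_hdr))) {
 *			freemsg(mp);
 *			return;
 *		}
 *	}
 *	fh = (struct foo_hdr *)mp->b_rptr;
 */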
1439*7c478bd9Sstevel@tonic-gate 
1440*7c478bd9Sstevel@tonic-gate /*
1441*7c478bd9Sstevel@tonic-gate  * Concatenate and align at least the first len bytes of common message
1442*7c478bd9Sstevel@tonic-gate  * type.  Len == -1 means concatenate everything.  The original message is
1443*7c478bd9Sstevel@tonic-gate  * unaltered.  Returns a pointer to a new message on success, otherwise
1444*7c478bd9Sstevel@tonic-gate  * returns NULL.
1445*7c478bd9Sstevel@tonic-gate  */
1446*7c478bd9Sstevel@tonic-gate mblk_t *
1447*7c478bd9Sstevel@tonic-gate msgpullup(mblk_t *mp, ssize_t len)
1448*7c478bd9Sstevel@tonic-gate {
1449*7c478bd9Sstevel@tonic-gate 	mblk_t	*newmp;
1450*7c478bd9Sstevel@tonic-gate 	ssize_t	totlen;
1451*7c478bd9Sstevel@tonic-gate 	ssize_t	n;
1452*7c478bd9Sstevel@tonic-gate 
1453*7c478bd9Sstevel@tonic-gate 	/*
1454*7c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
1455*7c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
1456*7c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
1457*7c478bd9Sstevel@tonic-gate 	 */
1458*7c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
1459*7c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
1460*7c478bd9Sstevel@tonic-gate 		return (NULL);
1461*7c478bd9Sstevel@tonic-gate 
1462*7c478bd9Sstevel@tonic-gate 	totlen = xmsgsize(mp);
1463*7c478bd9Sstevel@tonic-gate 
1464*7c478bd9Sstevel@tonic-gate 	if ((len > 0) && (len > totlen))
1465*7c478bd9Sstevel@tonic-gate 		return (NULL);
1466*7c478bd9Sstevel@tonic-gate 
1467*7c478bd9Sstevel@tonic-gate 	/*
1468*7c478bd9Sstevel@tonic-gate 	 * Copy all of the first msg type into one new mblk, then dupmsg
1469*7c478bd9Sstevel@tonic-gate 	 * and link the rest onto this.
1470*7c478bd9Sstevel@tonic-gate 	 */
1471*7c478bd9Sstevel@tonic-gate 
1472*7c478bd9Sstevel@tonic-gate 	len = totlen;
1473*7c478bd9Sstevel@tonic-gate 
1474*7c478bd9Sstevel@tonic-gate 	if ((newmp = allocb_tmpl(len, mp)) == NULL)
1475*7c478bd9Sstevel@tonic-gate 		return (NULL);
1476*7c478bd9Sstevel@tonic-gate 
1477*7c478bd9Sstevel@tonic-gate 	newmp->b_flag = mp->b_flag;
1478*7c478bd9Sstevel@tonic-gate 	newmp->b_band = mp->b_band;
1479*7c478bd9Sstevel@tonic-gate 
1480*7c478bd9Sstevel@tonic-gate 	while (len > 0) {
1481*7c478bd9Sstevel@tonic-gate 		n = mp->b_wptr - mp->b_rptr;
1482*7c478bd9Sstevel@tonic-gate 		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
1483*7c478bd9Sstevel@tonic-gate 		if (n > 0)
1484*7c478bd9Sstevel@tonic-gate 			bcopy(mp->b_rptr, newmp->b_wptr, n);
1485*7c478bd9Sstevel@tonic-gate 		newmp->b_wptr += n;
1486*7c478bd9Sstevel@tonic-gate 		len -= n;
1487*7c478bd9Sstevel@tonic-gate 		mp = mp->b_cont;
1488*7c478bd9Sstevel@tonic-gate 	}
1489*7c478bd9Sstevel@tonic-gate 
1490*7c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
1491*7c478bd9Sstevel@tonic-gate 		newmp->b_cont = dupmsg(mp);
1492*7c478bd9Sstevel@tonic-gate 		if (newmp->b_cont == NULL) {
1493*7c478bd9Sstevel@tonic-gate 			freemsg(newmp);
1494*7c478bd9Sstevel@tonic-gate 			return (NULL);
1495*7c478bd9Sstevel@tonic-gate 		}
1496*7c478bd9Sstevel@tonic-gate 	}
1497*7c478bd9Sstevel@tonic-gate 
1498*7c478bd9Sstevel@tonic-gate 	return (newmp);
1499*7c478bd9Sstevel@tonic-gate }
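
/*
 * Illustrative sketch (editor's note, hypothetical): msgpullup() is the
 * non-destructive counterpart of pullupmsg(); the original message is left
 * untouched, which matters when it is still referenced elsewhere (e.g.
 * still queued or shared).  foo_parse() is a made-up consumer.
 *
 *	mblk_t *flat;
 *
 *	if ((flat = msgpullup(mp, -1)) == NULL)
 *		return (ENOMEM);
 *	foo_parse(flat);
 *	freemsg(flat);
 */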
1500*7c478bd9Sstevel@tonic-gate 
1501*7c478bd9Sstevel@tonic-gate /*
1502*7c478bd9Sstevel@tonic-gate  * Trim bytes from message
1503*7c478bd9Sstevel@tonic-gate  *  len > 0, trim from head
1504*7c478bd9Sstevel@tonic-gate  *  len < 0, trim from tail
1505*7c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
1506*7c478bd9Sstevel@tonic-gate  */
1507*7c478bd9Sstevel@tonic-gate int
1508*7c478bd9Sstevel@tonic-gate adjmsg(mblk_t *mp, ssize_t len)
1509*7c478bd9Sstevel@tonic-gate {
1510*7c478bd9Sstevel@tonic-gate 	mblk_t *bp;
1511*7c478bd9Sstevel@tonic-gate 	mblk_t *save_bp = NULL;
1512*7c478bd9Sstevel@tonic-gate 	mblk_t *prev_bp;
1513*7c478bd9Sstevel@tonic-gate 	mblk_t *bcont;
1514*7c478bd9Sstevel@tonic-gate 	unsigned char type;
1515*7c478bd9Sstevel@tonic-gate 	ssize_t n;
1516*7c478bd9Sstevel@tonic-gate 	int fromhead;
1517*7c478bd9Sstevel@tonic-gate 	int first;
1518*7c478bd9Sstevel@tonic-gate 
1519*7c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
1520*7c478bd9Sstevel@tonic-gate 	/*
1521*7c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
1522*7c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
1523*7c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
1524*7c478bd9Sstevel@tonic-gate 	 */
1525*7c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
1526*7c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
1527*7c478bd9Sstevel@tonic-gate 		return (0);
1528*7c478bd9Sstevel@tonic-gate 
1529*7c478bd9Sstevel@tonic-gate 	if (len < 0) {
1530*7c478bd9Sstevel@tonic-gate 		fromhead = 0;
1531*7c478bd9Sstevel@tonic-gate 		len = -len;
1532*7c478bd9Sstevel@tonic-gate 	} else {
1533*7c478bd9Sstevel@tonic-gate 		fromhead = 1;
1534*7c478bd9Sstevel@tonic-gate 	}
1535*7c478bd9Sstevel@tonic-gate 
1536*7c478bd9Sstevel@tonic-gate 	if (xmsgsize(mp) < len)
1537*7c478bd9Sstevel@tonic-gate 		return (0);
1538*7c478bd9Sstevel@tonic-gate 
1539*7c478bd9Sstevel@tonic-gate 
1540*7c478bd9Sstevel@tonic-gate 	if (fromhead) {
1541*7c478bd9Sstevel@tonic-gate 		first = 1;
1542*7c478bd9Sstevel@tonic-gate 		while (len) {
1543*7c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_wptr >= mp->b_rptr);
1544*7c478bd9Sstevel@tonic-gate 			n = MIN(mp->b_wptr - mp->b_rptr, len);
1545*7c478bd9Sstevel@tonic-gate 			mp->b_rptr += n;
1546*7c478bd9Sstevel@tonic-gate 			len -= n;
1547*7c478bd9Sstevel@tonic-gate 
1548*7c478bd9Sstevel@tonic-gate 			/*
1549*7c478bd9Sstevel@tonic-gate 			 * If this is not the first mblk and it has
1550*7c478bd9Sstevel@tonic-gate 			 * been drained to zero length, remove it.
1551*7c478bd9Sstevel@tonic-gate 			 */
1552*7c478bd9Sstevel@tonic-gate 			if (!first && (mp->b_wptr == mp->b_rptr)) {
1553*7c478bd9Sstevel@tonic-gate 				bcont = mp->b_cont;
1554*7c478bd9Sstevel@tonic-gate 				freeb(mp);
1555*7c478bd9Sstevel@tonic-gate 				mp = save_bp->b_cont = bcont;
1556*7c478bd9Sstevel@tonic-gate 			} else {
1557*7c478bd9Sstevel@tonic-gate 				save_bp = mp;
1558*7c478bd9Sstevel@tonic-gate 				mp = mp->b_cont;
1559*7c478bd9Sstevel@tonic-gate 			}
1560*7c478bd9Sstevel@tonic-gate 			first = 0;
1561*7c478bd9Sstevel@tonic-gate 		}
1562*7c478bd9Sstevel@tonic-gate 	} else {
1563*7c478bd9Sstevel@tonic-gate 		type = mp->b_datap->db_type;
1564*7c478bd9Sstevel@tonic-gate 		while (len) {
1565*7c478bd9Sstevel@tonic-gate 			bp = mp;
1566*7c478bd9Sstevel@tonic-gate 			save_bp = NULL;
1567*7c478bd9Sstevel@tonic-gate 
1568*7c478bd9Sstevel@tonic-gate 			/*
1569*7c478bd9Sstevel@tonic-gate 			 * Find the last message of same type
1570*7c478bd9Sstevel@tonic-gate 			 */
1571*7c478bd9Sstevel@tonic-gate 
1572*7c478bd9Sstevel@tonic-gate 			while (bp && bp->b_datap->db_type == type) {
1573*7c478bd9Sstevel@tonic-gate 				ASSERT(bp->b_wptr >= bp->b_rptr);
1574*7c478bd9Sstevel@tonic-gate 				prev_bp = save_bp;
1575*7c478bd9Sstevel@tonic-gate 				save_bp = bp;
1576*7c478bd9Sstevel@tonic-gate 				bp = bp->b_cont;
1577*7c478bd9Sstevel@tonic-gate 			}
1578*7c478bd9Sstevel@tonic-gate 			if (save_bp == NULL)
1579*7c478bd9Sstevel@tonic-gate 				break;
1580*7c478bd9Sstevel@tonic-gate 			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
1581*7c478bd9Sstevel@tonic-gate 			save_bp->b_wptr -= n;
1582*7c478bd9Sstevel@tonic-gate 			len -= n;
1583*7c478bd9Sstevel@tonic-gate 
1584*7c478bd9Sstevel@tonic-gate 			/*
1585*7c478bd9Sstevel@tonic-gate 			 * If this is not the first message
1586*7c478bd9Sstevel@tonic-gate 			 * and we have taken away everything
1587*7c478bd9Sstevel@tonic-gate 			 * from this message, remove it
1588*7c478bd9Sstevel@tonic-gate 			 */
1589*7c478bd9Sstevel@tonic-gate 
1590*7c478bd9Sstevel@tonic-gate 			if ((save_bp != mp) &&
1591*7c478bd9Sstevel@tonic-gate 			    (save_bp->b_wptr == save_bp->b_rptr)) {
1592*7c478bd9Sstevel@tonic-gate 				bcont = save_bp->b_cont;
1593*7c478bd9Sstevel@tonic-gate 				freeb(save_bp);
1594*7c478bd9Sstevel@tonic-gate 				prev_bp->b_cont = bcont;
1595*7c478bd9Sstevel@tonic-gate 			}
1596*7c478bd9Sstevel@tonic-gate 		}
1597*7c478bd9Sstevel@tonic-gate 	}
1598*7c478bd9Sstevel@tonic-gate 	return (1);
1599*7c478bd9Sstevel@tonic-gate }
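
/*
 * Illustrative sketch (editor's note, not from the original source):
 * trimming a protocol header from the front of a message and a trailer from
 * the back.  hdrlen and trlen are hypothetical lengths already checked
 * against the message size.
 *
 *	if (!adjmsg(mp, hdrlen) || !adjmsg(mp, -trlen)) {
 *		freemsg(mp);
 *		return;
 *	}
 */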
1600*7c478bd9Sstevel@tonic-gate 
1601*7c478bd9Sstevel@tonic-gate /*
1602*7c478bd9Sstevel@tonic-gate  * get number of data bytes in message
1603*7c478bd9Sstevel@tonic-gate  */
1604*7c478bd9Sstevel@tonic-gate size_t
1605*7c478bd9Sstevel@tonic-gate msgdsize(mblk_t *bp)
1606*7c478bd9Sstevel@tonic-gate {
1607*7c478bd9Sstevel@tonic-gate 	size_t count = 0;
1608*7c478bd9Sstevel@tonic-gate 
1609*7c478bd9Sstevel@tonic-gate 	for (; bp; bp = bp->b_cont)
1610*7c478bd9Sstevel@tonic-gate 		if (bp->b_datap->db_type == M_DATA) {
1611*7c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_wptr >= bp->b_rptr);
1612*7c478bd9Sstevel@tonic-gate 			count += bp->b_wptr - bp->b_rptr;
1613*7c478bd9Sstevel@tonic-gate 		}
1614*7c478bd9Sstevel@tonic-gate 	return (count);
1615*7c478bd9Sstevel@tonic-gate }
1616*7c478bd9Sstevel@tonic-gate 
1617*7c478bd9Sstevel@tonic-gate /*
1618*7c478bd9Sstevel@tonic-gate  * Get a message off head of queue
1619*7c478bd9Sstevel@tonic-gate  *
1620*7c478bd9Sstevel@tonic-gate  * If queue has no buffers then mark queue
1621*7c478bd9Sstevel@tonic-gate  * with QWANTR. (queue wants to be read by
1622*7c478bd9Sstevel@tonic-gate  * someone when data becomes available)
1623*7c478bd9Sstevel@tonic-gate  *
1624*7c478bd9Sstevel@tonic-gate  * If there is something to take off then do so.
1625*7c478bd9Sstevel@tonic-gate  * If queue falls below hi water mark turn off QFULL
1626*7c478bd9Sstevel@tonic-gate  * flag.  Decrement weighted count of queue.
1627*7c478bd9Sstevel@tonic-gate  * Also turn off QWANTR because queue is being read.
1628*7c478bd9Sstevel@tonic-gate  *
1629*7c478bd9Sstevel@tonic-gate  * The queue count is maintained on a per-band basis.
1630*7c478bd9Sstevel@tonic-gate  * Priority band 0 (normal messages) uses q_count,
1631*7c478bd9Sstevel@tonic-gate  * q_lowat, etc.  Non-zero priority bands use the
1632*7c478bd9Sstevel@tonic-gate  * fields in their respective qband structures
1633*7c478bd9Sstevel@tonic-gate  * (qb_count, qb_lowat, etc.)  All messages appear
1634*7c478bd9Sstevel@tonic-gate  * on the same list, linked via their b_next pointers.
1635*7c478bd9Sstevel@tonic-gate  * q_first is the head of the list.  q_count does
1636*7c478bd9Sstevel@tonic-gate  * not reflect the size of all the messages on the
1637*7c478bd9Sstevel@tonic-gate  * queue.  It only reflects those messages in the
1638*7c478bd9Sstevel@tonic-gate  * normal band of flow.  The one exception to this
1639*7c478bd9Sstevel@tonic-gate  * deals with high priority messages.  They are in
1640*7c478bd9Sstevel@tonic-gate  * their own conceptual "band", but are accounted
1641*7c478bd9Sstevel@tonic-gate  * against q_count.
1642*7c478bd9Sstevel@tonic-gate  *
1643*7c478bd9Sstevel@tonic-gate  * If queue count is below the lo water mark and QWANTW
1644*7c478bd9Sstevel@tonic-gate  * is set, enable the closest backq which has a service
1645*7c478bd9Sstevel@tonic-gate  * procedure and turn off the QWANTW flag.
1646*7c478bd9Sstevel@tonic-gate  *
1647*7c478bd9Sstevel@tonic-gate  * getq could be built on top of rmvq, but isn't because
1648*7c478bd9Sstevel@tonic-gate  * of performance considerations.
1649*7c478bd9Sstevel@tonic-gate  *
1650*7c478bd9Sstevel@tonic-gate  * A note on the use of q_count and q_mblkcnt:
1651*7c478bd9Sstevel@tonic-gate  *   q_count is the traditional byte count for messages that
1652*7c478bd9Sstevel@tonic-gate  *   have been put on a queue.  Documentation tells us that
1653*7c478bd9Sstevel@tonic-gate  *   we shouldn't rely on that count, but some drivers/modules
1654*7c478bd9Sstevel@tonic-gate  *   do.  What was needed, however, is a mechanism to prevent
1655*7c478bd9Sstevel@tonic-gate  *   runaway streams from consuming all of the resources,
1656*7c478bd9Sstevel@tonic-gate  *   and particularly be able to flow control zero-length
1657*7c478bd9Sstevel@tonic-gate  *   messages.  q_mblkcnt is used for this purpose.  It
1658*7c478bd9Sstevel@tonic-gate  *   counts the number of mblk's that are being put on
1659*7c478bd9Sstevel@tonic-gate  *   the queue.  The intention here, is that each mblk should
1660*7c478bd9Sstevel@tonic-gate  *   contain one byte of data and, for the purpose of
1661*7c478bd9Sstevel@tonic-gate  *   flow-control, logically does.  A queue will become
1662*7c478bd9Sstevel@tonic-gate  *   full when EITHER of these values (q_count and q_mblkcnt)
1663*7c478bd9Sstevel@tonic-gate  *   reach the highwater mark.  It will clear when BOTH
1664*7c478bd9Sstevel@tonic-gate  *   of them drop below the highwater mark.  And it will
1665*7c478bd9Sstevel@tonic-gate  *   backenable when BOTH of them drop below the lowwater
1666*7c478bd9Sstevel@tonic-gate  *   mark.
1667*7c478bd9Sstevel@tonic-gate  *   With this algorithm, a driver/module might be able
1668*7c478bd9Sstevel@tonic-gate  *   to find a reasonably accurate q_count, and the
1669*7c478bd9Sstevel@tonic-gate  *   framework can still try and limit resource usage.
1670*7c478bd9Sstevel@tonic-gate  */
1671*7c478bd9Sstevel@tonic-gate mblk_t *
1672*7c478bd9Sstevel@tonic-gate getq(queue_t *q)
1673*7c478bd9Sstevel@tonic-gate {
1674*7c478bd9Sstevel@tonic-gate 	mblk_t *bp;
1675*7c478bd9Sstevel@tonic-gate 	int band = 0;
1676*7c478bd9Sstevel@tonic-gate 
1677*7c478bd9Sstevel@tonic-gate 	bp = getq_noenab(q);
1678*7c478bd9Sstevel@tonic-gate 	if (bp != NULL)
1679*7c478bd9Sstevel@tonic-gate 		band = bp->b_band;
1680*7c478bd9Sstevel@tonic-gate 
1681*7c478bd9Sstevel@tonic-gate 	/*
1682*7c478bd9Sstevel@tonic-gate 	 * Inlined from qbackenable().
1683*7c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
1684*7c478bd9Sstevel@tonic-gate 	 */
1685*7c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
1686*7c478bd9Sstevel@tonic-gate 		return (bp);
1687*7c478bd9Sstevel@tonic-gate 
1688*7c478bd9Sstevel@tonic-gate 	qbackenable(q, band);
1689*7c478bd9Sstevel@tonic-gate 	return (bp);
1690*7c478bd9Sstevel@tonic-gate }
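
/*
 * Illustrative sketch (editor's note, hypothetical module code, not from
 * this file): the canonical service procedure loop built on getq().
 * Messages are forwarded while the next queue can accept them; otherwise
 * the message is put back and the loop exits, to be resumed when this
 * queue is back-enabled.  A real module would also pass high-priority
 * messages (db_type >= QPCTL) without the canputnext() check.
 *
 *	static int
 *	foowsrv(queue_t *q)
 *	{
 *		mblk_t *mp;
 *
 *		while ((mp = getq(q)) != NULL) {
 *			if (!canputnext(q)) {
 *				(void) putbq(q, mp);
 *				break;
 *			}
 *			putnext(q, mp);
 *		}
 *		return (0);
 *	}
 */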
1691*7c478bd9Sstevel@tonic-gate 
1692*7c478bd9Sstevel@tonic-gate /*
1693*7c478bd9Sstevel@tonic-gate  * Like getq() but does not backenable.  This is used by the stream
1694*7c478bd9Sstevel@tonic-gate  * head when a putback() is likely.  The caller must call qbackenable()
1695*7c478bd9Sstevel@tonic-gate  * after it is done with accessing the queue.
1696*7c478bd9Sstevel@tonic-gate  */
1697*7c478bd9Sstevel@tonic-gate mblk_t *
1698*7c478bd9Sstevel@tonic-gate getq_noenab(queue_t *q)
1699*7c478bd9Sstevel@tonic-gate {
1700*7c478bd9Sstevel@tonic-gate 	mblk_t *bp;
1701*7c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
1702*7c478bd9Sstevel@tonic-gate 	qband_t *qbp;
1703*7c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
1704*7c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
1705*7c478bd9Sstevel@tonic-gate 
1706*7c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
1707*7c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
1708*7c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
1709*7c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
1710*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
1711*7c478bd9Sstevel@tonic-gate 	} else
1712*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
1713*7c478bd9Sstevel@tonic-gate 
1714*7c478bd9Sstevel@tonic-gate 	if ((bp = q->q_first) == NULL) {
1715*7c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTR;
1716*7c478bd9Sstevel@tonic-gate 	} else {
1717*7c478bd9Sstevel@tonic-gate 		if ((q->q_first = bp->b_next) == NULL)
1718*7c478bd9Sstevel@tonic-gate 			q->q_last = NULL;
1719*7c478bd9Sstevel@tonic-gate 		else
1720*7c478bd9Sstevel@tonic-gate 			q->q_first->b_prev = NULL;
1721*7c478bd9Sstevel@tonic-gate 
1722*7c478bd9Sstevel@tonic-gate 		/* Get message byte count for q_count accounting */
1723*7c478bd9Sstevel@tonic-gate 		for (tmp = bp; tmp; tmp = tmp->b_cont) {
1724*7c478bd9Sstevel@tonic-gate 			bytecnt += (tmp->b_wptr - tmp->b_rptr);
1725*7c478bd9Sstevel@tonic-gate 			mblkcnt++;
1726*7c478bd9Sstevel@tonic-gate 		}
1727*7c478bd9Sstevel@tonic-gate 
1728*7c478bd9Sstevel@tonic-gate 		if (bp->b_band == 0) {
1729*7c478bd9Sstevel@tonic-gate 			q->q_count -= bytecnt;
1730*7c478bd9Sstevel@tonic-gate 			q->q_mblkcnt -= mblkcnt;
1731*7c478bd9Sstevel@tonic-gate 			if ((q->q_count < q->q_hiwat) &&
1732*7c478bd9Sstevel@tonic-gate 			    (q->q_mblkcnt < q->q_hiwat)) {
1733*7c478bd9Sstevel@tonic-gate 				q->q_flag &= ~QFULL;
1734*7c478bd9Sstevel@tonic-gate 			}
1735*7c478bd9Sstevel@tonic-gate 		} else {
1736*7c478bd9Sstevel@tonic-gate 			int i;
1737*7c478bd9Sstevel@tonic-gate 
1738*7c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_band <= q->q_nband);
1739*7c478bd9Sstevel@tonic-gate 			ASSERT(q->q_bandp != NULL);
1740*7c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(QLOCK(q)));
1741*7c478bd9Sstevel@tonic-gate 			qbp = q->q_bandp;
1742*7c478bd9Sstevel@tonic-gate 			i = bp->b_band;
1743*7c478bd9Sstevel@tonic-gate 			while (--i > 0)
1744*7c478bd9Sstevel@tonic-gate 				qbp = qbp->qb_next;
1745*7c478bd9Sstevel@tonic-gate 			if (qbp->qb_first == qbp->qb_last) {
1746*7c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
1747*7c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
1748*7c478bd9Sstevel@tonic-gate 			} else {
1749*7c478bd9Sstevel@tonic-gate 				qbp->qb_first = bp->b_next;
1750*7c478bd9Sstevel@tonic-gate 			}
1751*7c478bd9Sstevel@tonic-gate 			qbp->qb_count -= bytecnt;
1752*7c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt -= mblkcnt;
1753*7c478bd9Sstevel@tonic-gate 			if ((qbp->qb_count < qbp->qb_hiwat) &&
1754*7c478bd9Sstevel@tonic-gate 			    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
1755*7c478bd9Sstevel@tonic-gate 				qbp->qb_flag &= ~QB_FULL;
1756*7c478bd9Sstevel@tonic-gate 			}
1757*7c478bd9Sstevel@tonic-gate 		}
1758*7c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTR;
1759*7c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
1760*7c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
1761*7c478bd9Sstevel@tonic-gate 	}
1762*7c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
1763*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
1764*7c478bd9Sstevel@tonic-gate 
1765*7c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);
1766*7c478bd9Sstevel@tonic-gate 
1767*7c478bd9Sstevel@tonic-gate 	return (bp);
1768*7c478bd9Sstevel@tonic-gate }
1769*7c478bd9Sstevel@tonic-gate 
1770*7c478bd9Sstevel@tonic-gate /*
1771*7c478bd9Sstevel@tonic-gate  * Determine if a backenable is needed after removing a message in the
1772*7c478bd9Sstevel@tonic-gate  * specified band.
1773*7c478bd9Sstevel@tonic-gate  * NOTE: This routine assumes that something like getq_noenab() has been
1774*7c478bd9Sstevel@tonic-gate  * already called.
1775*7c478bd9Sstevel@tonic-gate  *
1776*7c478bd9Sstevel@tonic-gate  * For the read side it is ok to hold sd_lock across calling this (and the
1777*7c478bd9Sstevel@tonic-gate  * stream head often does).
1778*7c478bd9Sstevel@tonic-gate  * But for the write side strwakeq might be invoked and it acquires sd_lock.
1779*7c478bd9Sstevel@tonic-gate  */
1780*7c478bd9Sstevel@tonic-gate void
1781*7c478bd9Sstevel@tonic-gate qbackenable(queue_t *q, int band)
1782*7c478bd9Sstevel@tonic-gate {
1783*7c478bd9Sstevel@tonic-gate 	int backenab = 0;
1784*7c478bd9Sstevel@tonic-gate 	qband_t *qbp;
1785*7c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
1786*7c478bd9Sstevel@tonic-gate 
1787*7c478bd9Sstevel@tonic-gate 	ASSERT(q);
1788*7c478bd9Sstevel@tonic-gate 	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));
1789*7c478bd9Sstevel@tonic-gate 
1790*7c478bd9Sstevel@tonic-gate 	/*
1791*7c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
1792*7c478bd9Sstevel@tonic-gate 	 * OK since after getq() has lowered the q_count these flags
1793*7c478bd9Sstevel@tonic-gate 	 * would not change unless either the qbackenable() is done by
1794*7c478bd9Sstevel@tonic-gate 	 * another thread (which is ok) or the queue has gotten QFULL
1795*7c478bd9Sstevel@tonic-gate 	 * in which case another backenable will take place when the queue
1796*7c478bd9Sstevel@tonic-gate 	 * drops below q_lowat.
1797*7c478bd9Sstevel@tonic-gate 	 */
1798*7c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
1799*7c478bd9Sstevel@tonic-gate 		return;
1800*7c478bd9Sstevel@tonic-gate 
1801*7c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
1802*7c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
1803*7c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
1804*7c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
1805*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
1806*7c478bd9Sstevel@tonic-gate 	} else
1807*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
1808*7c478bd9Sstevel@tonic-gate 
1809*7c478bd9Sstevel@tonic-gate 	if (band == 0) {
1810*7c478bd9Sstevel@tonic-gate 		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
1811*7c478bd9Sstevel@tonic-gate 		    q->q_mblkcnt < q->q_lowat)) {
1812*7c478bd9Sstevel@tonic-gate 			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
1813*7c478bd9Sstevel@tonic-gate 		}
1814*7c478bd9Sstevel@tonic-gate 	} else {
1815*7c478bd9Sstevel@tonic-gate 		int i;
1816*7c478bd9Sstevel@tonic-gate 
1817*7c478bd9Sstevel@tonic-gate 		ASSERT((unsigned)band <= q->q_nband);
1818*7c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
1819*7c478bd9Sstevel@tonic-gate 
1820*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
1821*7c478bd9Sstevel@tonic-gate 		i = band;
1822*7c478bd9Sstevel@tonic-gate 		while (--i > 0)
1823*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
1824*7c478bd9Sstevel@tonic-gate 
1825*7c478bd9Sstevel@tonic-gate 		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
1826*7c478bd9Sstevel@tonic-gate 		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
1827*7c478bd9Sstevel@tonic-gate 			backenab = qbp->qb_flag & QB_WANTW;
1828*7c478bd9Sstevel@tonic-gate 		}
1829*7c478bd9Sstevel@tonic-gate 	}
1830*7c478bd9Sstevel@tonic-gate 
1831*7c478bd9Sstevel@tonic-gate 	if (backenab == 0) {
1832*7c478bd9Sstevel@tonic-gate 		if (freezer != curthread)
1833*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
1834*7c478bd9Sstevel@tonic-gate 		return;
1835*7c478bd9Sstevel@tonic-gate 	}
1836*7c478bd9Sstevel@tonic-gate 
1837*7c478bd9Sstevel@tonic-gate 	/* Have to drop the lock across strwakeq and backenable */
1838*7c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
1839*7c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTWSYNC;
1840*7c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW)) {
1841*7c478bd9Sstevel@tonic-gate 		if (band != 0)
1842*7c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
1843*7c478bd9Sstevel@tonic-gate 		else {
1844*7c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
1845*7c478bd9Sstevel@tonic-gate 		}
1846*7c478bd9Sstevel@tonic-gate 	}
1847*7c478bd9Sstevel@tonic-gate 
1848*7c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
1849*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
1850*7c478bd9Sstevel@tonic-gate 
1851*7c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
1852*7c478bd9Sstevel@tonic-gate 		strwakeq(q, QWANTWSYNC);
1853*7c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW))
1854*7c478bd9Sstevel@tonic-gate 		backenable(q, band);
1855*7c478bd9Sstevel@tonic-gate }
1856*7c478bd9Sstevel@tonic-gate 
1857*7c478bd9Sstevel@tonic-gate /*
1858*7c478bd9Sstevel@tonic-gate  * Remove a message from a queue.  The queue count and other
1859*7c478bd9Sstevel@tonic-gate  * flow control parameters are adjusted and the back queue
1860*7c478bd9Sstevel@tonic-gate  * enabled if necessary.
1861*7c478bd9Sstevel@tonic-gate  *
1862*7c478bd9Sstevel@tonic-gate  * rmvq can be called with the stream frozen, by other utility functions
1863*7c478bd9Sstevel@tonic-gate  * holding QLOCK, or by streams modules holding no locks at all.
1864*7c478bd9Sstevel@tonic-gate  */
1865*7c478bd9Sstevel@tonic-gate void
1866*7c478bd9Sstevel@tonic-gate rmvq(queue_t *q, mblk_t *mp)
1867*7c478bd9Sstevel@tonic-gate {
1868*7c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
1869*7c478bd9Sstevel@tonic-gate 
1870*7c478bd9Sstevel@tonic-gate 	rmvq_noenab(q, mp);
1871*7c478bd9Sstevel@tonic-gate 	if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
1872*7c478bd9Sstevel@tonic-gate 		/*
1873*7c478bd9Sstevel@tonic-gate 		 * qbackenable can handle a frozen stream but not a "random"
1874*7c478bd9Sstevel@tonic-gate 		 * qlock being held. Drop lock across qbackenable.
1875*7c478bd9Sstevel@tonic-gate 		 */
1876*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
1877*7c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
1878*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
1879*7c478bd9Sstevel@tonic-gate 	} else {
1880*7c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
1881*7c478bd9Sstevel@tonic-gate 	}
1882*7c478bd9Sstevel@tonic-gate }
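
/*
 * Illustrative sketch (editor's note, hypothetical): removing selected
 * messages from a queue with rmvq() while the stream is frozen, which is
 * the documented way to walk q_first safely.  foo_is_stale() is a made-up
 * predicate; the freemsg() calls could also be deferred until after
 * unfreezestr().
 *
 *	mblk_t *mp, *next;
 *
 *	freezestr(q);
 *	for (mp = q->q_first; mp != NULL; mp = next) {
 *		next = mp->b_next;
 *		if (foo_is_stale(mp)) {
 *			rmvq(q, mp);
 *			freemsg(mp);
 *		}
 *	}
 *	unfreezestr(q);
 */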
1883*7c478bd9Sstevel@tonic-gate 
1884*7c478bd9Sstevel@tonic-gate /*
1885*7c478bd9Sstevel@tonic-gate  * Like rmvq() but without any backenabling.
1886*7c478bd9Sstevel@tonic-gate  * This exists to handle SR_CONSOL_DATA in strrput().
1887*7c478bd9Sstevel@tonic-gate  */
1888*7c478bd9Sstevel@tonic-gate void
1889*7c478bd9Sstevel@tonic-gate rmvq_noenab(queue_t *q, mblk_t *mp)
1890*7c478bd9Sstevel@tonic-gate {
1891*7c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
1892*7c478bd9Sstevel@tonic-gate 	int i;
1893*7c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
1894*7c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
1895*7c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
1896*7c478bd9Sstevel@tonic-gate 
1897*7c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
1898*7c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
1899*7c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
1900*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
1901*7c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
1902*7c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
1903*7c478bd9Sstevel@tonic-gate 		freezer = curthread;
1904*7c478bd9Sstevel@tonic-gate 	} else
1905*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
1906*7c478bd9Sstevel@tonic-gate 
1907*7c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_band <= q->q_nband);
1908*7c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {		/* Adjust band pointers */
1909*7c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
1910*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
1911*7c478bd9Sstevel@tonic-gate 		i = mp->b_band;
1912*7c478bd9Sstevel@tonic-gate 		while (--i > 0)
1913*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
1914*7c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_first) {
1915*7c478bd9Sstevel@tonic-gate 			if (mp->b_next && mp->b_band == mp->b_next->b_band)
1916*7c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp->b_next;
1917*7c478bd9Sstevel@tonic-gate 			else
1918*7c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
1919*7c478bd9Sstevel@tonic-gate 		}
1920*7c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_last) {
1921*7c478bd9Sstevel@tonic-gate 			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
1922*7c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp->b_prev;
1923*7c478bd9Sstevel@tonic-gate 			else
1924*7c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
1925*7c478bd9Sstevel@tonic-gate 		}
1926*7c478bd9Sstevel@tonic-gate 	}
1927*7c478bd9Sstevel@tonic-gate 
1928*7c478bd9Sstevel@tonic-gate 	/*
1929*7c478bd9Sstevel@tonic-gate 	 * Remove the message from the list.
1930*7c478bd9Sstevel@tonic-gate 	 */
1931*7c478bd9Sstevel@tonic-gate 	if (mp->b_prev)
1932*7c478bd9Sstevel@tonic-gate 		mp->b_prev->b_next = mp->b_next;
1933*7c478bd9Sstevel@tonic-gate 	else
1934*7c478bd9Sstevel@tonic-gate 		q->q_first = mp->b_next;
1935*7c478bd9Sstevel@tonic-gate 	if (mp->b_next)
1936*7c478bd9Sstevel@tonic-gate 		mp->b_next->b_prev = mp->b_prev;
1937*7c478bd9Sstevel@tonic-gate 	else
1938*7c478bd9Sstevel@tonic-gate 		q->q_last = mp->b_prev;
1939*7c478bd9Sstevel@tonic-gate 	mp->b_next = NULL;
1940*7c478bd9Sstevel@tonic-gate 	mp->b_prev = NULL;
1941*7c478bd9Sstevel@tonic-gate 
1942*7c478bd9Sstevel@tonic-gate 	/* Get the size of the message for q_count accounting */
1943*7c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
1944*7c478bd9Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
1945*7c478bd9Sstevel@tonic-gate 		mblkcnt++;
1946*7c478bd9Sstevel@tonic-gate 	}
1947*7c478bd9Sstevel@tonic-gate 
1948*7c478bd9Sstevel@tonic-gate 	if (mp->b_band == 0) {		/* Perform q_count accounting */
1949*7c478bd9Sstevel@tonic-gate 		q->q_count -= bytecnt;
1950*7c478bd9Sstevel@tonic-gate 		q->q_mblkcnt -= mblkcnt;
1951*7c478bd9Sstevel@tonic-gate 		if ((q->q_count < q->q_hiwat) &&
1952*7c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_hiwat)) {
1953*7c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QFULL;
1954*7c478bd9Sstevel@tonic-gate 		}
1955*7c478bd9Sstevel@tonic-gate 	} else {			/* Perform qb_count accounting */
1956*7c478bd9Sstevel@tonic-gate 		qbp->qb_count -= bytecnt;
1957*7c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt -= mblkcnt;
1958*7c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count < qbp->qb_hiwat) &&
1959*7c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
1960*7c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
1961*7c478bd9Sstevel@tonic-gate 		}
1962*7c478bd9Sstevel@tonic-gate 	}
1963*7c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
1964*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
1965*7c478bd9Sstevel@tonic-gate 
1966*7c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
1967*7c478bd9Sstevel@tonic-gate }
1968*7c478bd9Sstevel@tonic-gate 
1969*7c478bd9Sstevel@tonic-gate /*
1970*7c478bd9Sstevel@tonic-gate  * Empty a queue.
1971*7c478bd9Sstevel@tonic-gate  * If flag is set, remove all messages.  Otherwise, remove
1972*7c478bd9Sstevel@tonic-gate  * only non-control messages.  If queue falls below its low
1973*7c478bd9Sstevel@tonic-gate  * water mark, and QWANTW is set, enable the nearest upstream
1974*7c478bd9Sstevel@tonic-gate  * service procedure.
1975*7c478bd9Sstevel@tonic-gate  *
1976*7c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
1977*7c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushq did not have a check
1978*7c478bd9Sstevel@tonic-gate  * for q_lowat == 0 in the backenabling test.
1979*7c478bd9Sstevel@tonic-gate  *
1980*7c478bd9Sstevel@tonic-gate  * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
1981*7c478bd9Sstevel@tonic-gate  * if one exists on the queue.
1982*7c478bd9Sstevel@tonic-gate  */
1983*7c478bd9Sstevel@tonic-gate void
1984*7c478bd9Sstevel@tonic-gate flushq_common(queue_t *q, int flag, int pcproto_flag)
1985*7c478bd9Sstevel@tonic-gate {
1986*7c478bd9Sstevel@tonic-gate 	mblk_t *mp, *nmp;
1987*7c478bd9Sstevel@tonic-gate 	qband_t *qbp;
1988*7c478bd9Sstevel@tonic-gate 	int backenab = 0;
1989*7c478bd9Sstevel@tonic-gate 	unsigned char bpri;
1990*7c478bd9Sstevel@tonic-gate 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
1991*7c478bd9Sstevel@tonic-gate 
1992*7c478bd9Sstevel@tonic-gate 	if (q->q_first == NULL)
1993*7c478bd9Sstevel@tonic-gate 		return;
1994*7c478bd9Sstevel@tonic-gate 
1995*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
1996*7c478bd9Sstevel@tonic-gate 	mp = q->q_first;
1997*7c478bd9Sstevel@tonic-gate 	q->q_first = NULL;
1998*7c478bd9Sstevel@tonic-gate 	q->q_last = NULL;
1999*7c478bd9Sstevel@tonic-gate 	q->q_count = 0;
2000*7c478bd9Sstevel@tonic-gate 	q->q_mblkcnt = 0;
2001*7c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
2002*7c478bd9Sstevel@tonic-gate 		qbp->qb_first = NULL;
2003*7c478bd9Sstevel@tonic-gate 		qbp->qb_last = NULL;
2004*7c478bd9Sstevel@tonic-gate 		qbp->qb_count = 0;
2005*7c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt = 0;
2006*7c478bd9Sstevel@tonic-gate 		qbp->qb_flag &= ~QB_FULL;
2007*7c478bd9Sstevel@tonic-gate 	}
2008*7c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QFULL;
2009*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2010*7c478bd9Sstevel@tonic-gate 	while (mp) {
2011*7c478bd9Sstevel@tonic-gate 		nmp = mp->b_next;
2012*7c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = NULL;
2013*7c478bd9Sstevel@tonic-gate 
2014*7c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);
2015*7c478bd9Sstevel@tonic-gate 
2016*7c478bd9Sstevel@tonic-gate 		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
2017*7c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
2018*7c478bd9Sstevel@tonic-gate 		else if (flag || datamsg(mp->b_datap->db_type))
2019*7c478bd9Sstevel@tonic-gate 			freemsg(mp);
2020*7c478bd9Sstevel@tonic-gate 		else
2021*7c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
2022*7c478bd9Sstevel@tonic-gate 		mp = nmp;
2023*7c478bd9Sstevel@tonic-gate 	}
2024*7c478bd9Sstevel@tonic-gate 	bpri = 1;
2025*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2026*7c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
2027*7c478bd9Sstevel@tonic-gate 		if ((qbp->qb_flag & QB_WANTW) &&
2028*7c478bd9Sstevel@tonic-gate 		    (((qbp->qb_count < qbp->qb_lowat) &&
2029*7c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
2030*7c478bd9Sstevel@tonic-gate 		    qbp->qb_lowat == 0)) {
2031*7c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
2032*7c478bd9Sstevel@tonic-gate 			backenab = 1;
2033*7c478bd9Sstevel@tonic-gate 			qbf[bpri] = 1;
2034*7c478bd9Sstevel@tonic-gate 		} else
2035*7c478bd9Sstevel@tonic-gate 			qbf[bpri] = 0;
2036*7c478bd9Sstevel@tonic-gate 		bpri++;
2037*7c478bd9Sstevel@tonic-gate 	}
2038*7c478bd9Sstevel@tonic-gate 	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
2039*7c478bd9Sstevel@tonic-gate 	if ((q->q_flag & QWANTW) &&
2040*7c478bd9Sstevel@tonic-gate 	    (((q->q_count < q->q_lowat) &&
2041*7c478bd9Sstevel@tonic-gate 	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
2042*7c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTW;
2043*7c478bd9Sstevel@tonic-gate 		backenab = 1;
2044*7c478bd9Sstevel@tonic-gate 		qbf[0] = 1;
2045*7c478bd9Sstevel@tonic-gate 	} else
2046*7c478bd9Sstevel@tonic-gate 		qbf[0] = 0;
2047*7c478bd9Sstevel@tonic-gate 
2048*7c478bd9Sstevel@tonic-gate 	/*
2049*7c478bd9Sstevel@tonic-gate 	 * If any band can now be written to, and there is a writer
2050*7c478bd9Sstevel@tonic-gate 	 * for that band, then backenable the closest service procedure.
2051*7c478bd9Sstevel@tonic-gate 	 */
2052*7c478bd9Sstevel@tonic-gate 	if (backenab) {
2053*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2054*7c478bd9Sstevel@tonic-gate 		for (bpri = q->q_nband; bpri != 0; bpri--)
2055*7c478bd9Sstevel@tonic-gate 			if (qbf[bpri])
2056*7c478bd9Sstevel@tonic-gate 				backenable(q, (int)bpri);
2057*7c478bd9Sstevel@tonic-gate 		if (qbf[0])
2058*7c478bd9Sstevel@tonic-gate 			backenable(q, 0);
2059*7c478bd9Sstevel@tonic-gate 	} else
2060*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2061*7c478bd9Sstevel@tonic-gate }
2062*7c478bd9Sstevel@tonic-gate 
2063*7c478bd9Sstevel@tonic-gate /*
2064*7c478bd9Sstevel@tonic-gate  * The real flushing takes place in flushq_common. This is done so that
2065*7c478bd9Sstevel@tonic-gate  * a flag can specify whether or not M_PCPROTO messages should be flushed.
2066*7c478bd9Sstevel@tonic-gate  * Currently the only place that uses this flag is the stream head.
2067*7c478bd9Sstevel@tonic-gate  */
2068*7c478bd9Sstevel@tonic-gate void
2069*7c478bd9Sstevel@tonic-gate flushq(queue_t *q, int flag)
2070*7c478bd9Sstevel@tonic-gate {
2071*7c478bd9Sstevel@tonic-gate 	flushq_common(q, flag, 0);
2072*7c478bd9Sstevel@tonic-gate }
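
/*
 * Illustrative sketch (not part of the build): how a module's write-side
 * put routine typically uses flushq() when handling M_FLUSH.  The
 * EXAMPLE_ONLY guard, the xx_wput() name and the module it belongs to are
 * hypothetical; the pattern follows standard STREAMS flush handling.
 */
#ifdef EXAMPLE_ONLY
static int
xx_wput(queue_t *q, mblk_t *mp)
{
	switch (DB_TYPE(mp)) {
	case M_FLUSH:
		if (*mp->b_rptr & FLUSHW)
			flushq(q, FLUSHALL);		/* flush write queue */
		if (*mp->b_rptr & FLUSHR) {
			flushq(RD(q), FLUSHALL);	/* flush read queue */
			*mp->b_rptr &= ~FLUSHW;
			qreply(q, mp);			/* turn it around */
		} else {
			freemsg(mp);
		}
		break;
	default:
		putnext(q, mp);
		break;
	}
	return (0);
}
#endif /* EXAMPLE_ONLY */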
2073*7c478bd9Sstevel@tonic-gate 
2074*7c478bd9Sstevel@tonic-gate /*
2075*7c478bd9Sstevel@tonic-gate  * Flush the queue of messages of the given priority band.
2076*7c478bd9Sstevel@tonic-gate  * There is some duplication of code between flushq and flushband.
2077*7c478bd9Sstevel@tonic-gate  * This is because we want to optimize the code as much as possible.
2078*7c478bd9Sstevel@tonic-gate  * The assumption is that there will be more messages in the normal
2079*7c478bd9Sstevel@tonic-gate  * (priority 0) band than in any other.
2080*7c478bd9Sstevel@tonic-gate  *
2081*7c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
2082*7c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushband had an extra check for
2083*7c478bd9Sstevel@tonic-gate  * (mp->b_datap->db_type < QPCTL) in the band 0 case. That check does not
2084*7c478bd9Sstevel@tonic-gate  * match the man page for flushband and was not in the strrput flush code,
2085*7c478bd9Sstevel@tonic-gate  * hence it was removed.
2086*7c478bd9Sstevel@tonic-gate  */
2087*7c478bd9Sstevel@tonic-gate void
2088*7c478bd9Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag)
2089*7c478bd9Sstevel@tonic-gate {
2090*7c478bd9Sstevel@tonic-gate 	mblk_t *mp;
2091*7c478bd9Sstevel@tonic-gate 	mblk_t *nmp;
2092*7c478bd9Sstevel@tonic-gate 	mblk_t *last;
2093*7c478bd9Sstevel@tonic-gate 	qband_t *qbp;
2094*7c478bd9Sstevel@tonic-gate 	int band;
2095*7c478bd9Sstevel@tonic-gate 
2096*7c478bd9Sstevel@tonic-gate 	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
2097*7c478bd9Sstevel@tonic-gate 	if (pri > q->q_nband) {
2098*7c478bd9Sstevel@tonic-gate 		return;
2099*7c478bd9Sstevel@tonic-gate 	}
2100*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2101*7c478bd9Sstevel@tonic-gate 	if (pri == 0) {
2102*7c478bd9Sstevel@tonic-gate 		mp = q->q_first;
2103*7c478bd9Sstevel@tonic-gate 		q->q_first = NULL;
2104*7c478bd9Sstevel@tonic-gate 		q->q_last = NULL;
2105*7c478bd9Sstevel@tonic-gate 		q->q_count = 0;
2106*7c478bd9Sstevel@tonic-gate 		q->q_mblkcnt = 0;
2107*7c478bd9Sstevel@tonic-gate 		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
2108*7c478bd9Sstevel@tonic-gate 			qbp->qb_first = NULL;
2109*7c478bd9Sstevel@tonic-gate 			qbp->qb_last = NULL;
2110*7c478bd9Sstevel@tonic-gate 			qbp->qb_count = 0;
2111*7c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt = 0;
2112*7c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
2113*7c478bd9Sstevel@tonic-gate 		}
2114*7c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QFULL;
2115*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2116*7c478bd9Sstevel@tonic-gate 		while (mp) {
2117*7c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
2118*7c478bd9Sstevel@tonic-gate 			mp->b_next = mp->b_prev = NULL;
2119*7c478bd9Sstevel@tonic-gate 			if ((mp->b_band == 0) &&
2120*7c478bd9Sstevel@tonic-gate 			    ((flag == FLUSHALL) ||
2121*7c478bd9Sstevel@tonic-gate 			    datamsg(mp->b_datap->db_type)))
2122*7c478bd9Sstevel@tonic-gate 				freemsg(mp);
2123*7c478bd9Sstevel@tonic-gate 			else
2124*7c478bd9Sstevel@tonic-gate 				(void) putq(q, mp);
2125*7c478bd9Sstevel@tonic-gate 			mp = nmp;
2126*7c478bd9Sstevel@tonic-gate 		}
2127*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2128*7c478bd9Sstevel@tonic-gate 		if ((q->q_flag & QWANTW) &&
2129*7c478bd9Sstevel@tonic-gate 		    (((q->q_count < q->q_lowat) &&
2130*7c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
2131*7c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
2132*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2133*7c478bd9Sstevel@tonic-gate 
2134*7c478bd9Sstevel@tonic-gate 			backenable(q, (int)pri);
2135*7c478bd9Sstevel@tonic-gate 		} else
2136*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2137*7c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
2138*7c478bd9Sstevel@tonic-gate 		boolean_t flushed = B_FALSE;
2139*7c478bd9Sstevel@tonic-gate 		band = pri;
2140*7c478bd9Sstevel@tonic-gate 
2141*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2142*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
2143*7c478bd9Sstevel@tonic-gate 		while (--band > 0)
2144*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
2145*7c478bd9Sstevel@tonic-gate 		mp = qbp->qb_first;
2146*7c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
2147*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2148*7c478bd9Sstevel@tonic-gate 			return;
2149*7c478bd9Sstevel@tonic-gate 		}
2150*7c478bd9Sstevel@tonic-gate 		last = qbp->qb_last->b_next;
2151*7c478bd9Sstevel@tonic-gate 		/*
2152*7c478bd9Sstevel@tonic-gate 		 * rmvq_noenab() and freemsg() are called for each mblk that
2153*7c478bd9Sstevel@tonic-gate 		 * meets the criteria.  The loop is executed until the last
2154*7c478bd9Sstevel@tonic-gate 		 * mblk has been processed.
2155*7c478bd9Sstevel@tonic-gate 		 */
2156*7c478bd9Sstevel@tonic-gate 		while (mp != last) {
2157*7c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_band == pri);
2158*7c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
2159*7c478bd9Sstevel@tonic-gate 			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
2160*7c478bd9Sstevel@tonic-gate 				rmvq_noenab(q, mp);
2161*7c478bd9Sstevel@tonic-gate 				freemsg(mp);
2162*7c478bd9Sstevel@tonic-gate 				flushed = B_TRUE;
2163*7c478bd9Sstevel@tonic-gate 			}
2164*7c478bd9Sstevel@tonic-gate 			mp = nmp;
2165*7c478bd9Sstevel@tonic-gate 		}
2166*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2167*7c478bd9Sstevel@tonic-gate 
2168*7c478bd9Sstevel@tonic-gate 		/*
2169*7c478bd9Sstevel@tonic-gate 		 * If any mblks have been freed, we know that qbackenable()
2170*7c478bd9Sstevel@tonic-gate 		 * will need to be called.
2171*7c478bd9Sstevel@tonic-gate 		 */
2172*7c478bd9Sstevel@tonic-gate 		if (flushed)
2173*7c478bd9Sstevel@tonic-gate 			qbackenable(q, (int)pri);
2174*7c478bd9Sstevel@tonic-gate 	}
2175*7c478bd9Sstevel@tonic-gate }
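
/*
 * Illustrative sketch (not compiled): handling a banded flush request.
 * When FLUSHBAND is set, the band number follows the flag byte and
 * flushband() is used instead of flushq().  The caller still owns mp.
 * The xx_flush_write() name is hypothetical.
 */
#ifdef EXAMPLE_ONLY
static void
xx_flush_write(queue_t *q, mblk_t *mp)
{
	if (*mp->b_rptr & FLUSHBAND) {
		ASSERT(MBLKL(mp) >= 2);
		flushband(q, *(mp->b_rptr + 1), FLUSHDATA);
	} else {
		flushq(q, FLUSHDATA);
	}
}
#endif /* EXAMPLE_ONLY */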
2176*7c478bd9Sstevel@tonic-gate 
2177*7c478bd9Sstevel@tonic-gate /*
2178*7c478bd9Sstevel@tonic-gate  * Return 1 if the queue is not full.  If the queue is full, return
2179*7c478bd9Sstevel@tonic-gate  * 0 (may not put message) and set QWANTW flag (caller wants to write
2180*7c478bd9Sstevel@tonic-gate  * to the queue).
2181*7c478bd9Sstevel@tonic-gate  */
2182*7c478bd9Sstevel@tonic-gate int
2183*7c478bd9Sstevel@tonic-gate canput(queue_t *q)
2184*7c478bd9Sstevel@tonic-gate {
2185*7c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
2186*7c478bd9Sstevel@tonic-gate 
2187*7c478bd9Sstevel@tonic-gate 	/* This is for loopback transports; they should not do a canput. */
2188*7c478bd9Sstevel@tonic-gate 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
2189*7c478bd9Sstevel@tonic-gate 
2190*7c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
2191*7c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
2192*7c478bd9Sstevel@tonic-gate 
2193*7c478bd9Sstevel@tonic-gate 	if (!(q->q_flag & QFULL)) {
2194*7c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
2195*7c478bd9Sstevel@tonic-gate 		return (1);
2196*7c478bd9Sstevel@tonic-gate 	}
2197*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2198*7c478bd9Sstevel@tonic-gate 	if (q->q_flag & QFULL) {
2199*7c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTW;
2200*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2201*7c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
2202*7c478bd9Sstevel@tonic-gate 		return (0);
2203*7c478bd9Sstevel@tonic-gate 	}
2204*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2205*7c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
2206*7c478bd9Sstevel@tonic-gate 	return (1);
2207*7c478bd9Sstevel@tonic-gate }
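
/*
 * Illustrative sketch (not compiled): a flow-control-aware write put
 * routine.  High priority messages bypass flow control; ordinary messages
 * are queued for the service procedure when the module ahead is full.
 * xx_wput() and xx_wsrv() are hypothetical names.
 */
#ifdef EXAMPLE_ONLY
static int
xx_wput(queue_t *q, mblk_t *mp)
{
	if (DB_TYPE(mp) >= QPCTL || canput(q->q_next))
		putnext(q, mp);		/* room ahead, pass it on */
	else
		(void) putq(q, mp);	/* defer to xx_wsrv() */
	return (0);
}
#endif /* EXAMPLE_ONLY */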
2208*7c478bd9Sstevel@tonic-gate 
2209*7c478bd9Sstevel@tonic-gate /*
2210*7c478bd9Sstevel@tonic-gate  * This is the new canput for use with priority bands.  Return 1 if the
2211*7c478bd9Sstevel@tonic-gate  * band is not full.  If the band is full, return 0 (may not put message)
2212*7c478bd9Sstevel@tonic-gate  * and set the QWANTW flag (QB_WANTW for a non-zero band) to indicate that
2213*7c478bd9Sstevel@tonic-gate  * the caller wants to write to the queue.
2214*7c478bd9Sstevel@tonic-gate  */
2215*7c478bd9Sstevel@tonic-gate int
2216*7c478bd9Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri)
2217*7c478bd9Sstevel@tonic-gate {
2218*7c478bd9Sstevel@tonic-gate 	qband_t *qbp;
2219*7c478bd9Sstevel@tonic-gate 
2220*7c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
2221*7c478bd9Sstevel@tonic-gate 	if (!q)
2222*7c478bd9Sstevel@tonic-gate 		return (0);
2223*7c478bd9Sstevel@tonic-gate 
2224*7c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
2225*7c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
2226*7c478bd9Sstevel@tonic-gate 
2227*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2228*7c478bd9Sstevel@tonic-gate 	if (pri == 0) {
2229*7c478bd9Sstevel@tonic-gate 		if (q->q_flag & QFULL) {
2230*7c478bd9Sstevel@tonic-gate 			q->q_flag |= QWANTW;
2231*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2232*7c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
2233*7c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
2234*7c478bd9Sstevel@tonic-gate 			return (0);
2235*7c478bd9Sstevel@tonic-gate 		}
2236*7c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
2237*7c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
2238*7c478bd9Sstevel@tonic-gate 			/*
2239*7c478bd9Sstevel@tonic-gate 			 * No band exists yet, so return success.
2240*7c478bd9Sstevel@tonic-gate 			 */
2241*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2242*7c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
2243*7c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 1);
2244*7c478bd9Sstevel@tonic-gate 			return (1);
2245*7c478bd9Sstevel@tonic-gate 		}
2246*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
2247*7c478bd9Sstevel@tonic-gate 		while (--pri)
2248*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
2249*7c478bd9Sstevel@tonic-gate 		if (qbp->qb_flag & QB_FULL) {
2250*7c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_WANTW;
2251*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
2252*7c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
2253*7c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
2254*7c478bd9Sstevel@tonic-gate 			return (0);
2255*7c478bd9Sstevel@tonic-gate 		}
2256*7c478bd9Sstevel@tonic-gate 	}
2257*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2258*7c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
2259*7c478bd9Sstevel@tonic-gate 		"bcanput:%p %X %d", q, pri, 1);
2260*7c478bd9Sstevel@tonic-gate 	return (1);
2261*7c478bd9Sstevel@tonic-gate }
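
/*
 * Illustrative sketch (not compiled): checking flow control for the band
 * a message belongs to before forwarding it.  bcanput(q, 0) is equivalent
 * to canput(q).  The xx_forward() name is hypothetical.
 */
#ifdef EXAMPLE_ONLY
static void
xx_forward(queue_t *q, mblk_t *mp)
{
	if (bcanput(q->q_next, mp->b_band))
		putnext(q, mp);
	else
		(void) putq(q, mp);	/* queued in priority-band order */
}
#endif /* EXAMPLE_ONLY */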
2262*7c478bd9Sstevel@tonic-gate 
2263*7c478bd9Sstevel@tonic-gate /*
2264*7c478bd9Sstevel@tonic-gate  * Put a message on a queue.
2265*7c478bd9Sstevel@tonic-gate  *
2266*7c478bd9Sstevel@tonic-gate  * Messages are enqueued on a priority basis.  The priority classes
2267*7c478bd9Sstevel@tonic-gate  * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
2268*7c478bd9Sstevel@tonic-gate  * and NORMAL (type < QPCTL && band == 0).
2269*7c478bd9Sstevel@tonic-gate  *
2270*7c478bd9Sstevel@tonic-gate  * Add appropriate weighted data block sizes to queue count.
2271*7c478bd9Sstevel@tonic-gate  * If queue hits high water mark then set QFULL flag.
2272*7c478bd9Sstevel@tonic-gate  *
2273*7c478bd9Sstevel@tonic-gate  * If QNOENB is not set (putq is allowed to enable the queue),
2274*7c478bd9Sstevel@tonic-gate  * enable the queue only if the message is PRIORITY,
2275*7c478bd9Sstevel@tonic-gate  * or the QWANTR flag is set (indicating that the service procedure
2276*7c478bd9Sstevel@tonic-gate  * is ready to read the queue).  This implies that a service
2277*7c478bd9Sstevel@tonic-gate  * procedure must NEVER put a high priority message back on its own
2278*7c478bd9Sstevel@tonic-gate  * queue, as this would result in an infinite loop (!).
2279*7c478bd9Sstevel@tonic-gate  */
2280*7c478bd9Sstevel@tonic-gate int
2281*7c478bd9Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp)
2282*7c478bd9Sstevel@tonic-gate {
2283*7c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
2284*7c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
2285*7c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
2286*7c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
2287*7c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
2288*7c478bd9Sstevel@tonic-gate 
2289*7c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
2290*7c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
2291*7c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
2292*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2293*7c478bd9Sstevel@tonic-gate 	} else
2294*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2295*7c478bd9Sstevel@tonic-gate 
2296*7c478bd9Sstevel@tonic-gate 	/*
2297*7c478bd9Sstevel@tonic-gate 	 * Perform sanity checks and, if the qband structure is not yet
2298*7c478bd9Sstevel@tonic-gate 	 * allocated, allocate it.
2299*7c478bd9Sstevel@tonic-gate 	 */
2300*7c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
2301*7c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
2302*7c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
2303*7c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
2304*7c478bd9Sstevel@tonic-gate 		int i;
2305*7c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
2306*7c478bd9Sstevel@tonic-gate 
2307*7c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
2308*7c478bd9Sstevel@tonic-gate 
2309*7c478bd9Sstevel@tonic-gate 			/*
2310*7c478bd9Sstevel@tonic-gate 			 * The qband structure for this priority band is
2311*7c478bd9Sstevel@tonic-gate 			 * not on the queue yet, so we have to allocate
2312*7c478bd9Sstevel@tonic-gate 			 * one on the fly.  It would be wasteful to
2313*7c478bd9Sstevel@tonic-gate 			 * associate the qband structures with every
2314*7c478bd9Sstevel@tonic-gate 			 * queue when the queues are allocated.  This is
2315*7c478bd9Sstevel@tonic-gate 			 * because most queues will only need the normal
2316*7c478bd9Sstevel@tonic-gate 			 * band of flow which can be described entirely
2317*7c478bd9Sstevel@tonic-gate 			 * by the queue itself.
2318*7c478bd9Sstevel@tonic-gate 			 */
2319*7c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
2320*7c478bd9Sstevel@tonic-gate 			while (*qbpp)
2321*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2322*7c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
2323*7c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
2324*7c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
2325*7c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
2326*7c478bd9Sstevel@tonic-gate 					return (0);
2327*7c478bd9Sstevel@tonic-gate 				}
2328*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
2329*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
2330*7c478bd9Sstevel@tonic-gate 				q->q_nband++;
2331*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2332*7c478bd9Sstevel@tonic-gate 			}
2333*7c478bd9Sstevel@tonic-gate 		}
2334*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2335*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
2336*7c478bd9Sstevel@tonic-gate 		i = bp->b_band;
2337*7c478bd9Sstevel@tonic-gate 		while (--i)
2338*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
2339*7c478bd9Sstevel@tonic-gate 	}
2340*7c478bd9Sstevel@tonic-gate 
2341*7c478bd9Sstevel@tonic-gate 	/*
2342*7c478bd9Sstevel@tonic-gate 	 * If queue is empty, add the message and initialize the pointers.
2343*7c478bd9Sstevel@tonic-gate 	 * Otherwise, adjust message pointers and queue pointers based on
2344*7c478bd9Sstevel@tonic-gate 	 * the type of the message and where it belongs on the queue.  Some
2345*7c478bd9Sstevel@tonic-gate 	 * code is duplicated to minimize the number of conditionals and
2346*7c478bd9Sstevel@tonic-gate 	 * hopefully minimize the amount of time this routine takes.
2347*7c478bd9Sstevel@tonic-gate 	 */
2348*7c478bd9Sstevel@tonic-gate 	if (!q->q_first) {
2349*7c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
2350*7c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
2351*7c478bd9Sstevel@tonic-gate 		q->q_first = bp;
2352*7c478bd9Sstevel@tonic-gate 		q->q_last = bp;
2353*7c478bd9Sstevel@tonic-gate 		if (qbp) {
2354*7c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
2355*7c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
2356*7c478bd9Sstevel@tonic-gate 		}
2357*7c478bd9Sstevel@tonic-gate 	} else if (!qbp) {	/* bp->b_band == 0 */
2358*7c478bd9Sstevel@tonic-gate 
2359*7c478bd9Sstevel@tonic-gate 		/*
2360*7c478bd9Sstevel@tonic-gate 		 * If queue class of message is less than or equal to
2361*7c478bd9Sstevel@tonic-gate 		 * that of the last one on the queue, tack on to the end.
2362*7c478bd9Sstevel@tonic-gate 		 */
2363*7c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
2364*7c478bd9Sstevel@tonic-gate 		if (mcls <= (int)queclass(tmp)) {
2365*7c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
2366*7c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
2367*7c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
2368*7c478bd9Sstevel@tonic-gate 			q->q_last = bp;
2369*7c478bd9Sstevel@tonic-gate 		} else {
2370*7c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
2371*7c478bd9Sstevel@tonic-gate 			while ((int)queclass(tmp) >= mcls)
2372*7c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
2373*7c478bd9Sstevel@tonic-gate 
2374*7c478bd9Sstevel@tonic-gate 			/*
2375*7c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
2376*7c478bd9Sstevel@tonic-gate 			 */
2377*7c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
2378*7c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
2379*7c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
2380*7c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
2381*7c478bd9Sstevel@tonic-gate 			else
2382*7c478bd9Sstevel@tonic-gate 				q->q_first = bp;
2383*7c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
2384*7c478bd9Sstevel@tonic-gate 		}
2385*7c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band != 0 */
2386*7c478bd9Sstevel@tonic-gate 		if (qbp->qb_first) {
2387*7c478bd9Sstevel@tonic-gate 			tmp = qbp->qb_last;
2388*7c478bd9Sstevel@tonic-gate 
2389*7c478bd9Sstevel@tonic-gate 			/*
2390*7c478bd9Sstevel@tonic-gate 			 * Insert bp after the last message in this band.
2391*7c478bd9Sstevel@tonic-gate 			 */
2392*7c478bd9Sstevel@tonic-gate 			bp->b_next = tmp->b_next;
2393*7c478bd9Sstevel@tonic-gate 			if (tmp->b_next)
2394*7c478bd9Sstevel@tonic-gate 				tmp->b_next->b_prev = bp;
2395*7c478bd9Sstevel@tonic-gate 			else
2396*7c478bd9Sstevel@tonic-gate 				q->q_last = bp;
2397*7c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
2398*7c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
2399*7c478bd9Sstevel@tonic-gate 		} else {
2400*7c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
2401*7c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
2402*7c478bd9Sstevel@tonic-gate 			    (bp->b_band <= tmp->b_band)) {
2403*7c478bd9Sstevel@tonic-gate 
2404*7c478bd9Sstevel@tonic-gate 				/*
2405*7c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
2406*7c478bd9Sstevel@tonic-gate 				 */
2407*7c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
2408*7c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
2409*7c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
2410*7c478bd9Sstevel@tonic-gate 				q->q_last = bp;
2411*7c478bd9Sstevel@tonic-gate 			} else {
2412*7c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
2413*7c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
2414*7c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
2415*7c478bd9Sstevel@tonic-gate 				while (tmp->b_band >= bp->b_band)
2416*7c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
2417*7c478bd9Sstevel@tonic-gate 
2418*7c478bd9Sstevel@tonic-gate 				/*
2419*7c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
2420*7c478bd9Sstevel@tonic-gate 				 */
2421*7c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
2422*7c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
2423*7c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
2424*7c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
2425*7c478bd9Sstevel@tonic-gate 				else
2426*7c478bd9Sstevel@tonic-gate 					q->q_first = bp;
2427*7c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
2428*7c478bd9Sstevel@tonic-gate 			}
2429*7c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
2430*7c478bd9Sstevel@tonic-gate 		}
2431*7c478bd9Sstevel@tonic-gate 		qbp->qb_last = bp;
2432*7c478bd9Sstevel@tonic-gate 	}
2433*7c478bd9Sstevel@tonic-gate 
2434*7c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
2435*7c478bd9Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
2436*7c478bd9Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
2437*7c478bd9Sstevel@tonic-gate 		mblkcnt++;
2438*7c478bd9Sstevel@tonic-gate 	}
2439*7c478bd9Sstevel@tonic-gate 	if (qbp) {
2440*7c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
2441*7c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
2442*7c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
2443*7c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
2444*7c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
2445*7c478bd9Sstevel@tonic-gate 		}
2446*7c478bd9Sstevel@tonic-gate 	} else {
2447*7c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
2448*7c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
2449*7c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
2450*7c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
2451*7c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
2452*7c478bd9Sstevel@tonic-gate 		}
2453*7c478bd9Sstevel@tonic-gate 	}
2454*7c478bd9Sstevel@tonic-gate 
2455*7c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);
2456*7c478bd9Sstevel@tonic-gate 
2457*7c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) ||
2458*7c478bd9Sstevel@tonic-gate 	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
2459*7c478bd9Sstevel@tonic-gate 		qenable_locked(q);
2460*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
2461*7c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
2462*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2463*7c478bd9Sstevel@tonic-gate 
2464*7c478bd9Sstevel@tonic-gate 	return (1);
2465*7c478bd9Sstevel@tonic-gate }
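
/*
 * Illustrative sketch (not compiled): queueing an expedited message in a
 * priority band.  Setting b_band before calling putq() causes the qband
 * structure for that band to be allocated on demand, as described above.
 * The band number and the xx_queue_expedited() name are arbitrary examples.
 */
#ifdef EXAMPLE_ONLY
static int
xx_queue_expedited(queue_t *q, mblk_t *mp)
{
	mp->b_band = 1;			/* band 1: ahead of normal data */
	return (putq(q, mp));		/* returns 0 if allocband() fails */
}
#endif /* EXAMPLE_ONLY */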
2466*7c478bd9Sstevel@tonic-gate 
2467*7c478bd9Sstevel@tonic-gate /*
2468*7c478bd9Sstevel@tonic-gate  * Put stuff back at beginning of Q according to priority order.
2469*7c478bd9Sstevel@tonic-gate  * See comment on putq above for details.
2470*7c478bd9Sstevel@tonic-gate  */
2471*7c478bd9Sstevel@tonic-gate int
2472*7c478bd9Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp)
2473*7c478bd9Sstevel@tonic-gate {
2474*7c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
2475*7c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
2476*7c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
2477*7c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
2478*7c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
2479*7c478bd9Sstevel@tonic-gate 
2480*7c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
2481*7c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_next == NULL);
2482*7c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
2483*7c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
2484*7c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
2485*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2486*7c478bd9Sstevel@tonic-gate 	} else
2487*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2488*7c478bd9Sstevel@tonic-gate 
2489*7c478bd9Sstevel@tonic-gate 	/*
2490*7c478bd9Sstevel@tonic-gate 	 * Perform sanity checks and, if the qband structure is not yet
2491*7c478bd9Sstevel@tonic-gate 	 * allocated, allocate it.
2492*7c478bd9Sstevel@tonic-gate 	 */
2493*7c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
2494*7c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
2495*7c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
2496*7c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
2497*7c478bd9Sstevel@tonic-gate 		int i;
2498*7c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
2499*7c478bd9Sstevel@tonic-gate 
2500*7c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
2501*7c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
2502*7c478bd9Sstevel@tonic-gate 			while (*qbpp)
2503*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2504*7c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
2505*7c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
2506*7c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
2507*7c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
2508*7c478bd9Sstevel@tonic-gate 					return (0);
2509*7c478bd9Sstevel@tonic-gate 				}
2510*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
2511*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
2512*7c478bd9Sstevel@tonic-gate 				q->q_nband++;
2513*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2514*7c478bd9Sstevel@tonic-gate 			}
2515*7c478bd9Sstevel@tonic-gate 		}
2516*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
2517*7c478bd9Sstevel@tonic-gate 		i = bp->b_band;
2518*7c478bd9Sstevel@tonic-gate 		while (--i)
2519*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
2520*7c478bd9Sstevel@tonic-gate 	}
2521*7c478bd9Sstevel@tonic-gate 
2522*7c478bd9Sstevel@tonic-gate 	/*
2523*7c478bd9Sstevel@tonic-gate 	 * If queue is empty or if message is high priority,
2524*7c478bd9Sstevel@tonic-gate 	 * place on the front of the queue.
2525*7c478bd9Sstevel@tonic-gate 	 */
2526*7c478bd9Sstevel@tonic-gate 	tmp = q->q_first;
2527*7c478bd9Sstevel@tonic-gate 	if ((!tmp) || (mcls == QPCTL)) {
2528*7c478bd9Sstevel@tonic-gate 		bp->b_next = tmp;
2529*7c478bd9Sstevel@tonic-gate 		if (tmp)
2530*7c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
2531*7c478bd9Sstevel@tonic-gate 		else
2532*7c478bd9Sstevel@tonic-gate 			q->q_last = bp;
2533*7c478bd9Sstevel@tonic-gate 		q->q_first = bp;
2534*7c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
2535*7c478bd9Sstevel@tonic-gate 		if (qbp) {
2536*7c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
2537*7c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
2538*7c478bd9Sstevel@tonic-gate 		}
2539*7c478bd9Sstevel@tonic-gate 	} else if (qbp) {	/* bp->b_band != 0 */
2540*7c478bd9Sstevel@tonic-gate 		tmp = qbp->qb_first;
2541*7c478bd9Sstevel@tonic-gate 		if (tmp) {
2542*7c478bd9Sstevel@tonic-gate 
2543*7c478bd9Sstevel@tonic-gate 			/*
2544*7c478bd9Sstevel@tonic-gate 			 * Insert bp before the first message in this band.
2545*7c478bd9Sstevel@tonic-gate 			 */
2546*7c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
2547*7c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
2548*7c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
2549*7c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
2550*7c478bd9Sstevel@tonic-gate 			else
2551*7c478bd9Sstevel@tonic-gate 				q->q_first = bp;
2552*7c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
2553*7c478bd9Sstevel@tonic-gate 		} else {
2554*7c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
2555*7c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
2556*7c478bd9Sstevel@tonic-gate 			    (bp->b_band < tmp->b_band)) {
2557*7c478bd9Sstevel@tonic-gate 
2558*7c478bd9Sstevel@tonic-gate 				/*
2559*7c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
2560*7c478bd9Sstevel@tonic-gate 				 */
2561*7c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
2562*7c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
2563*7c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
2564*7c478bd9Sstevel@tonic-gate 				q->q_last = bp;
2565*7c478bd9Sstevel@tonic-gate 			} else {
2566*7c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
2567*7c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
2568*7c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
2569*7c478bd9Sstevel@tonic-gate 				while (tmp->b_band > bp->b_band)
2570*7c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
2571*7c478bd9Sstevel@tonic-gate 
2572*7c478bd9Sstevel@tonic-gate 				/*
2573*7c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
2574*7c478bd9Sstevel@tonic-gate 				 */
2575*7c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
2576*7c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
2577*7c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
2578*7c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
2579*7c478bd9Sstevel@tonic-gate 				else
2580*7c478bd9Sstevel@tonic-gate 					q->q_first = bp;
2581*7c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
2582*7c478bd9Sstevel@tonic-gate 			}
2583*7c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
2584*7c478bd9Sstevel@tonic-gate 		}
2585*7c478bd9Sstevel@tonic-gate 		qbp->qb_first = bp;
2586*7c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band == 0 && !QPCTL */
2587*7c478bd9Sstevel@tonic-gate 
2588*7c478bd9Sstevel@tonic-gate 		/*
2589*7c478bd9Sstevel@tonic-gate 		 * If the queue class or band is less than that of the last
2590*7c478bd9Sstevel@tonic-gate 		 * message on the queue, tack bp on the end of the queue.
2591*7c478bd9Sstevel@tonic-gate 		 */
2592*7c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
2593*7c478bd9Sstevel@tonic-gate 		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
2594*7c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
2595*7c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
2596*7c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
2597*7c478bd9Sstevel@tonic-gate 			q->q_last = bp;
2598*7c478bd9Sstevel@tonic-gate 		} else {
2599*7c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
2600*7c478bd9Sstevel@tonic-gate 			while (tmp->b_datap->db_type >= QPCTL)
2601*7c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
2602*7c478bd9Sstevel@tonic-gate 			while (tmp->b_band > bp->b_band)
2603*7c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
2604*7c478bd9Sstevel@tonic-gate 
2605*7c478bd9Sstevel@tonic-gate 			/*
2606*7c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
2607*7c478bd9Sstevel@tonic-gate 			 */
2608*7c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
2609*7c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
2610*7c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
2611*7c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
2612*7c478bd9Sstevel@tonic-gate 			else
2613*7c478bd9Sstevel@tonic-gate 				q->q_first = bp;
2614*7c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
2615*7c478bd9Sstevel@tonic-gate 		}
2616*7c478bd9Sstevel@tonic-gate 	}
2617*7c478bd9Sstevel@tonic-gate 
2618*7c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
2619*7c478bd9Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
2620*7c478bd9Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
2621*7c478bd9Sstevel@tonic-gate 		mblkcnt++;
2622*7c478bd9Sstevel@tonic-gate 	}
2623*7c478bd9Sstevel@tonic-gate 	if (qbp) {
2624*7c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
2625*7c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
2626*7c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
2627*7c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
2628*7c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
2629*7c478bd9Sstevel@tonic-gate 		}
2630*7c478bd9Sstevel@tonic-gate 	} else {
2631*7c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
2632*7c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
2633*7c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
2634*7c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
2635*7c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
2636*7c478bd9Sstevel@tonic-gate 		}
2637*7c478bd9Sstevel@tonic-gate 	}
2638*7c478bd9Sstevel@tonic-gate 
2639*7c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);
2640*7c478bd9Sstevel@tonic-gate 
2641*7c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
2642*7c478bd9Sstevel@tonic-gate 		qenable_locked(q);
2643*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
2644*7c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
2645*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2646*7c478bd9Sstevel@tonic-gate 
2647*7c478bd9Sstevel@tonic-gate 	return (1);
2648*7c478bd9Sstevel@tonic-gate }
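
/*
 * Illustrative sketch (not compiled): the canonical service procedure
 * loop that pairs with putq()/putbq().  Messages are taken off the queue
 * with getq(); if the next module is flow controlled, the message is put
 * back with putbq() and the loop stops, relying on back-enabling to
 * reschedule the queue later.  High priority messages bypass the check,
 * so they are never put back.  The xx_wsrv() name is hypothetical.
 */
#ifdef EXAMPLE_ONLY
static int
xx_wsrv(queue_t *q)
{
	mblk_t *mp;

	while ((mp = getq(q)) != NULL) {
		if (DB_TYPE(mp) >= QPCTL || canput(q->q_next)) {
			putnext(q, mp);
		} else {
			(void) putbq(q, mp);	/* preserve ordering */
			break;
		}
	}
	return (0);
}
#endif /* EXAMPLE_ONLY */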
2649*7c478bd9Sstevel@tonic-gate 
2650*7c478bd9Sstevel@tonic-gate /*
2651*7c478bd9Sstevel@tonic-gate  * Insert a message before an existing message on the queue.  If the
2652*7c478bd9Sstevel@tonic-gate  * existing message is NULL, the new message is placed at the end of
2653*7c478bd9Sstevel@tonic-gate  * the queue.  The queue class of the new message is ignored.  However,
2654*7c478bd9Sstevel@tonic-gate  * the priority band of the new message must adhere to the following
2655*7c478bd9Sstevel@tonic-gate  * ordering:
2656*7c478bd9Sstevel@tonic-gate  *
2657*7c478bd9Sstevel@tonic-gate  *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
2658*7c478bd9Sstevel@tonic-gate  *
2659*7c478bd9Sstevel@tonic-gate  * All flow control parameters are updated.
2660*7c478bd9Sstevel@tonic-gate  *
2661*7c478bd9Sstevel@tonic-gate  * insq can be called with the stream frozen, by other utility functions
2662*7c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without holding any locks.
2663*7c478bd9Sstevel@tonic-gate  */
2664*7c478bd9Sstevel@tonic-gate int
2665*7c478bd9Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp)
2666*7c478bd9Sstevel@tonic-gate {
2667*7c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
2668*7c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
2669*7c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(mp);
2670*7c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
2671*7c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
2672*7c478bd9Sstevel@tonic-gate 
2673*7c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
2674*7c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
2675*7c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
2676*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2677*7c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
2678*7c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
2679*7c478bd9Sstevel@tonic-gate 		freezer = curthread;
2680*7c478bd9Sstevel@tonic-gate 	} else
2681*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2682*7c478bd9Sstevel@tonic-gate 
2683*7c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
2684*7c478bd9Sstevel@tonic-gate 		if (mp->b_band != 0)
2685*7c478bd9Sstevel@tonic-gate 			mp->b_band = 0;		/* force to be correct */
2686*7c478bd9Sstevel@tonic-gate 		if (emp && emp->b_prev &&
2687*7c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_datap->db_type < QPCTL))
2688*7c478bd9Sstevel@tonic-gate 			goto badord;
2689*7c478bd9Sstevel@tonic-gate 	}
2690*7c478bd9Sstevel@tonic-gate 	if (emp) {
2691*7c478bd9Sstevel@tonic-gate 		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
2692*7c478bd9Sstevel@tonic-gate 		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
2693*7c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_band < mp->b_band))) {
2694*7c478bd9Sstevel@tonic-gate 			goto badord;
2695*7c478bd9Sstevel@tonic-gate 		}
2696*7c478bd9Sstevel@tonic-gate 	} else {
2697*7c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
2698*7c478bd9Sstevel@tonic-gate 		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
2699*7c478bd9Sstevel@tonic-gate badord:
2700*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
2701*7c478bd9Sstevel@tonic-gate 			    "insq: attempt to insert message out of order "
2702*7c478bd9Sstevel@tonic-gate 			    "on q %p", (void *)q);
2703*7c478bd9Sstevel@tonic-gate 			if (freezer != curthread)
2704*7c478bd9Sstevel@tonic-gate 				mutex_exit(QLOCK(q));
2705*7c478bd9Sstevel@tonic-gate 			return (0);
2706*7c478bd9Sstevel@tonic-gate 		}
2707*7c478bd9Sstevel@tonic-gate 	}
2708*7c478bd9Sstevel@tonic-gate 
2709*7c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {
2710*7c478bd9Sstevel@tonic-gate 		int i;
2711*7c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
2712*7c478bd9Sstevel@tonic-gate 
2713*7c478bd9Sstevel@tonic-gate 		if (mp->b_band > q->q_nband) {
2714*7c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
2715*7c478bd9Sstevel@tonic-gate 			while (*qbpp)
2716*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2717*7c478bd9Sstevel@tonic-gate 			while (mp->b_band > q->q_nband) {
2718*7c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
2719*7c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
2720*7c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
2721*7c478bd9Sstevel@tonic-gate 					return (0);
2722*7c478bd9Sstevel@tonic-gate 				}
2723*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
2724*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
2725*7c478bd9Sstevel@tonic-gate 				q->q_nband++;
2726*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2727*7c478bd9Sstevel@tonic-gate 			}
2728*7c478bd9Sstevel@tonic-gate 		}
2729*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
2730*7c478bd9Sstevel@tonic-gate 		i = mp->b_band;
2731*7c478bd9Sstevel@tonic-gate 		while (--i)
2732*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
2733*7c478bd9Sstevel@tonic-gate 	}
2734*7c478bd9Sstevel@tonic-gate 
2735*7c478bd9Sstevel@tonic-gate 	if ((mp->b_next = emp) != NULL) {
2736*7c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = emp->b_prev) != NULL)
2737*7c478bd9Sstevel@tonic-gate 			emp->b_prev->b_next = mp;
2738*7c478bd9Sstevel@tonic-gate 		else
2739*7c478bd9Sstevel@tonic-gate 			q->q_first = mp;
2740*7c478bd9Sstevel@tonic-gate 		emp->b_prev = mp;
2741*7c478bd9Sstevel@tonic-gate 	} else {
2742*7c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = q->q_last) != NULL)
2743*7c478bd9Sstevel@tonic-gate 			q->q_last->b_next = mp;
2744*7c478bd9Sstevel@tonic-gate 		else
2745*7c478bd9Sstevel@tonic-gate 			q->q_first = mp;
2746*7c478bd9Sstevel@tonic-gate 		q->q_last = mp;
2747*7c478bd9Sstevel@tonic-gate 	}
2748*7c478bd9Sstevel@tonic-gate 
2749*7c478bd9Sstevel@tonic-gate 	/* Get mblk and byte count for q_count accounting */
2750*7c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
2751*7c478bd9Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
2752*7c478bd9Sstevel@tonic-gate 		mblkcnt++;
2753*7c478bd9Sstevel@tonic-gate 	}
2754*7c478bd9Sstevel@tonic-gate 
2755*7c478bd9Sstevel@tonic-gate 	if (qbp) {	/* adjust qband pointers and count */
2756*7c478bd9Sstevel@tonic-gate 		if (!qbp->qb_first) {
2757*7c478bd9Sstevel@tonic-gate 			qbp->qb_first = mp;
2758*7c478bd9Sstevel@tonic-gate 			qbp->qb_last = mp;
2759*7c478bd9Sstevel@tonic-gate 		} else {
2760*7c478bd9Sstevel@tonic-gate 			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
2761*7c478bd9Sstevel@tonic-gate 			    (mp->b_prev->b_band != mp->b_band)))
2762*7c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp;
2763*7c478bd9Sstevel@tonic-gate 			else if (mp->b_next == NULL || (mp->b_next != NULL &&
2764*7c478bd9Sstevel@tonic-gate 			    (mp->b_next->b_band != mp->b_band)))
2765*7c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp;
2766*7c478bd9Sstevel@tonic-gate 		}
2767*7c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
2768*7c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
2769*7c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
2770*7c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
2771*7c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
2772*7c478bd9Sstevel@tonic-gate 		}
2773*7c478bd9Sstevel@tonic-gate 	} else {
2774*7c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
2775*7c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
2776*7c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
2777*7c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
2778*7c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
2779*7c478bd9Sstevel@tonic-gate 		}
2780*7c478bd9Sstevel@tonic-gate 	}
2781*7c478bd9Sstevel@tonic-gate 
2782*7c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL);
2783*7c478bd9Sstevel@tonic-gate 
2784*7c478bd9Sstevel@tonic-gate 	if (canenable(q) && (q->q_flag & QWANTR))
2785*7c478bd9Sstevel@tonic-gate 		qenable_locked(q);
2786*7c478bd9Sstevel@tonic-gate 
2787*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
2788*7c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
2789*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
2790*7c478bd9Sstevel@tonic-gate 
2791*7c478bd9Sstevel@tonic-gate 	return (1);
2792*7c478bd9Sstevel@tonic-gate }
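
/*
 * Illustrative sketch (not compiled): using insq() to re-insert a message
 * ahead of an existing one, for example the unprocessed remainder of a
 * partially consumed message.  The band ordering constraint documented
 * above must hold, and a zero return (allocation failure or bad order)
 * must be handled.  The xx_requeue() name is hypothetical.
 */
#ifdef EXAMPLE_ONLY
static void
xx_requeue(queue_t *q, mblk_t *emp, mblk_t *mp)
{
	if (!insq(q, emp, mp))
		freemsg(mp);		/* could not insert; drop it */
}
#endif /* EXAMPLE_ONLY */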
2793*7c478bd9Sstevel@tonic-gate 
2794*7c478bd9Sstevel@tonic-gate /*
2795*7c478bd9Sstevel@tonic-gate  * Create and put a control message on queue.
2796*7c478bd9Sstevel@tonic-gate  */
2797*7c478bd9Sstevel@tonic-gate int
2798*7c478bd9Sstevel@tonic-gate putctl(queue_t *q, int type)
2799*7c478bd9Sstevel@tonic-gate {
2800*7c478bd9Sstevel@tonic-gate 	mblk_t *bp;
2801*7c478bd9Sstevel@tonic-gate 
2802*7c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
2803*7c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(0)) == NULL)
2804*7c478bd9Sstevel@tonic-gate 		return (0);
2805*7c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
2806*7c478bd9Sstevel@tonic-gate 
2807*7c478bd9Sstevel@tonic-gate 	put(q, bp);
2808*7c478bd9Sstevel@tonic-gate 
2809*7c478bd9Sstevel@tonic-gate 	return (1);
2810*7c478bd9Sstevel@tonic-gate }
2811*7c478bd9Sstevel@tonic-gate 
2812*7c478bd9Sstevel@tonic-gate /*
2813*7c478bd9Sstevel@tonic-gate  * Control message with a single-byte parameter
2814*7c478bd9Sstevel@tonic-gate  */
2815*7c478bd9Sstevel@tonic-gate int
2816*7c478bd9Sstevel@tonic-gate putctl1(queue_t *q, int type, int param)
2817*7c478bd9Sstevel@tonic-gate {
2818*7c478bd9Sstevel@tonic-gate 	mblk_t *bp;
2819*7c478bd9Sstevel@tonic-gate 
2820*7c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
2821*7c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(1)) == NULL)
2822*7c478bd9Sstevel@tonic-gate 		return (0);
2823*7c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
2824*7c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
2825*7c478bd9Sstevel@tonic-gate 
2826*7c478bd9Sstevel@tonic-gate 	put(q, bp);
2827*7c478bd9Sstevel@tonic-gate 
2828*7c478bd9Sstevel@tonic-gate 	return (1);
2829*7c478bd9Sstevel@tonic-gate }
2830*7c478bd9Sstevel@tonic-gate 
2831*7c478bd9Sstevel@tonic-gate int
2832*7c478bd9Sstevel@tonic-gate putnextctl1(queue_t *q, int type, int param)
2833*7c478bd9Sstevel@tonic-gate {
2834*7c478bd9Sstevel@tonic-gate 	mblk_t *bp;
2835*7c478bd9Sstevel@tonic-gate 
2836*7c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
2837*7c478bd9Sstevel@tonic-gate 		((bp = allocb_tryhard(1)) == NULL))
2838*7c478bd9Sstevel@tonic-gate 		return (0);
2839*7c478bd9Sstevel@tonic-gate 
2840*7c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
2841*7c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
2842*7c478bd9Sstevel@tonic-gate 
2843*7c478bd9Sstevel@tonic-gate 	putnext(q, bp);
2844*7c478bd9Sstevel@tonic-gate 
2845*7c478bd9Sstevel@tonic-gate 	return (1);
2846*7c478bd9Sstevel@tonic-gate }
2847*7c478bd9Sstevel@tonic-gate 
2848*7c478bd9Sstevel@tonic-gate int
2849*7c478bd9Sstevel@tonic-gate putnextctl(queue_t *q, int type)
2850*7c478bd9Sstevel@tonic-gate {
2851*7c478bd9Sstevel@tonic-gate 	mblk_t *bp;
2852*7c478bd9Sstevel@tonic-gate 
2853*7c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
2854*7c478bd9Sstevel@tonic-gate 		((bp = allocb_tryhard(0)) == NULL))
2855*7c478bd9Sstevel@tonic-gate 		return (0);
2856*7c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
2857*7c478bd9Sstevel@tonic-gate 
2858*7c478bd9Sstevel@tonic-gate 	putnext(q, bp);
2859*7c478bd9Sstevel@tonic-gate 
2860*7c478bd9Sstevel@tonic-gate 	return (1);
2861*7c478bd9Sstevel@tonic-gate }
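
/*
 * Illustrative sketch (not compiled): generating zero- and one-byte
 * control messages.  A driver read-side routine might report a fatal
 * error to the stream head with M_ERROR, or signal hangup with M_HANGUP.
 * The xx_ names and the surrounding context are hypothetical.
 */
#ifdef EXAMPLE_ONLY
static void
xx_report_error(queue_t *rq, int err)
{
	(void) putnextctl1(rq, M_ERROR, err);	/* one-byte parameter */
}

static void
xx_report_hangup(queue_t *rq)
{
	(void) putnextctl(rq, M_HANGUP);	/* no parameter */
}
#endif /* EXAMPLE_ONLY */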
2862*7c478bd9Sstevel@tonic-gate 
2863*7c478bd9Sstevel@tonic-gate /*
2864*7c478bd9Sstevel@tonic-gate  * Return the queue upstream from this one
2865*7c478bd9Sstevel@tonic-gate  */
2866*7c478bd9Sstevel@tonic-gate queue_t *
2867*7c478bd9Sstevel@tonic-gate backq(queue_t *q)
2868*7c478bd9Sstevel@tonic-gate {
2869*7c478bd9Sstevel@tonic-gate 	q = _OTHERQ(q);
2870*7c478bd9Sstevel@tonic-gate 	if (q->q_next) {
2871*7c478bd9Sstevel@tonic-gate 		q = q->q_next;
2872*7c478bd9Sstevel@tonic-gate 		return (_OTHERQ(q));
2873*7c478bd9Sstevel@tonic-gate 	}
2874*7c478bd9Sstevel@tonic-gate 	return (NULL);
2875*7c478bd9Sstevel@tonic-gate }
2876*7c478bd9Sstevel@tonic-gate 
2877*7c478bd9Sstevel@tonic-gate /*
2878*7c478bd9Sstevel@tonic-gate  * Send a block back up the queue in reverse from this
2879*7c478bd9Sstevel@tonic-gate  * one (e.g. to respond to ioctls)
2880*7c478bd9Sstevel@tonic-gate  */
2881*7c478bd9Sstevel@tonic-gate void
2882*7c478bd9Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp)
2883*7c478bd9Sstevel@tonic-gate {
2884*7c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
2885*7c478bd9Sstevel@tonic-gate 
2886*7c478bd9Sstevel@tonic-gate 	putnext(_OTHERQ(q), bp);
2887*7c478bd9Sstevel@tonic-gate }
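
/*
 * Illustrative sketch (not compiled): the common use of qreply() to
 * answer an M_IOCTL received on the write side by sending an M_IOCNAK
 * (or M_IOCACK) back up the read side.  xx_wput_ioctl() and the
 * nak-everything policy are hypothetical.
 */
#ifdef EXAMPLE_ONLY
static void
xx_wput_ioctl(queue_t *wq, mblk_t *mp)
{
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;

	/* This module recognizes no ioctls; nak everything. */
	iocp->ioc_error = EINVAL;
	iocp->ioc_count = 0;
	mp->b_datap->db_type = M_IOCNAK;
	qreply(wq, mp);			/* travels up the read side */
}
#endif /* EXAMPLE_ONLY */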
2888*7c478bd9Sstevel@tonic-gate 
2889*7c478bd9Sstevel@tonic-gate /*
2890*7c478bd9Sstevel@tonic-gate  * Streams Queue Scheduling
2891*7c478bd9Sstevel@tonic-gate  *
2892*7c478bd9Sstevel@tonic-gate  * Queues are enabled through qenable() when they have messages to
2893*7c478bd9Sstevel@tonic-gate  * process.  They are serviced by queuerun(), which runs each enabled
2894*7c478bd9Sstevel@tonic-gate  * queue's service procedure.  The call to queuerun() is processor
2895*7c478bd9Sstevel@tonic-gate  * dependent - the general principle is that it be run whenever a queue
2896*7c478bd9Sstevel@tonic-gate  * is enabled but before returning to user level.  For system calls,
2897*7c478bd9Sstevel@tonic-gate  * the function runqueues() is called if their action causes a queue
2898*7c478bd9Sstevel@tonic-gate  * to be enabled.  For device interrupts, queuerun() should be
2899*7c478bd9Sstevel@tonic-gate  * called before returning from the last level of interrupt.  Beyond
2900*7c478bd9Sstevel@tonic-gate  * this, no timing assumptions should be made about queue scheduling.
2901*7c478bd9Sstevel@tonic-gate  */
2902*7c478bd9Sstevel@tonic-gate 
2903*7c478bd9Sstevel@tonic-gate /*
2904*7c478bd9Sstevel@tonic-gate  * Enable a queue: put it on list of those whose service procedures are
2905*7c478bd9Sstevel@tonic-gate  * ready to run and set up the scheduling mechanism.
2906*7c478bd9Sstevel@tonic-gate  * The broadcast is done outside the mutex to avoid the woken thread
2907*7c478bd9Sstevel@tonic-gate  * contending with the mutex. This is OK because the queue has been
2908*7c478bd9Sstevel@tonic-gate  * enqueued on the runlist and flagged safely at this point.
2909*7c478bd9Sstevel@tonic-gate  */
2910*7c478bd9Sstevel@tonic-gate void
2911*7c478bd9Sstevel@tonic-gate qenable(queue_t *q)
2912*7c478bd9Sstevel@tonic-gate {
2913*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2914*7c478bd9Sstevel@tonic-gate 	qenable_locked(q);
2915*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2916*7c478bd9Sstevel@tonic-gate }
2917*7c478bd9Sstevel@tonic-gate /*
2918*7c478bd9Sstevel@tonic-gate  * Return number of messages on queue
2919*7c478bd9Sstevel@tonic-gate  */
2920*7c478bd9Sstevel@tonic-gate int
2921*7c478bd9Sstevel@tonic-gate qsize(queue_t *qp)
2922*7c478bd9Sstevel@tonic-gate {
2923*7c478bd9Sstevel@tonic-gate 	int count = 0;
2924*7c478bd9Sstevel@tonic-gate 	mblk_t *mp;
2925*7c478bd9Sstevel@tonic-gate 
2926*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
2927*7c478bd9Sstevel@tonic-gate 	for (mp = qp->q_first; mp; mp = mp->b_next)
2928*7c478bd9Sstevel@tonic-gate 		count++;
2929*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
2930*7c478bd9Sstevel@tonic-gate 	return (count);
2931*7c478bd9Sstevel@tonic-gate }
2932*7c478bd9Sstevel@tonic-gate 
2933*7c478bd9Sstevel@tonic-gate /*
2934*7c478bd9Sstevel@tonic-gate  * noenable - set queue so that putq() will not enable it.
2935*7c478bd9Sstevel@tonic-gate  * enableok - set queue so that putq() can enable it.
2936*7c478bd9Sstevel@tonic-gate  */
2937*7c478bd9Sstevel@tonic-gate void
2938*7c478bd9Sstevel@tonic-gate noenable(queue_t *q)
2939*7c478bd9Sstevel@tonic-gate {
2940*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2941*7c478bd9Sstevel@tonic-gate 	q->q_flag |= QNOENB;
2942*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2943*7c478bd9Sstevel@tonic-gate }
2944*7c478bd9Sstevel@tonic-gate 
2945*7c478bd9Sstevel@tonic-gate void
2946*7c478bd9Sstevel@tonic-gate enableok(queue_t *q)
2947*7c478bd9Sstevel@tonic-gate {
2948*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
2949*7c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QNOENB;
2950*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
2951*7c478bd9Sstevel@tonic-gate }
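
/*
 * Illustrative sketch (not compiled): deferring service procedure
 * scheduling while a batch of messages is queued, then enabling the queue
 * once.  noenable() keeps putq() from scheduling the service procedure;
 * enableok() plus an explicit qenable() restores normal behavior.
 * xx_queue_batch() and the b_next-linked message list are hypothetical.
 */
#ifdef EXAMPLE_ONLY
static void
xx_queue_batch(queue_t *q, mblk_t *mplist)
{
	mblk_t *mp, *next;

	noenable(q);			/* suppress enabling from putq() */
	for (mp = mplist; mp != NULL; mp = next) {
		next = mp->b_next;
		mp->b_next = NULL;
		mp->b_prev = NULL;
		(void) putq(q, mp);
	}
	enableok(q);
	qenable(q);			/* schedule the service procedure once */
}
#endif /* EXAMPLE_ONLY */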
2952*7c478bd9Sstevel@tonic-gate 
2953*7c478bd9Sstevel@tonic-gate /*
2954*7c478bd9Sstevel@tonic-gate  * Set queue fields.
2955*7c478bd9Sstevel@tonic-gate  */
2956*7c478bd9Sstevel@tonic-gate int
2957*7c478bd9Sstevel@tonic-gate strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
2958*7c478bd9Sstevel@tonic-gate {
2959*7c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
2960*7c478bd9Sstevel@tonic-gate 	queue_t	*wrq;
2961*7c478bd9Sstevel@tonic-gate 	int error = 0;
2962*7c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
2963*7c478bd9Sstevel@tonic-gate 
2964*7c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
2965*7c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
2966*7c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
2967*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
2968*7c478bd9Sstevel@tonic-gate 	} else
2969*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
2970*7c478bd9Sstevel@tonic-gate 
2971*7c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
2972*7c478bd9Sstevel@tonic-gate 		error = EINVAL;
2973*7c478bd9Sstevel@tonic-gate 		goto done;
2974*7c478bd9Sstevel@tonic-gate 	}
2975*7c478bd9Sstevel@tonic-gate 	if (pri != 0) {
2976*7c478bd9Sstevel@tonic-gate 		int i;
2977*7c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
2978*7c478bd9Sstevel@tonic-gate 
2979*7c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
2980*7c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
2981*7c478bd9Sstevel@tonic-gate 			while (*qbpp)
2982*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2983*7c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
2984*7c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
2985*7c478bd9Sstevel@tonic-gate 					error = EAGAIN;
2986*7c478bd9Sstevel@tonic-gate 					goto done;
2987*7c478bd9Sstevel@tonic-gate 				}
2988*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
2989*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
2990*7c478bd9Sstevel@tonic-gate 				q->q_nband++;
2991*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
2992*7c478bd9Sstevel@tonic-gate 			}
2993*7c478bd9Sstevel@tonic-gate 		}
2994*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
2995*7c478bd9Sstevel@tonic-gate 		i = pri;
2996*7c478bd9Sstevel@tonic-gate 		while (--i)
2997*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
2998*7c478bd9Sstevel@tonic-gate 	}
2999*7c478bd9Sstevel@tonic-gate 	switch (what) {
3000*7c478bd9Sstevel@tonic-gate 
3001*7c478bd9Sstevel@tonic-gate 	case QHIWAT:
3002*7c478bd9Sstevel@tonic-gate 		if (qbp)
3003*7c478bd9Sstevel@tonic-gate 			qbp->qb_hiwat = (size_t)val;
3004*7c478bd9Sstevel@tonic-gate 		else
3005*7c478bd9Sstevel@tonic-gate 			q->q_hiwat = (size_t)val;
3006*7c478bd9Sstevel@tonic-gate 		break;
3007*7c478bd9Sstevel@tonic-gate 
3008*7c478bd9Sstevel@tonic-gate 	case QLOWAT:
3009*7c478bd9Sstevel@tonic-gate 		if (qbp)
3010*7c478bd9Sstevel@tonic-gate 			qbp->qb_lowat = (size_t)val;
3011*7c478bd9Sstevel@tonic-gate 		else
3012*7c478bd9Sstevel@tonic-gate 			q->q_lowat = (size_t)val;
3013*7c478bd9Sstevel@tonic-gate 		break;
3014*7c478bd9Sstevel@tonic-gate 
3015*7c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
3016*7c478bd9Sstevel@tonic-gate 		if (qbp)
3017*7c478bd9Sstevel@tonic-gate 			error = EINVAL;
3018*7c478bd9Sstevel@tonic-gate 		else
3019*7c478bd9Sstevel@tonic-gate 			q->q_maxpsz = (ssize_t)val;
3020*7c478bd9Sstevel@tonic-gate 
3021*7c478bd9Sstevel@tonic-gate 		/*
3022*7c478bd9Sstevel@tonic-gate 		 * Performance concern: strwrite looks at the module below
3023*7c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write,
3024*7c478bd9Sstevel@tonic-gate 		 * so we now cache it at the stream head.  Check to see if
3025*7c478bd9Sstevel@tonic-gate 		 * this queue is sitting directly below the stream head.
3026*7c478bd9Sstevel@tonic-gate 		 */
3027*7c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
3028*7c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
3029*7c478bd9Sstevel@tonic-gate 			break;
3030*7c478bd9Sstevel@tonic-gate 
3031*7c478bd9Sstevel@tonic-gate 		/*
3032*7c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
3033*7c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
3034*7c478bd9Sstevel@tonic-gate 		 */
3035*7c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
3036*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
3037*7c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
3038*7c478bd9Sstevel@tonic-gate 		}
3039*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(wrq)));
3040*7c478bd9Sstevel@tonic-gate 
3041*7c478bd9Sstevel@tonic-gate 		if (strmsgsz != 0) {
3042*7c478bd9Sstevel@tonic-gate 			if (val == INFPSZ)
3043*7c478bd9Sstevel@tonic-gate 				val = strmsgsz;
3044*7c478bd9Sstevel@tonic-gate 			else  {
3045*7c478bd9Sstevel@tonic-gate 				if (STREAM(q)->sd_vnode->v_type == VFIFO)
3046*7c478bd9Sstevel@tonic-gate 					val = MIN(PIPE_BUF, val);
3047*7c478bd9Sstevel@tonic-gate 				else
3048*7c478bd9Sstevel@tonic-gate 					val = MIN(strmsgsz, val);
3049*7c478bd9Sstevel@tonic-gate 			}
3050*7c478bd9Sstevel@tonic-gate 		}
3051*7c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_maxpsz = val;
3052*7c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
3053*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
3054*7c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
3055*7c478bd9Sstevel@tonic-gate 		}
3056*7c478bd9Sstevel@tonic-gate 		break;
3057*7c478bd9Sstevel@tonic-gate 
3058*7c478bd9Sstevel@tonic-gate 	case QMINPSZ:
3059*7c478bd9Sstevel@tonic-gate 		if (qbp)
3060*7c478bd9Sstevel@tonic-gate 			error = EINVAL;
3061*7c478bd9Sstevel@tonic-gate 		else
3062*7c478bd9Sstevel@tonic-gate 			q->q_minpsz = (ssize_t)val;
3063*7c478bd9Sstevel@tonic-gate 
3064*7c478bd9Sstevel@tonic-gate 		/*
3065*7c478bd9Sstevel@tonic-gate 		 * Performance concern: strwrite looks at the module below
3066*7c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write,
3067*7c478bd9Sstevel@tonic-gate 		 * so we now cache it at the stream head.  Check to see if
3068*7c478bd9Sstevel@tonic-gate 		 * this queue is sitting directly below the stream head.
3069*7c478bd9Sstevel@tonic-gate 		 */
3070*7c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
3071*7c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
3072*7c478bd9Sstevel@tonic-gate 			break;
3073*7c478bd9Sstevel@tonic-gate 
3074*7c478bd9Sstevel@tonic-gate 		/*
3075*7c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen, drop the current QLOCK and
3076*7c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK, which protects sd_qn_*.
3077*7c478bd9Sstevel@tonic-gate 		 */
3078*7c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
3079*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
3080*7c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
3081*7c478bd9Sstevel@tonic-gate 		}
3082*7c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_minpsz = (ssize_t)val;
3083*7c478bd9Sstevel@tonic-gate 
3084*7c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
3085*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
3086*7c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
3087*7c478bd9Sstevel@tonic-gate 		}
3088*7c478bd9Sstevel@tonic-gate 		break;
3089*7c478bd9Sstevel@tonic-gate 
3090*7c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
3091*7c478bd9Sstevel@tonic-gate 		if (qbp)
3092*7c478bd9Sstevel@tonic-gate 			error = EINVAL;
3093*7c478bd9Sstevel@tonic-gate 		else
3094*7c478bd9Sstevel@tonic-gate 			q->q_struiot = (ushort_t)val;
3095*7c478bd9Sstevel@tonic-gate 		break;
3096*7c478bd9Sstevel@tonic-gate 
3097*7c478bd9Sstevel@tonic-gate 	case QCOUNT:
3098*7c478bd9Sstevel@tonic-gate 	case QFIRST:
3099*7c478bd9Sstevel@tonic-gate 	case QLAST:
3100*7c478bd9Sstevel@tonic-gate 	case QFLAG:
3101*7c478bd9Sstevel@tonic-gate 		error = EPERM;
3102*7c478bd9Sstevel@tonic-gate 		break;
3103*7c478bd9Sstevel@tonic-gate 
3104*7c478bd9Sstevel@tonic-gate 	default:
3105*7c478bd9Sstevel@tonic-gate 		error = EINVAL;
3106*7c478bd9Sstevel@tonic-gate 		break;
3107*7c478bd9Sstevel@tonic-gate 	}
3108*7c478bd9Sstevel@tonic-gate done:
3109*7c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
3110*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
3111*7c478bd9Sstevel@tonic-gate 	return (error);
3112*7c478bd9Sstevel@tonic-gate }
3113*7c478bd9Sstevel@tonic-gate 
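/*
 * Illustrative sketch (not part of the original source): a module might
 * raise the flow-control limits on its read queue from its open routine,
 * assuming the documented strqset(9F) interface (queue, field, band, value);
 * the values shown are hypothetical.
 *
 *	if ((error = strqset(q, QHIWAT, 0, 65536)) != 0)
 *		return (error);
 *	if ((error = strqset(q, QLOWAT, 0, 8192)) != 0)
 *		return (error);
 */
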
3114*7c478bd9Sstevel@tonic-gate /*
3115*7c478bd9Sstevel@tonic-gate  * Get queue fields.
3116*7c478bd9Sstevel@tonic-gate  */
3117*7c478bd9Sstevel@tonic-gate int
3118*7c478bd9Sstevel@tonic-gate strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
3119*7c478bd9Sstevel@tonic-gate {
3120*7c478bd9Sstevel@tonic-gate 	qband_t 	*qbp = NULL;
3121*7c478bd9Sstevel@tonic-gate 	int 		error = 0;
3122*7c478bd9Sstevel@tonic-gate 	kthread_id_t 	freezer;
3123*7c478bd9Sstevel@tonic-gate 
3124*7c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
3125*7c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
3126*7c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
3127*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
3128*7c478bd9Sstevel@tonic-gate 	} else
3129*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
3130*7c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
3131*7c478bd9Sstevel@tonic-gate 		error = EINVAL;
3132*7c478bd9Sstevel@tonic-gate 		goto done;
3133*7c478bd9Sstevel@tonic-gate 	}
3134*7c478bd9Sstevel@tonic-gate 	if (pri != 0) {
3135*7c478bd9Sstevel@tonic-gate 		int i;
3136*7c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
3137*7c478bd9Sstevel@tonic-gate 
3138*7c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
3139*7c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
3140*7c478bd9Sstevel@tonic-gate 			while (*qbpp)
3141*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
3142*7c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
3143*7c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
3144*7c478bd9Sstevel@tonic-gate 					error = EAGAIN;
3145*7c478bd9Sstevel@tonic-gate 					goto done;
3146*7c478bd9Sstevel@tonic-gate 				}
3147*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
3148*7c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
3149*7c478bd9Sstevel@tonic-gate 				q->q_nband++;
3150*7c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
3151*7c478bd9Sstevel@tonic-gate 			}
3152*7c478bd9Sstevel@tonic-gate 		}
3153*7c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
3154*7c478bd9Sstevel@tonic-gate 		i = pri;
3155*7c478bd9Sstevel@tonic-gate 		while (--i)
3156*7c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
3157*7c478bd9Sstevel@tonic-gate 	}
3158*7c478bd9Sstevel@tonic-gate 	switch (what) {
3159*7c478bd9Sstevel@tonic-gate 	case QHIWAT:
3160*7c478bd9Sstevel@tonic-gate 		if (qbp)
3161*7c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_hiwat;
3162*7c478bd9Sstevel@tonic-gate 		else
3163*7c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_hiwat;
3164*7c478bd9Sstevel@tonic-gate 		break;
3165*7c478bd9Sstevel@tonic-gate 
3166*7c478bd9Sstevel@tonic-gate 	case QLOWAT:
3167*7c478bd9Sstevel@tonic-gate 		if (qbp)
3168*7c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_lowat;
3169*7c478bd9Sstevel@tonic-gate 		else
3170*7c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_lowat;
3171*7c478bd9Sstevel@tonic-gate 		break;
3172*7c478bd9Sstevel@tonic-gate 
3173*7c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
3174*7c478bd9Sstevel@tonic-gate 		if (qbp)
3175*7c478bd9Sstevel@tonic-gate 			error = EINVAL;
3176*7c478bd9Sstevel@tonic-gate 		else
3177*7c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_maxpsz;
3178*7c478bd9Sstevel@tonic-gate 		break;
3179*7c478bd9Sstevel@tonic-gate 
3180*7c478bd9Sstevel@tonic-gate 	case QMINPSZ:
3181*7c478bd9Sstevel@tonic-gate 		if (qbp)
3182*7c478bd9Sstevel@tonic-gate 			error = EINVAL;
3183*7c478bd9Sstevel@tonic-gate 		else
3184*7c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_minpsz;
3185*7c478bd9Sstevel@tonic-gate 		break;
3186*7c478bd9Sstevel@tonic-gate 
3187*7c478bd9Sstevel@tonic-gate 	case QCOUNT:
3188*7c478bd9Sstevel@tonic-gate 		if (qbp)
3189*7c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_count;
3190*7c478bd9Sstevel@tonic-gate 		else
3191*7c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_count;
3192*7c478bd9Sstevel@tonic-gate 		break;
3193*7c478bd9Sstevel@tonic-gate 
3194*7c478bd9Sstevel@tonic-gate 	case QFIRST:
3195*7c478bd9Sstevel@tonic-gate 		if (qbp)
3196*7c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_first;
3197*7c478bd9Sstevel@tonic-gate 		else
3198*7c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_first;
3199*7c478bd9Sstevel@tonic-gate 		break;
3200*7c478bd9Sstevel@tonic-gate 
3201*7c478bd9Sstevel@tonic-gate 	case QLAST:
3202*7c478bd9Sstevel@tonic-gate 		if (qbp)
3203*7c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_last;
3204*7c478bd9Sstevel@tonic-gate 		else
3205*7c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_last;
3206*7c478bd9Sstevel@tonic-gate 		break;
3207*7c478bd9Sstevel@tonic-gate 
3208*7c478bd9Sstevel@tonic-gate 	case QFLAG:
3209*7c478bd9Sstevel@tonic-gate 		if (qbp)
3210*7c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = qbp->qb_flag;
3211*7c478bd9Sstevel@tonic-gate 		else
3212*7c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = q->q_flag;
3213*7c478bd9Sstevel@tonic-gate 		break;
3214*7c478bd9Sstevel@tonic-gate 
3215*7c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
3216*7c478bd9Sstevel@tonic-gate 		if (qbp)
3217*7c478bd9Sstevel@tonic-gate 			error = EINVAL;
3218*7c478bd9Sstevel@tonic-gate 		else
3219*7c478bd9Sstevel@tonic-gate 			*(short *)valp = q->q_struiot;
3220*7c478bd9Sstevel@tonic-gate 		break;
3221*7c478bd9Sstevel@tonic-gate 
3222*7c478bd9Sstevel@tonic-gate 	default:
3223*7c478bd9Sstevel@tonic-gate 		error = EINVAL;
3224*7c478bd9Sstevel@tonic-gate 		break;
3225*7c478bd9Sstevel@tonic-gate 	}
3226*7c478bd9Sstevel@tonic-gate done:
3227*7c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
3228*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
3229*7c478bd9Sstevel@tonic-gate 	return (error);
3230*7c478bd9Sstevel@tonic-gate }
3231*7c478bd9Sstevel@tonic-gate 
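/*
 * Illustrative sketch (not part of the original source): strqget() copies
 * the requested field through valp, so a module could sample the number of
 * bytes queued in band 0 like this (QCOUNT, QFIRST, QLAST and QFLAG are
 * readable here but are rejected by strqset() above with EPERM):
 *
 *	size_t count;
 *
 *	if (strqget(q, QCOUNT, 0, &count) == 0 && count > 0)
 *		qenable(q);
 */
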
3232*7c478bd9Sstevel@tonic-gate /*
3233*7c478bd9Sstevel@tonic-gate  * Wake up all threads blocked in cvwait/sigwait/pollwait, waiting on one of:
3234*7c478bd9Sstevel@tonic-gate  *	QWANTWSYNC, QWANTR, or QWANTW.
3235*7c478bd9Sstevel@tonic-gate  *
3236*7c478bd9Sstevel@tonic-gate  * Note: for QWANTWSYNC/QWANTW and QWANTR, if there is no WSLEEPer or RSLEEPer,
3237*7c478bd9Sstevel@tonic-gate  *	 a deferred wakeup will be done. Also, if strpoll() is in progress, a
3238*7c478bd9Sstevel@tonic-gate  *	 deferred pollwakeup will be done.
3239*7c478bd9Sstevel@tonic-gate  */
3240*7c478bd9Sstevel@tonic-gate void
3241*7c478bd9Sstevel@tonic-gate strwakeq(queue_t *q, int flag)
3242*7c478bd9Sstevel@tonic-gate {
3243*7c478bd9Sstevel@tonic-gate 	stdata_t 	*stp = STREAM(q);
3244*7c478bd9Sstevel@tonic-gate 	pollhead_t 	*pl;
3245*7c478bd9Sstevel@tonic-gate 
3246*7c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
3247*7c478bd9Sstevel@tonic-gate 	pl = &stp->sd_pollist;
3248*7c478bd9Sstevel@tonic-gate 	if (flag & QWANTWSYNC) {
3249*7c478bd9Sstevel@tonic-gate 		ASSERT(!(q->q_flag & QREADR));
3250*7c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
3251*7c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
3252*7c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
3253*7c478bd9Sstevel@tonic-gate 		} else {
3254*7c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= WSLEEP;
3255*7c478bd9Sstevel@tonic-gate 		}
3256*7c478bd9Sstevel@tonic-gate 
3257*7c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3258*7c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
3259*7c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
3260*7c478bd9Sstevel@tonic-gate 
3261*7c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
3262*7c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
3263*7c478bd9Sstevel@tonic-gate 	} else if (flag & QWANTR) {
3264*7c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & RSLEEP) {
3265*7c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~RSLEEP;
3266*7c478bd9Sstevel@tonic-gate 			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
3267*7c478bd9Sstevel@tonic-gate 		} else {
3268*7c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= RSLEEP;
3269*7c478bd9Sstevel@tonic-gate 		}
3270*7c478bd9Sstevel@tonic-gate 
3271*7c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3272*7c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLIN | POLLRDNORM);
3273*7c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
3274*7c478bd9Sstevel@tonic-gate 
3275*7c478bd9Sstevel@tonic-gate 		{
3276*7c478bd9Sstevel@tonic-gate 			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);
3277*7c478bd9Sstevel@tonic-gate 
3278*7c478bd9Sstevel@tonic-gate 			if (events)
3279*7c478bd9Sstevel@tonic-gate 				strsendsig(stp->sd_siglist, events, 0, 0);
3280*7c478bd9Sstevel@tonic-gate 		}
3281*7c478bd9Sstevel@tonic-gate 	} else {
3282*7c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
3283*7c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
3284*7c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
3285*7c478bd9Sstevel@tonic-gate 		}
3286*7c478bd9Sstevel@tonic-gate 
3287*7c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3288*7c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
3289*7c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
3290*7c478bd9Sstevel@tonic-gate 
3291*7c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
3292*7c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
3293*7c478bd9Sstevel@tonic-gate 	}
3294*7c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
3295*7c478bd9Sstevel@tonic-gate }
3296*7c478bd9Sstevel@tonic-gate 
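/*
 * Illustrative sketch (not part of the original source): a synchronous
 * stream driver that has just made read-side data available could wake any
 * readers and pollers blocked at the stream head with:
 *
 *	strwakeq(q, QWANTR);
 */
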
3297*7c478bd9Sstevel@tonic-gate int
3298*7c478bd9Sstevel@tonic-gate struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
3299*7c478bd9Sstevel@tonic-gate {
3300*7c478bd9Sstevel@tonic-gate 	stdata_t *stp = STREAM(q);
3301*7c478bd9Sstevel@tonic-gate 	int typ  = STRUIOT_STANDARD;
3302*7c478bd9Sstevel@tonic-gate 	uio_t	 *uiop = &dp->d_uio;
3303*7c478bd9Sstevel@tonic-gate 	dblk_t	 *dbp;
3304*7c478bd9Sstevel@tonic-gate 	ssize_t	 uiocnt;
3305*7c478bd9Sstevel@tonic-gate 	ssize_t	 cnt;
3306*7c478bd9Sstevel@tonic-gate 	unsigned char *ptr;
3307*7c478bd9Sstevel@tonic-gate 	ssize_t	 resid;
3308*7c478bd9Sstevel@tonic-gate 	int	 error = 0;
3309*7c478bd9Sstevel@tonic-gate 	on_trap_data_t otd;
3310*7c478bd9Sstevel@tonic-gate 	queue_t	*stwrq;
3311*7c478bd9Sstevel@tonic-gate 
3312*7c478bd9Sstevel@tonic-gate 	/*
3313*7c478bd9Sstevel@tonic-gate 	 * Plumbing may change while we are taking the type, so store
3314*7c478bd9Sstevel@tonic-gate 	 * the queue in a temporary variable. It does not matter if we
3315*7c478bd9Sstevel@tonic-gate 	 * take the type from the previous plumbing: if the plumbing
3316*7c478bd9Sstevel@tonic-gate 	 * changed while we were holding the queue in a temporary
3317*7c478bd9Sstevel@tonic-gate 	 * variable, we can continue processing the message the way it
3318*7c478bd9Sstevel@tonic-gate 	 * would have been processed in the old plumbing, with no side
3319*7c478bd9Sstevel@tonic-gate 	 * effects other than a bit of extra processing for the partial
3320*7c478bd9Sstevel@tonic-gate 	 * IP header checksum.
3321*7c478bd9Sstevel@tonic-gate 	 *
3322*7c478bd9Sstevel@tonic-gate 	 * This has been done to avoid holding the sd_lock which is
3323*7c478bd9Sstevel@tonic-gate 	 * very hot.
3324*7c478bd9Sstevel@tonic-gate 	 */
3325*7c478bd9Sstevel@tonic-gate 
3326*7c478bd9Sstevel@tonic-gate 	stwrq = stp->sd_struiowrq;
3327*7c478bd9Sstevel@tonic-gate 	if (stwrq)
3328*7c478bd9Sstevel@tonic-gate 		typ = stwrq->q_struiot;
3329*7c478bd9Sstevel@tonic-gate 
3330*7c478bd9Sstevel@tonic-gate 	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
3331*7c478bd9Sstevel@tonic-gate 		dbp = mp->b_datap;
3332*7c478bd9Sstevel@tonic-gate 		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
3333*7c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
3334*7c478bd9Sstevel@tonic-gate 		cnt = MIN(uiocnt, uiop->uio_resid);
3335*7c478bd9Sstevel@tonic-gate 		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
3336*7c478bd9Sstevel@tonic-gate 		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
3337*7c478bd9Sstevel@tonic-gate 			/*
3338*7c478bd9Sstevel@tonic-gate 			 * Either this mblk has already been processed
3339*7c478bd9Sstevel@tonic-gate 			 * or there is no more room in this mblk (?).
3340*7c478bd9Sstevel@tonic-gate 			 */
3341*7c478bd9Sstevel@tonic-gate 			continue;
3342*7c478bd9Sstevel@tonic-gate 		}
3343*7c478bd9Sstevel@tonic-gate 		switch (typ) {
3344*7c478bd9Sstevel@tonic-gate 		case STRUIOT_STANDARD:
3345*7c478bd9Sstevel@tonic-gate 			if (noblock) {
3346*7c478bd9Sstevel@tonic-gate 				if (on_trap(&otd, OT_DATA_ACCESS)) {
3347*7c478bd9Sstevel@tonic-gate 					no_trap();
3348*7c478bd9Sstevel@tonic-gate 					error = EWOULDBLOCK;
3349*7c478bd9Sstevel@tonic-gate 					goto out;
3350*7c478bd9Sstevel@tonic-gate 				}
3351*7c478bd9Sstevel@tonic-gate 			}
3352*7c478bd9Sstevel@tonic-gate 			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
3353*7c478bd9Sstevel@tonic-gate 				if (noblock)
3354*7c478bd9Sstevel@tonic-gate 					no_trap();
3355*7c478bd9Sstevel@tonic-gate 				goto out;
3356*7c478bd9Sstevel@tonic-gate 			}
3357*7c478bd9Sstevel@tonic-gate 			if (noblock)
3358*7c478bd9Sstevel@tonic-gate 				no_trap();
3359*7c478bd9Sstevel@tonic-gate 			break;
3360*7c478bd9Sstevel@tonic-gate 
3361*7c478bd9Sstevel@tonic-gate 		default:
3362*7c478bd9Sstevel@tonic-gate 			error = EIO;
3363*7c478bd9Sstevel@tonic-gate 			goto out;
3364*7c478bd9Sstevel@tonic-gate 		}
3365*7c478bd9Sstevel@tonic-gate 		dbp->db_struioflag |= STRUIO_DONE;
3366*7c478bd9Sstevel@tonic-gate 		dbp->db_cksumstuff += cnt;
3367*7c478bd9Sstevel@tonic-gate 	}
3368*7c478bd9Sstevel@tonic-gate out:
3369*7c478bd9Sstevel@tonic-gate 	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
3370*7c478bd9Sstevel@tonic-gate 		/*
3371*7c478bd9Sstevel@tonic-gate 		 * A fault has occurred and some bytes were moved to the
3372*7c478bd9Sstevel@tonic-gate 		 * current mblk, the uio_t has already been updated by
3373*7c478bd9Sstevel@tonic-gate 		 * the appropriate uio routine, so also update the mblk
3374*7c478bd9Sstevel@tonic-gate 		 * to reflect this in case this same mblk chain is used
3375*7c478bd9Sstevel@tonic-gate 		 * again (after the fault has been handled).
3376*7c478bd9Sstevel@tonic-gate 		 */
3377*7c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
3378*7c478bd9Sstevel@tonic-gate 		if (uiocnt >= resid)
3379*7c478bd9Sstevel@tonic-gate 			dbp->db_cksumstuff += resid;
3380*7c478bd9Sstevel@tonic-gate 	}
3381*7c478bd9Sstevel@tonic-gate 	return (error);
3382*7c478bd9Sstevel@tonic-gate }
3383*7c478bd9Sstevel@tonic-gate 
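/*
 * Illustrative sketch (not part of the original source): a synchronous
 * module or driver rw procedure (see rwnext() below) would typically finish
 * the deferred copyin for the mblk chain it was handed before touching the
 * payload, roughly:
 *
 *	int error;
 *
 *	if ((error = struioget(q, dp->d_mp, dp, 0)) != 0)
 *		return (error);
 */
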
3384*7c478bd9Sstevel@tonic-gate /*
3385*7c478bd9Sstevel@tonic-gate  * Try to enter queue synchronously. Any attempt to enter a closing queue will
3386*7c478bd9Sstevel@tonic-gate  * fail. The qp->q_rwcnt keeps track of the number of successful entries so
3387*7c478bd9Sstevel@tonic-gate  * that removeq() will not try to close the queue while a thread is inside the
3388*7c478bd9Sstevel@tonic-gate  * queue.
3389*7c478bd9Sstevel@tonic-gate  */
3390*7c478bd9Sstevel@tonic-gate static boolean_t
3391*7c478bd9Sstevel@tonic-gate rwnext_enter(queue_t *qp)
3392*7c478bd9Sstevel@tonic-gate {
3393*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
3394*7c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWCLOSE) {
3395*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
3396*7c478bd9Sstevel@tonic-gate 		return (B_FALSE);
3397*7c478bd9Sstevel@tonic-gate 	}
3398*7c478bd9Sstevel@tonic-gate 	qp->q_rwcnt++;
3399*7c478bd9Sstevel@tonic-gate 	ASSERT(qp->q_rwcnt != 0);
3400*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
3401*7c478bd9Sstevel@tonic-gate 	return (B_TRUE);
3402*7c478bd9Sstevel@tonic-gate }
3403*7c478bd9Sstevel@tonic-gate 
3404*7c478bd9Sstevel@tonic-gate /*
3405*7c478bd9Sstevel@tonic-gate  * Decrease the count of threads running in a sync stream queue and wake up any
3406*7c478bd9Sstevel@tonic-gate  * threads blocked in removeq().
3407*7c478bd9Sstevel@tonic-gate  */
3408*7c478bd9Sstevel@tonic-gate static void
3409*7c478bd9Sstevel@tonic-gate rwnext_exit(queue_t *qp)
3410*7c478bd9Sstevel@tonic-gate {
3411*7c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
3412*7c478bd9Sstevel@tonic-gate 	qp->q_rwcnt--;
3413*7c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWANTRMQSYNC) {
3414*7c478bd9Sstevel@tonic-gate 		qp->q_flag &= ~QWANTRMQSYNC;
3415*7c478bd9Sstevel@tonic-gate 		cv_broadcast(&qp->q_wait);
3416*7c478bd9Sstevel@tonic-gate 	}
3417*7c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
3418*7c478bd9Sstevel@tonic-gate }
3419*7c478bd9Sstevel@tonic-gate 
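/*
 * Illustrative sketch (not part of the original source): rwnext_enter() and
 * rwnext_exit() bracket each synchronous entry into a queue, much as
 * rwnext() does below:
 *
 *	if (rwnext_enter(qp) == B_FALSE)
 *		return (EINVAL);
 *	rval = (*proc)(qp, dp);
 *	rwnext_exit(qp);
 */
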
3420*7c478bd9Sstevel@tonic-gate /*
3421*7c478bd9Sstevel@tonic-gate  * The purpose of rwnext() is to call the rw procedure of the next
3422*7c478bd9Sstevel@tonic-gate  * (downstream) module's queue.
3423*7c478bd9Sstevel@tonic-gate  *
3424*7c478bd9Sstevel@tonic-gate  * Treated as a put entrypoint for perimeter synchronization.
3425*7c478bd9Sstevel@tonic-gate  *
3426*7c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
3427*7c478bd9Sstevel@tonic-gate  * sync queues). If it is a CIPUT sync queue, sq_count is incremented and it
3428*7c478bd9Sstevel@tonic-gate  * does not matter if any regular put entrypoints have already been entered. We
3429*7c478bd9Sstevel@tonic-gate  * can't increment one of the sq_putcounts (instead of sq_count) because
3430*7c478bd9Sstevel@tonic-gate  * qwait_rw won't know which counter to decrement.
3431*7c478bd9Sstevel@tonic-gate  *
3432*7c478bd9Sstevel@tonic-gate  * It would be reasonable to add the lockless FASTPUT logic.
3433*7c478bd9Sstevel@tonic-gate  */
3434*7c478bd9Sstevel@tonic-gate int
3435*7c478bd9Sstevel@tonic-gate rwnext(queue_t *qp, struiod_t *dp)
3436*7c478bd9Sstevel@tonic-gate {
3437*7c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
3438*7c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
3439*7c478bd9Sstevel@tonic-gate 	uint16_t	count;
3440*7c478bd9Sstevel@tonic-gate 	uint16_t	flags;
3441*7c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
3442*7c478bd9Sstevel@tonic-gate 	int		(*proc)();
3443*7c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
3444*7c478bd9Sstevel@tonic-gate 	int		isread;
3445*7c478bd9Sstevel@tonic-gate 	int		rval;
3446*7c478bd9Sstevel@tonic-gate 
3447*7c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
3448*7c478bd9Sstevel@tonic-gate 	/*
3449*7c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until acquiring
3450*7c478bd9Sstevel@tonic-gate 	 * SQLOCK. Note that a read-side rwnext from the streamhead will
3451*7c478bd9Sstevel@tonic-gate 	 * already have sd_lock acquired. In either case sd_lock is always
3452*7c478bd9Sstevel@tonic-gate 	 * released after acquiring SQLOCK.
3453*7c478bd9Sstevel@tonic-gate 	 *
3454*7c478bd9Sstevel@tonic-gate 	 * The streamhead read-side holding sd_lock when calling rwnext is
3455*7c478bd9Sstevel@tonic-gate 	 * required to prevent a race condition where M_DATA mblks flowing
3456*7c478bd9Sstevel@tonic-gate 	 * up the read-side of the stream could be bypassed by a rwnext()
3457*7c478bd9Sstevel@tonic-gate 	 * down-call. In this case sd_lock acts as the streamhead perimeter.
3458*7c478bd9Sstevel@tonic-gate 	 */
3459*7c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
3460*7c478bd9Sstevel@tonic-gate 		isread = 0;
3461*7c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
3462*7c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
3463*7c478bd9Sstevel@tonic-gate 	} else {
3464*7c478bd9Sstevel@tonic-gate 		isread = 1;
3465*7c478bd9Sstevel@tonic-gate 		if (nqp != stp->sd_wrq)
3466*7c478bd9Sstevel@tonic-gate 			/* Not streamhead */
3467*7c478bd9Sstevel@tonic-gate 			mutex_enter(&stp->sd_lock);
3468*7c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
3469*7c478bd9Sstevel@tonic-gate 	}
3470*7c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
3471*7c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
3472*7c478bd9Sstevel@tonic-gate 		/*
3473*7c478bd9Sstevel@tonic-gate 		 * Not a synchronous module or no r/w procedure for this
3474*7c478bd9Sstevel@tonic-gate 		 * queue, so just return EINVAL and let the caller handle it.
3475*7c478bd9Sstevel@tonic-gate 		 */
3476*7c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3477*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
3478*7c478bd9Sstevel@tonic-gate 	}
3479*7c478bd9Sstevel@tonic-gate 
3480*7c478bd9Sstevel@tonic-gate 	if (rwnext_enter(qp) == B_FALSE) {
3481*7c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3482*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
3483*7c478bd9Sstevel@tonic-gate 	}
3484*7c478bd9Sstevel@tonic-gate 
3485*7c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
3486*7c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
3487*7c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
3488*7c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
3489*7c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
3490*7c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
3491*7c478bd9Sstevel@tonic-gate 
3492*7c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
3493*7c478bd9Sstevel@tonic-gate 		/*
3494*7c478bd9Sstevel@tonic-gate 		 * if this queue is being closed, return.
3495*7c478bd9Sstevel@tonic-gate 		 */
3496*7c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QWCLOSE) {
3497*7c478bd9Sstevel@tonic-gate 			mutex_exit(SQLOCK(sq));
3498*7c478bd9Sstevel@tonic-gate 			rwnext_exit(qp);
3499*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
3500*7c478bd9Sstevel@tonic-gate 		}
3501*7c478bd9Sstevel@tonic-gate 
3502*7c478bd9Sstevel@tonic-gate 		/*
3503*7c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
3504*7c478bd9Sstevel@tonic-gate 		 */
3505*7c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
3506*7c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
3507*7c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
3508*7c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
3509*7c478bd9Sstevel@tonic-gate 	}
3510*7c478bd9Sstevel@tonic-gate 
3511*7c478bd9Sstevel@tonic-gate 	if (isread == 0 && stp->sd_struiowrq == NULL ||
3512*7c478bd9Sstevel@tonic-gate 	    isread == 1 && stp->sd_struiordq == NULL) {
3513*7c478bd9Sstevel@tonic-gate 		/*
3514*7c478bd9Sstevel@tonic-gate 		 * Stream plumbing changed while waiting for inner perimeter
3515*7c478bd9Sstevel@tonic-gate 		 * so just return EINVAL and let the caller handle it.
3516*7c478bd9Sstevel@tonic-gate 		 */
3517*7c478bd9Sstevel@tonic-gate 		mutex_exit(SQLOCK(sq));
3518*7c478bd9Sstevel@tonic-gate 		rwnext_exit(qp);
3519*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
3520*7c478bd9Sstevel@tonic-gate 	}
3521*7c478bd9Sstevel@tonic-gate 	if (!(flags & SQ_CIPUT))
3522*7c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
3523*7c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
3524*7c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
3525*7c478bd9Sstevel@tonic-gate 	/*
3526*7c478bd9Sstevel@tonic-gate 	 * Note: The only message ordering guarantee that rwnext() makes is
3527*7c478bd9Sstevel@tonic-gate 	 *	 for the write queue flow-control case. All others (r/w queue
3528*7c478bd9Sstevel@tonic-gate 	 *	 with q_count > 0 (or q_first != 0)) are the responsibility of
3529*7c478bd9Sstevel@tonic-gate 	 *	 the queue's rw procedure. This could be generalized here by
3530*7c478bd9Sstevel@tonic-gate 	 *	 running the queue's service procedure, but that wouldn't be
3531*7c478bd9Sstevel@tonic-gate 	 *	 the most efficient for all cases.
3532*7c478bd9Sstevel@tonic-gate 	 */
3533*7c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
3534*7c478bd9Sstevel@tonic-gate 	if (! isread && (qp->q_flag & QFULL)) {
3535*7c478bd9Sstevel@tonic-gate 		/*
3536*7c478bd9Sstevel@tonic-gate 		 * Write queue may be flow controlled. If so,
3537*7c478bd9Sstevel@tonic-gate 		 * mark the queue for wakeup when it's not.
3538*7c478bd9Sstevel@tonic-gate 		 */
3539*7c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(qp));
3540*7c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QFULL) {
3541*7c478bd9Sstevel@tonic-gate 			qp->q_flag |= QWANTWSYNC;
3542*7c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(qp));
3543*7c478bd9Sstevel@tonic-gate 			rval = EWOULDBLOCK;
3544*7c478bd9Sstevel@tonic-gate 			goto out;
3545*7c478bd9Sstevel@tonic-gate 		}
3546*7c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
3547*7c478bd9Sstevel@tonic-gate 	}
3548*7c478bd9Sstevel@tonic-gate 
3549*7c478bd9Sstevel@tonic-gate 	if (! isread && dp->d_mp)
3550*7c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
3551*7c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_datap->db_base);
3552*7c478bd9Sstevel@tonic-gate 
3553*7c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, dp);
3554*7c478bd9Sstevel@tonic-gate 
3555*7c478bd9Sstevel@tonic-gate 	if (isread && dp->d_mp)
3556*7c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
3557*7c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
3558*7c478bd9Sstevel@tonic-gate out:
3559*7c478bd9Sstevel@tonic-gate 	/*
3560*7c478bd9Sstevel@tonic-gate 	 * The queue is protected from being freed by sq_count, so it is
3561*7c478bd9Sstevel@tonic-gate 	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
3562*7c478bd9Sstevel@tonic-gate 	 */
3563*7c478bd9Sstevel@tonic-gate 	rwnext_exit(qp);
3564*7c478bd9Sstevel@tonic-gate 
3565*7c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
3566*7c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
3567*7c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
3568*7c478bd9Sstevel@tonic-gate 	sq->sq_count--;
3569*7c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
3570*7c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
3571*7c478bd9Sstevel@tonic-gate 		/*
3572*7c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
3573*7c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
3574*7c478bd9Sstevel@tonic-gate 		 */
3575*7c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
3576*7c478bd9Sstevel@tonic-gate 		return (rval);
3577*7c478bd9Sstevel@tonic-gate 	}
3578*7c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
3579*7c478bd9Sstevel@tonic-gate 	/*
3580*7c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
3581*7c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
3582*7c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
3583*7c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
3584*7c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
3585*7c478bd9Sstevel@tonic-gate 	 *
3586*7c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
3587*7c478bd9Sstevel@tonic-gate 	 *
3588*7c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
3589*7c478bd9Sstevel@tonic-gate 	 * 	sq->sq_count == 0);
3590*7c478bd9Sstevel@tonic-gate 	 *
3591*7c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
3592*7c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
3593*7c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
3594*7c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
3595*7c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
3596*7c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
3597*7c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
3598*7c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
3599*7c478bd9Sstevel@tonic-gate 	 * test invalid.
3600*7c478bd9Sstevel@tonic-gate 	 */
3601*7c478bd9Sstevel@tonic-gate 
3602*7c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
3603*7c478bd9Sstevel@tonic-gate 	if (sq->sq_flags & SQ_WANTWAKEUP) {
3604*7c478bd9Sstevel@tonic-gate 		sq->sq_flags &= ~SQ_WANTWAKEUP;
3605*7c478bd9Sstevel@tonic-gate 		cv_broadcast(&sq->sq_wait);
3606*7c478bd9Sstevel@tonic-gate 	}
3607*7c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
3608*7c478bd9Sstevel@tonic-gate 	return (rval);
3609*7c478bd9Sstevel@tonic-gate }
3610*7c478bd9Sstevel@tonic-gate 
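/*
 * Illustrative sketch (not part of the original source): a caller above a
 * synchronous module (for example the stream head write path) hands
 * rwnext() a struiod_t describing the message and the remaining uio, and
 * falls back to the ordinary put path on EINVAL; the initialization details
 * are elided here:
 *
 *	struiod_t uiod;
 *
 *	(set up uiod.d_mp and uiod.d_uio for the current write)
 *	error = rwnext(wq, &uiod);
 *	if (error == EINVAL)
 *		(not a sync stream any more; fall back to putnext)
 *	else if (error == EWOULDBLOCK)
 *		(downstream write queue is flow controlled; wait and retry)
 */
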
3611*7c478bd9Sstevel@tonic-gate /*
3612*7c478bd9Sstevel@tonic-gate  * The purpose of infonext() is to call the info procedure of the next
3613*7c478bd9Sstevel@tonic-gate  * (downstream) module's queue.
3614*7c478bd9Sstevel@tonic-gate  *
3615*7c478bd9Sstevel@tonic-gate  * Treated as a put entrypoint for perimeter synchronization.
3616*7c478bd9Sstevel@tonic-gate  *
3617*7c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
3618*7c478bd9Sstevel@tonic-gate  * sync queues). If it is a CIPUT sync queue, the regular sq_count is
3619*7c478bd9Sstevel@tonic-gate  * incremented and it does not matter if any regular put entrypoints have
3620*7c478bd9Sstevel@tonic-gate  * already been entered.
3621*7c478bd9Sstevel@tonic-gate  */
3622*7c478bd9Sstevel@tonic-gate int
3623*7c478bd9Sstevel@tonic-gate infonext(queue_t *qp, infod_t *idp)
3624*7c478bd9Sstevel@tonic-gate {
3625*7c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
3626*7c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
3627*7c478bd9Sstevel@tonic-gate 	uint16_t	count;
3628*7c478bd9Sstevel@tonic-gate 	uint16_t 	flags;
3629*7c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
3630*7c478bd9Sstevel@tonic-gate 	int		(*proc)();
3631*7c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
3632*7c478bd9Sstevel@tonic-gate 	int		rval;
3633*7c478bd9Sstevel@tonic-gate 
3634*7c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
3635*7c478bd9Sstevel@tonic-gate 	/*
3636*7c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until
3637*7c478bd9Sstevel@tonic-gate 	 * acquiring SQLOCK.
3638*7c478bd9Sstevel@tonic-gate 	 */
3639*7c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
3640*7c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
3641*7c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
3642*7c478bd9Sstevel@tonic-gate 	} else {
3643*7c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
3644*7c478bd9Sstevel@tonic-gate 	}
3645*7c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
3646*7c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
3647*7c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
3648*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
3649*7c478bd9Sstevel@tonic-gate 	}
3650*7c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
3651*7c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
3652*7c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
3653*7c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
3654*7c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
3655*7c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
3656*7c478bd9Sstevel@tonic-gate 
3657*7c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
3658*7c478bd9Sstevel@tonic-gate 		/*
3659*7c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
3660*7c478bd9Sstevel@tonic-gate 		 */
3661*7c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
3662*7c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
3663*7c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
3664*7c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
3665*7c478bd9Sstevel@tonic-gate 	}
3666*7c478bd9Sstevel@tonic-gate 
3667*7c478bd9Sstevel@tonic-gate 	if (! (flags & SQ_CIPUT))
3668*7c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
3669*7c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
3670*7c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
3671*7c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
3672*7c478bd9Sstevel@tonic-gate 
3673*7c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, idp);
3674*7c478bd9Sstevel@tonic-gate 
3675*7c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
3676*7c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
3677*7c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
3678*7c478bd9Sstevel@tonic-gate 	sq->sq_count--;
3679*7c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
3680*7c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
3681*7c478bd9Sstevel@tonic-gate 		/*
3682*7c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
3683*7c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
3684*7c478bd9Sstevel@tonic-gate 		 */
3685*7c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
3686*7c478bd9Sstevel@tonic-gate 		return (rval);
3687*7c478bd9Sstevel@tonic-gate 	}
3688*7c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
3689*7c478bd9Sstevel@tonic-gate /*
3690*7c478bd9Sstevel@tonic-gate  * XXXX
3691*7c478bd9Sstevel@tonic-gate  * I am not certain the next comment is correct here.  I need to consider
3692*7c478bd9Sstevel@tonic-gate  * why infonext is called, and whether always dropping SQ_EXCL (rather than
3693*7c478bd9Sstevel@tonic-gate  * only when !SQ_CIPUT) might cause other problems.  It just might be safer
3694*7c478bd9Sstevel@tonic-gate  * to drop it only if !SQ_CIPUT because that is when we set it.
3695*7c478bd9Sstevel@tonic-gate  */
3696*7c478bd9Sstevel@tonic-gate 	/*
3697*7c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
3698*7c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
3699*7c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
3700*7c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
3701*7c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
3702*7c478bd9Sstevel@tonic-gate 	 *
3703*7c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
3704*7c478bd9Sstevel@tonic-gate 	 *
3705*7c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
3706*7c478bd9Sstevel@tonic-gate 	 *	sq->sq_count == 0);
3707*7c478bd9Sstevel@tonic-gate 	 *
3708*7c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
3709*7c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
3710*7c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
3711*7c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
3712*7c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
3713*7c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
3714*7c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
3715*7c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
3716*7c478bd9Sstevel@tonic-gate 	 * test invalid.
3717*7c478bd9Sstevel@tonic-gate 	 */
3718*7c478bd9Sstevel@tonic-gate 
3719*7c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
3720*7c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
3721*7c478bd9Sstevel@tonic-gate 	return (rval);
3722*7c478bd9Sstevel@tonic-gate }
3723*7c478bd9Sstevel@tonic-gate 
3724*7c478bd9Sstevel@tonic-gate /*
3725*7c478bd9Sstevel@tonic-gate  * Return nonzero if the queue is responsible for struio(), else return 0.
3726*7c478bd9Sstevel@tonic-gate  */
3727*7c478bd9Sstevel@tonic-gate int
3728*7c478bd9Sstevel@tonic-gate isuioq(queue_t *q)
3729*7c478bd9Sstevel@tonic-gate {
3730*7c478bd9Sstevel@tonic-gate 	if (q->q_flag & QREADR)
3731*7c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiordq == q);
3732*7c478bd9Sstevel@tonic-gate 	else
3733*7c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiowrq == q);
3734*7c478bd9Sstevel@tonic-gate }
3735*7c478bd9Sstevel@tonic-gate 
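/*
 * Illustrative sketch (not part of the original source): a put or service
 * routine can check whether its queue is the one designated for struio()
 * processing on this stream before doing any deferred-uio work:
 *
 *	if (isuioq(q)) {
 *		(this queue owns struio() processing for the stream)
 *	}
 */
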
3736*7c478bd9Sstevel@tonic-gate #if defined(__sparc)
3737*7c478bd9Sstevel@tonic-gate int disable_putlocks = 0;
3738*7c478bd9Sstevel@tonic-gate #else
3739*7c478bd9Sstevel@tonic-gate int disable_putlocks = 1;
3740*7c478bd9Sstevel@tonic-gate #endif
3741*7c478bd9Sstevel@tonic-gate 
3742*7c478bd9Sstevel@tonic-gate /*
3743*7c478bd9Sstevel@tonic-gate  * Called by create_putlocks().
3744*7c478bd9Sstevel@tonic-gate  */
3745*7c478bd9Sstevel@tonic-gate static void
3746*7c478bd9Sstevel@tonic-gate create_syncq_putlocks(queue_t *q)
3747*7c478bd9Sstevel@tonic-gate {
3748*7c478bd9Sstevel@tonic-gate 	syncq_t	*sq = q->q_syncq;
3749*7c478bd9Sstevel@tonic-gate 	ciputctrl_t *cip;
3750*7c478bd9Sstevel@tonic-gate 	int i;
3751*7c478bd9Sstevel@tonic-gate 
3752*7c478bd9Sstevel@tonic-gate 	ASSERT(sq != NULL);
3753*7c478bd9Sstevel@tonic-gate 
3754*7c478bd9Sstevel@tonic-gate 	ASSERT(disable_putlocks == 0);
3755*7c478bd9Sstevel@tonic-gate 	ASSERT(n_ciputctrl >= min_n_ciputctrl);
3756*7c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
3757*7c478bd9Sstevel@tonic-gate 
3758*7c478bd9Sstevel@tonic-gate 	if (!(sq->sq_type & SQ_CIPUT))
3759*7c478bd9Sstevel@tonic-gate 		return;
3760*7c478bd9Sstevel@tonic-gate 
3761*7c478bd9Sstevel@tonic-gate 	for (i = 0; i <= 1; i++) {
3762*7c478bd9Sstevel@tonic-gate 		if (sq->sq_ciputctrl == NULL) {
3763*7c478bd9Sstevel@tonic-gate 			cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
3764*7c478bd9Sstevel@tonic-gate 			SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
3765*7c478bd9Sstevel@tonic-gate 			mutex_enter(SQLOCK(sq));
3766*7c478bd9Sstevel@tonic-gate 			if (sq->sq_ciputctrl != NULL) {
3767*7c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
3768*7c478bd9Sstevel@tonic-gate 				kmem_cache_free(ciputctrl_cache, cip);
3769*7c478bd9Sstevel@tonic-gate 			} else {
3770*7c478bd9Sstevel@tonic-gate 				ASSERT(sq->sq_nciputctrl == 0);
3771*7c478bd9Sstevel@tonic-gate 				sq->sq_nciputctrl = n_ciputctrl - 1;
3772*7c478bd9Sstevel@tonic-gate 				/*
3773*7c478bd9Sstevel@tonic-gate 				 * putnext checks sq_ciputctrl without holding
3774*7c478bd9Sstevel@tonic-gate 				 * SQLOCK. If it is not NULL, putnext assumes
3775*7c478bd9Sstevel@tonic-gate 				 * sq_nciputctrl is initialized. The membar
3776*7c478bd9Sstevel@tonic-gate 				 * below ensures that.
3777*7c478bd9Sstevel@tonic-gate 				 */
3778*7c478bd9Sstevel@tonic-gate 				membar_producer();
3779*7c478bd9Sstevel@tonic-gate 				sq->sq_ciputctrl = cip;
3780*7c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
3781*7c478bd9Sstevel@tonic-gate 			}
3782*7c478bd9Sstevel@tonic-gate 		}
3783*7c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
3784*7c478bd9Sstevel@tonic-gate 		if (i == 1)
3785*7c478bd9Sstevel@tonic-gate 			break;
3786*7c478bd9Sstevel@tonic-gate 		q = _OTHERQ(q);
3787*7c478bd9Sstevel@tonic-gate 		if (!(q->q_flag & QPERQ)) {
3788*7c478bd9Sstevel@tonic-gate 			ASSERT(sq == q->q_syncq);
3789*7c478bd9Sstevel@tonic-gate 			break;
3790*7c478bd9Sstevel@tonic-gate 		}
3791*7c478bd9Sstevel@tonic-gate 		ASSERT(q->q_syncq != NULL);
3792*7c478bd9Sstevel@tonic-gate 		ASSERT(sq != q->q_syncq);
3793*7c478bd9Sstevel@tonic-gate 		sq = q->q_syncq;
3794*7c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_type & SQ_CIPUT);
3795*7c478bd9Sstevel@tonic-gate 	}
3796*7c478bd9Sstevel@tonic-gate }
3797*7c478bd9Sstevel@tonic-gate 
3798*7c478bd9Sstevel@tonic-gate /*
3799*7c478bd9Sstevel@tonic-gate  * If the stream argument is 0, only create per-cpu sq_putlocks/sq_putcounts
3800*7c478bd9Sstevel@tonic-gate  * for the syncq of q. If the stream argument is not 0, create per-cpu
3801*7c478bd9Sstevel@tonic-gate  * stream_putlocks for the stream of q and per-cpu sq_putlocks/sq_putcounts
3802*7c478bd9Sstevel@tonic-gate  * for all syncqs starting from q and down to the driver.
3803*7c478bd9Sstevel@tonic-gate  *
3804*7c478bd9Sstevel@tonic-gate  * This should be called after the affected queues are part of the stream
3805*7c478bd9Sstevel@tonic-gate  * geometry. It should be called from the driver/module open routine after
3806*7c478bd9Sstevel@tonic-gate  * the qprocson() call. It is also called from the nfs syscall path where it
3807*7c478bd9Sstevel@tonic-gate  * is known that the stream is configured and won't change its geometry
3808*7c478bd9Sstevel@tonic-gate  * during the create_putlocks call.
3809*7c478bd9Sstevel@tonic-gate  *
3810*7c478bd9Sstevel@tonic-gate  * The caller normally passes 0 for the stream argument to speed up MT putnext
3811*7c478bd9Sstevel@tonic-gate  * into the perimeter of q, for example because its perimeter is per module
3812*7c478bd9Sstevel@tonic-gate  * (e.g. IP).
3813*7c478bd9Sstevel@tonic-gate  *
3814*7c478bd9Sstevel@tonic-gate  * The caller normally passes a non-zero value for the stream argument to hint
3815*7c478bd9Sstevel@tonic-gate  * to the system that the stream of q is a heavily contended global system
3816*7c478bd9Sstevel@tonic-gate  * stream (e.g. NFS/UDP) and that the part of the stream from q to the driver
3817*7c478bd9Sstevel@tonic-gate  * is particularly MT hot.
3818*7c478bd9Sstevel@tonic-gate  *
3819*7c478bd9Sstevel@tonic-gate  * The caller ensures stream plumbing won't happen while we are here, and
3820*7c478bd9Sstevel@tonic-gate  * therefore q_next can be safely used.
3821*7c478bd9Sstevel@tonic-gate  */
3822*7c478bd9Sstevel@tonic-gate 
3823*7c478bd9Sstevel@tonic-gate void
3824*7c478bd9Sstevel@tonic-gate create_putlocks(queue_t *q, int stream)
3825*7c478bd9Sstevel@tonic-gate {
3826*7c478bd9Sstevel@tonic-gate 	ciputctrl_t	*cip;
3827*7c478bd9Sstevel@tonic-gate 	struct stdata	*stp = STREAM(q);
3828*7c478bd9Sstevel@tonic-gate 
3829*7c478bd9Sstevel@tonic-gate 	q = _WR(q);
3830*7c478bd9Sstevel@tonic-gate 	ASSERT(stp != NULL);
3831*7c478bd9Sstevel@tonic-gate 
3832*7c478bd9Sstevel@tonic-gate 	if (disable_putlocks != 0)
3833*7c478bd9Sstevel@tonic-gate 		return;
3834*7c478bd9Sstevel@tonic-gate 
3835*7c478bd9Sstevel@tonic-gate 	if (n_ciputctrl < min_n_ciputctrl)
3836*7c478bd9Sstevel@tonic-gate 		return;
3837*7c478bd9Sstevel@tonic-gate 
3838*7c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
3839*7c478bd9Sstevel@tonic-gate 
3840*7c478bd9Sstevel@tonic-gate 	if (stream != 0 && stp->sd_ciputctrl == NULL) {
3841*7c478bd9Sstevel@tonic-gate 		cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
3842*7c478bd9Sstevel@tonic-gate 		SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
3843*7c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
3844*7c478bd9Sstevel@tonic-gate 		if (stp->sd_ciputctrl != NULL) {
3845*7c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
3846*7c478bd9Sstevel@tonic-gate 			kmem_cache_free(ciputctrl_cache, cip);
3847*7c478bd9Sstevel@tonic-gate 		} else {
3848*7c478bd9Sstevel@tonic-gate 			ASSERT(stp->sd_nciputctrl == 0);
3849*7c478bd9Sstevel@tonic-gate 			stp->sd_nciputctrl = n_ciputctrl - 1;
3850*7c478bd9Sstevel@tonic-gate 			/*
3851*7c478bd9Sstevel@tonic-gate 			 * putnext checks sd_ciputctrl without holding
3852*7c478bd9Sstevel@tonic-gate 			 * putnext checks sd_ciputctrl without holding
3853*7c478bd9Sstevel@tonic-gate 			 * sd_lock. If it is not NULL, putnext assumes
3854*7c478bd9Sstevel@tonic-gate 			 * sd_nciputctrl is initialized. The membar below
3855*7c478bd9Sstevel@tonic-gate 			 * ensures that.
3855*7c478bd9Sstevel@tonic-gate 			 */
3856*7c478bd9Sstevel@tonic-gate 			membar_producer();
3857*7c478bd9Sstevel@tonic-gate 			stp->sd_ciputctrl = cip;
3858*7c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
3859*7c478bd9Sstevel@tonic-gate 		}
3860*7c478bd9Sstevel@tonic-gate 	}
3861*7c478bd9Sstevel@tonic-gate 
3862*7c478bd9Sstevel@tonic-gate 	ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);
3863*7c478bd9Sstevel@tonic-gate 
3864*7c478bd9Sstevel@tonic-gate 	while (_SAMESTR(q)) {
3865*7c478bd9Sstevel@tonic-gate 		create_syncq_putlocks(q);
3866*7c478bd9Sstevel@tonic-gate 		if (stream == 0)
3867*7c478bd9Sstevel@tonic-gate 			return;
3868*7c478bd9Sstevel@tonic-gate 		q = q->q_next;
3869*7c478bd9Sstevel@tonic-gate 	}
3870*7c478bd9Sstevel@tonic-gate 	ASSERT(q != NULL);
3871*7c478bd9Sstevel@tonic-gate 	create_syncq_putlocks(q);
3872*7c478bd9Sstevel@tonic-gate }
3873*7c478bd9Sstevel@tonic-gate 
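/*
 * Illustrative sketch (not part of the original source): per the comment
 * above, a driver or module open routine would turn on the queue pair and
 * then enable per-cpu putlocks for its syncq; the surrounding open logic is
 * omitted here.
 *
 *	qprocson(q);
 *	create_putlocks(q, 0);
 */
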
3874*7c478bd9Sstevel@tonic-gate /*
3875*7c478bd9Sstevel@tonic-gate  * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
3876*7c478bd9Sstevel@tonic-gate  * through a stream.
3877*7c478bd9Sstevel@tonic-gate  *
3878*7c478bd9Sstevel@tonic-gate  * The data currently recorded per event are a hrtime stamp, queue address,
3879*7c478bd9Sstevel@tonic-gate  * event type, and a per-type datum.  Much of the STREAMS framework is
3880*7c478bd9Sstevel@tonic-gate  * instrumented for automatic flow tracing (when enabled).  Events can be
3881*7c478bd9Sstevel@tonic-gate  * defined and used by STREAMS modules and drivers.
3882*7c478bd9Sstevel@tonic-gate  *
3883*7c478bd9Sstevel@tonic-gate  * Global objects:
3884*7c478bd9Sstevel@tonic-gate  *
3885*7c478bd9Sstevel@tonic-gate  *	str_ftevent() - Add a flow-trace event to a dblk.
3886*7c478bd9Sstevel@tonic-gate  *	str_ftfree() - Free flow-trace data
3887*7c478bd9Sstevel@tonic-gate  *
3888*7c478bd9Sstevel@tonic-gate  * Local objects:
3889*7c478bd9Sstevel@tonic-gate  *
3890*7c478bd9Sstevel@tonic-gate  *	fthdr_cache - pointer to the kmem cache for trace header.
3891*7c478bd9Sstevel@tonic-gate  *	ftblk_cache - pointer to the kmem cache for trace data blocks.
3892*7c478bd9Sstevel@tonic-gate  */
3893*7c478bd9Sstevel@tonic-gate 
3894*7c478bd9Sstevel@tonic-gate int str_ftnever = 1;	/* Don't do STREAMS flow tracing */
3895*7c478bd9Sstevel@tonic-gate 
3896*7c478bd9Sstevel@tonic-gate void
3897*7c478bd9Sstevel@tonic-gate str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
3898*7c478bd9Sstevel@tonic-gate {
3899*7c478bd9Sstevel@tonic-gate 	ftblk_t *bp = hp->tail;
3900*7c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
3901*7c478bd9Sstevel@tonic-gate 	ftevnt_t *ep;
3902*7c478bd9Sstevel@tonic-gate 	int ix, nix;
3903*7c478bd9Sstevel@tonic-gate 
3904*7c478bd9Sstevel@tonic-gate 	ASSERT(hp != NULL);
3905*7c478bd9Sstevel@tonic-gate 
3906*7c478bd9Sstevel@tonic-gate 	for (;;) {
3907*7c478bd9Sstevel@tonic-gate 		if ((ix = bp->ix) == FTBLK_EVNTS) {
3908*7c478bd9Sstevel@tonic-gate 			/*
3909*7c478bd9Sstevel@tonic-gate 			 * Tail doesn't have room, so need a new tail.
3910*7c478bd9Sstevel@tonic-gate 			 *
3911*7c478bd9Sstevel@tonic-gate 			 * To make this MT safe, first, allocate a new
3912*7c478bd9Sstevel@tonic-gate 			 * ftblk, and initialize it.  To make life a
3913*7c478bd9Sstevel@tonic-gate 			 * little easier, reserve the first slot (mostly
3914*7c478bd9Sstevel@tonic-gate 			 * by making ix = 1).  When we are finished with
3915*7c478bd9Sstevel@tonic-gate 			 * the initialization, CAS this pointer to the
3916*7c478bd9Sstevel@tonic-gate 			 * tail.  If this succeeds, this is the new
3917*7c478bd9Sstevel@tonic-gate 			 * "next" block.  Otherwise, another thread
3918*7c478bd9Sstevel@tonic-gate 			 * got here first, so free the block and start
3919*7c478bd9Sstevel@tonic-gate 			 * again.
3920*7c478bd9Sstevel@tonic-gate 			 */
3921*7c478bd9Sstevel@tonic-gate 			if (!(nbp = kmem_cache_alloc(ftblk_cache,
3922*7c478bd9Sstevel@tonic-gate 			    KM_NOSLEEP))) {
3923*7c478bd9Sstevel@tonic-gate 				/* no mem, so punt */
3924*7c478bd9Sstevel@tonic-gate 				str_ftnever++;
3925*7c478bd9Sstevel@tonic-gate 				/* free up all flow data? */
3926*7c478bd9Sstevel@tonic-gate 				return;
3927*7c478bd9Sstevel@tonic-gate 			}
3928*7c478bd9Sstevel@tonic-gate 			nbp->nxt = NULL;
3929*7c478bd9Sstevel@tonic-gate 			nbp->ix = 1;
3930*7c478bd9Sstevel@tonic-gate 			/*
3931*7c478bd9Sstevel@tonic-gate 			 * Just in case there is another thread about
3932*7c478bd9Sstevel@tonic-gate 			 * to get the next index, we need to make sure
3933*7c478bd9Sstevel@tonic-gate 			 * the value is there for it.
3934*7c478bd9Sstevel@tonic-gate 			 */
3935*7c478bd9Sstevel@tonic-gate 			membar_producer();
3936*7c478bd9Sstevel@tonic-gate 			if (casptr(&hp->tail, bp, nbp) == bp) {
3937*7c478bd9Sstevel@tonic-gate 				/* CAS was successful */
3938*7c478bd9Sstevel@tonic-gate 				bp->nxt = nbp;
3939*7c478bd9Sstevel@tonic-gate 				membar_producer();
3940*7c478bd9Sstevel@tonic-gate 				bp = nbp;
3941*7c478bd9Sstevel@tonic-gate 				ix = 0;
3942*7c478bd9Sstevel@tonic-gate 				goto cas_good;
3943*7c478bd9Sstevel@tonic-gate 			} else {
3944*7c478bd9Sstevel@tonic-gate 				kmem_cache_free(ftblk_cache, nbp);
3945*7c478bd9Sstevel@tonic-gate 				bp = hp->tail;
3946*7c478bd9Sstevel@tonic-gate 				continue;
3947*7c478bd9Sstevel@tonic-gate 			}
3948*7c478bd9Sstevel@tonic-gate 		}
3949*7c478bd9Sstevel@tonic-gate 		nix = ix + 1;
3950*7c478bd9Sstevel@tonic-gate 		if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) {
3951*7c478bd9Sstevel@tonic-gate 		cas_good:
3952*7c478bd9Sstevel@tonic-gate 			if (curthread != hp->thread) {
3953*7c478bd9Sstevel@tonic-gate 				hp->thread = curthread;
3954*7c478bd9Sstevel@tonic-gate 				evnt |= FTEV_CS;
3955*7c478bd9Sstevel@tonic-gate 			}
3956*7c478bd9Sstevel@tonic-gate 			if (CPU->cpu_seqid != hp->cpu_seqid) {
3957*7c478bd9Sstevel@tonic-gate 				hp->cpu_seqid = CPU->cpu_seqid;
3958*7c478bd9Sstevel@tonic-gate 				evnt |= FTEV_PS;
3959*7c478bd9Sstevel@tonic-gate 			}
3960*7c478bd9Sstevel@tonic-gate 			ep = &bp->ev[ix];
3961*7c478bd9Sstevel@tonic-gate 			break;
3962*7c478bd9Sstevel@tonic-gate 		}
3963*7c478bd9Sstevel@tonic-gate 	}
3964*7c478bd9Sstevel@tonic-gate 
3965*7c478bd9Sstevel@tonic-gate 	if (evnt & FTEV_QMASK) {
3966*7c478bd9Sstevel@tonic-gate 		queue_t *qp = p;
3967*7c478bd9Sstevel@tonic-gate 
3968*7c478bd9Sstevel@tonic-gate 		/*
3969*7c478bd9Sstevel@tonic-gate 		 * It is possible that the module info is broken
3970*7c478bd9Sstevel@tonic-gate 		 * (as is logsubr.c at the time of this writing).
3971*7c478bd9Sstevel@tonic-gate 		 * Instead of panicking or doing other unmentionables,
3972*7c478bd9Sstevel@tonic-gate 		 * we shall put a dummy name as the mid, and continue.
3973*7c478bd9Sstevel@tonic-gate 		 */
3974*7c478bd9Sstevel@tonic-gate 		if (qp->q_qinfo == NULL)
3975*7c478bd9Sstevel@tonic-gate 			ep->mid = "NONAME";
3976*7c478bd9Sstevel@tonic-gate 		else
3977*7c478bd9Sstevel@tonic-gate 			ep->mid = qp->q_qinfo->qi_minfo->mi_idname;
3978*7c478bd9Sstevel@tonic-gate 
3979*7c478bd9Sstevel@tonic-gate 		if (!(qp->q_flag & QREADR))
3980*7c478bd9Sstevel@tonic-gate 			evnt |= FTEV_ISWR;
3981*7c478bd9Sstevel@tonic-gate 	} else {
3982*7c478bd9Sstevel@tonic-gate 		ep->mid = (char *)p;
3983*7c478bd9Sstevel@tonic-gate 	}
3984*7c478bd9Sstevel@tonic-gate 
3985*7c478bd9Sstevel@tonic-gate 	ep->ts = gethrtime();
3986*7c478bd9Sstevel@tonic-gate 	ep->evnt = evnt;
3987*7c478bd9Sstevel@tonic-gate 	ep->data = data;
3988*7c478bd9Sstevel@tonic-gate 	hp->hash = (hp->hash << 9) + hp->hash;
3989*7c478bd9Sstevel@tonic-gate 	hp->hash += (evnt << 16) | data;
3990*7c478bd9Sstevel@tonic-gate 	hp->hash += (uintptr_t)ep->mid;
3991*7c478bd9Sstevel@tonic-gate }
3992*7c478bd9Sstevel@tonic-gate 
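/*
 * Illustrative sketch (not part of the original source): framework code
 * records events through wrapper macros rather than by calling str_ftevent()
 * directly; rwnext() above, for example, tags a message with the queue it is
 * about to traverse and the offset of b_rptr within the data block:
 *
 *	STR_FTEVENT_MSG(mp, q, FTEV_RWNEXT,
 *	    mp->b_rptr - mp->b_datap->db_base);
 */
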
3993*7c478bd9Sstevel@tonic-gate /*
3994*7c478bd9Sstevel@tonic-gate  * Free flow-trace data.
3995*7c478bd9Sstevel@tonic-gate  */
3996*7c478bd9Sstevel@tonic-gate void
3997*7c478bd9Sstevel@tonic-gate str_ftfree(dblk_t *dbp)
3998*7c478bd9Sstevel@tonic-gate {
3999*7c478bd9Sstevel@tonic-gate 	fthdr_t *hp = dbp->db_fthdr;
4000*7c478bd9Sstevel@tonic-gate 	ftblk_t *bp = &hp->first;
4001*7c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
4002*7c478bd9Sstevel@tonic-gate 
4003*7c478bd9Sstevel@tonic-gate 	if (bp != hp->tail || bp->ix != 0) {
4004*7c478bd9Sstevel@tonic-gate 		/*
4005*7c478bd9Sstevel@tonic-gate 		 * Clear out the hash, have the tail point to itself, and free
4006*7c478bd9Sstevel@tonic-gate 		 * any continuation blocks.
4007*7c478bd9Sstevel@tonic-gate 		 */
4008*7c478bd9Sstevel@tonic-gate 		bp = hp->first.nxt;
4009*7c478bd9Sstevel@tonic-gate 		hp->tail = &hp->first;
4010*7c478bd9Sstevel@tonic-gate 		hp->hash = 0;
4011*7c478bd9Sstevel@tonic-gate 		hp->first.nxt = NULL;
4012*7c478bd9Sstevel@tonic-gate 		hp->first.ix = 0;
4013*7c478bd9Sstevel@tonic-gate 		while (bp != NULL) {
4014*7c478bd9Sstevel@tonic-gate 			nbp = bp->nxt;
4015*7c478bd9Sstevel@tonic-gate 			kmem_cache_free(ftblk_cache, bp);
4016*7c478bd9Sstevel@tonic-gate 			bp = nbp;
4017*7c478bd9Sstevel@tonic-gate 		}
4018*7c478bd9Sstevel@tonic-gate 	}
4019*7c478bd9Sstevel@tonic-gate 	kmem_cache_free(fthdr_cache, hp);
4020*7c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
4021*7c478bd9Sstevel@tonic-gate }
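
/*
 * Illustrative sketch (not part of the original source): the dblk free path
 * is expected to release any attached flow-trace data before recycling the
 * block, roughly:
 *
 *	if (dbp->db_fthdr != NULL)
 *		str_ftfree(dbp);
 */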
4022