xref: /titanic_53/usr/src/uts/common/io/stream.c (revision b5fca8f855054d167d04d3b4de5210c83ed2083c)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5381a2a9aSdr146992  * Common Development and Distribution License (the "License").
6381a2a9aSdr146992  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
227c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
237c478bd9Sstevel@tonic-gate 
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate /*
2617169044Sbrutus  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
277c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
287c478bd9Sstevel@tonic-gate  */
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
317c478bd9Sstevel@tonic-gate 
327c478bd9Sstevel@tonic-gate #include <sys/types.h>
337c478bd9Sstevel@tonic-gate #include <sys/param.h>
347c478bd9Sstevel@tonic-gate #include <sys/thread.h>
357c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
367c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
377c478bd9Sstevel@tonic-gate #include <sys/stream.h>
387c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
397c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
407c478bd9Sstevel@tonic-gate #include <sys/conf.h>
417c478bd9Sstevel@tonic-gate #include <sys/debug.h>
427c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
437c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
447c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
457c478bd9Sstevel@tonic-gate #include <sys/errno.h>
467c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
477c478bd9Sstevel@tonic-gate #include <sys/ftrace.h>
487c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
497c478bd9Sstevel@tonic-gate #include <sys/multidata.h>
507c478bd9Sstevel@tonic-gate #include <sys/multidata_impl.h>
517c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
52ae35285aSmeem #include <sys/strft.h>
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate #ifdef DEBUG
557c478bd9Sstevel@tonic-gate #include <sys/kmem_impl.h>
567c478bd9Sstevel@tonic-gate #endif
577c478bd9Sstevel@tonic-gate 
587c478bd9Sstevel@tonic-gate /*
597c478bd9Sstevel@tonic-gate  * This file contains all the STREAMS utility routines that may
607c478bd9Sstevel@tonic-gate  * be used by modules and drivers.
617c478bd9Sstevel@tonic-gate  */
627c478bd9Sstevel@tonic-gate 
637c478bd9Sstevel@tonic-gate /*
647c478bd9Sstevel@tonic-gate  * STREAMS message allocator: principles of operation
657c478bd9Sstevel@tonic-gate  *
667c478bd9Sstevel@tonic-gate  * The streams message allocator consists of all the routines that
677c478bd9Sstevel@tonic-gate  * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
687c478bd9Sstevel@tonic-gate  * dupb(), freeb() and freemsg().  What follows is a high-level view
697c478bd9Sstevel@tonic-gate  * of how the allocator works.
707c478bd9Sstevel@tonic-gate  *
717c478bd9Sstevel@tonic-gate  * Every streams message consists of one or more mblks, a dblk, and data.
727c478bd9Sstevel@tonic-gate  * All mblks for all types of messages come from a common mblk_cache.
737c478bd9Sstevel@tonic-gate  * The dblk and data come in several flavors, depending on how the
747c478bd9Sstevel@tonic-gate  * message is allocated:
757c478bd9Sstevel@tonic-gate  *
767c478bd9Sstevel@tonic-gate  * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
777c478bd9Sstevel@tonic-gate  *     fixed-size dblk/data caches. For message sizes that are multiples of
787c478bd9Sstevel@tonic-gate  *     PAGESIZE, dblks are allocated separately from the buffer.
797c478bd9Sstevel@tonic-gate  *     The associated buffer is allocated by the constructor using kmem_alloc().
807c478bd9Sstevel@tonic-gate  *     For all other message sizes, dblk and its associated data is allocated
817c478bd9Sstevel@tonic-gate  *     as a single contiguous chunk of memory.
827c478bd9Sstevel@tonic-gate  *     Objects in these caches consist of a dblk plus its associated data.
837c478bd9Sstevel@tonic-gate  *     allocb() determines the nearest-size cache by table lookup:
847c478bd9Sstevel@tonic-gate  *     the dblk_cache[] array provides the mapping from size to dblk cache.
857c478bd9Sstevel@tonic-gate  *
867c478bd9Sstevel@tonic-gate  * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
877c478bd9Sstevel@tonic-gate  *     kmem_alloc()'ing a buffer for the data and supplying that
887c478bd9Sstevel@tonic-gate  *     buffer to gesballoc(), described below.
897c478bd9Sstevel@tonic-gate  *
907c478bd9Sstevel@tonic-gate  * (3) The four flavors of [d]esballoc[a] are all implemented by a
917c478bd9Sstevel@tonic-gate  *     common routine, gesballoc() ("generic esballoc").  gesballoc()
927c478bd9Sstevel@tonic-gate  *     allocates a dblk from the global dblk_esb_cache and sets db_base,
937c478bd9Sstevel@tonic-gate  *     db_lim and db_frtnp to describe the caller-supplied buffer.
947c478bd9Sstevel@tonic-gate  *
957c478bd9Sstevel@tonic-gate  * While there are several routines to allocate messages, there is only
967c478bd9Sstevel@tonic-gate  * one routine to free messages: freeb().  freeb() simply invokes the
977c478bd9Sstevel@tonic-gate  * dblk's free method, dbp->db_free(), which is set at allocation time.
987c478bd9Sstevel@tonic-gate  *
997c478bd9Sstevel@tonic-gate  * dupb() creates a new reference to a message by allocating a new mblk,
1007c478bd9Sstevel@tonic-gate  * incrementing the dblk reference count and setting the dblk's free
1017c478bd9Sstevel@tonic-gate  * method to dblk_decref().  The dblk's original free method is retained
1027c478bd9Sstevel@tonic-gate  * in db_lastfree.  dblk_decref() decrements the reference count on each
1037c478bd9Sstevel@tonic-gate  * freeb().  If this is not the last reference it just frees the mblk;
1047c478bd9Sstevel@tonic-gate  * if this *is* the last reference, it restores db_free to db_lastfree,
1057c478bd9Sstevel@tonic-gate  * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
1067c478bd9Sstevel@tonic-gate  *
1077c478bd9Sstevel@tonic-gate  * The implementation makes aggressive use of kmem object caching for
1087c478bd9Sstevel@tonic-gate  * maximum performance.  This makes the code simple and compact, but
1097c478bd9Sstevel@tonic-gate  * also a bit abstruse in some places.  The invariants that constitute a
1107c478bd9Sstevel@tonic-gate  * message's constructed state, described below, are more subtle than usual.
1117c478bd9Sstevel@tonic-gate  *
1127c478bd9Sstevel@tonic-gate  * Every dblk has an "attached mblk" as part of its constructed state.
1137c478bd9Sstevel@tonic-gate  * The mblk is allocated by the dblk's constructor and remains attached
1147c478bd9Sstevel@tonic-gate  * until the message is either dup'ed or pulled up.  In the dupb() case
1157c478bd9Sstevel@tonic-gate  * the mblk association doesn't matter until the last free, at which time
1167c478bd9Sstevel@tonic-gate  * dblk_decref() attaches the last mblk to the dblk.  pullupmsg() affects
1177c478bd9Sstevel@tonic-gate  * the mblk association because it swaps the leading mblks of two messages,
1187c478bd9Sstevel@tonic-gate  * so it is responsible for swapping their db_mblk pointers accordingly.
1197c478bd9Sstevel@tonic-gate  * From a constructed-state viewpoint it doesn't matter that a dblk's
1207c478bd9Sstevel@tonic-gate  * attached mblk can change while the message is allocated; all that
1217c478bd9Sstevel@tonic-gate  * matters is that the dblk has *some* attached mblk when it's freed.
1227c478bd9Sstevel@tonic-gate  *
1237c478bd9Sstevel@tonic-gate  * The sizes of the allocb() small-message caches are not magical.
1247c478bd9Sstevel@tonic-gate  * They represent a good trade-off between internal and external
1257c478bd9Sstevel@tonic-gate  * fragmentation for current workloads.  They should be reevaluated
1267c478bd9Sstevel@tonic-gate  * periodically, especially if allocations larger than DBLK_MAX_CACHE
1277c478bd9Sstevel@tonic-gate  * become common.  We use 64-byte alignment so that dblks don't
1287c478bd9Sstevel@tonic-gate  * straddle cache lines unnecessarily.
1297c478bd9Sstevel@tonic-gate  */
/*
 * Allocator tunables: the largest message size served from the fixed-size
 * dblk caches, the alignment used for those caches, and the granularity
 * (DBLK_MIN_SIZE == 1 << DBLK_SIZE_SHIFT) of the size-to-cache lookup table.
 */
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64
#define	DBLK_MIN_SIZE		8
#define	DBLK_SIZE_SHIFT		3

/*
 * DBLK_RTFU_SHIFT(field) is the bit position of a dblk byte field within
 * the 32-bit word that DBLK_RTFU_WORD() reads at &db_ref.  The position is
 * derived from the field's byte distance to db_struioflag (big-endian) or
 * to db_ref (little-endian).
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Build the combined ref/type/flags/uioflag word so that all four fields
 * can be stored with a single 32-bit write.  (ref) - 1 is OR'ed into the
 * flags byte; dblk_decref() later masks that byte with DBLK_REFMIN to
 * recover the minimum reference count.
 *
 * Every use of a macro argument is parenthesized so that an expression
 * argument (e.g. "c ? 2 : 1") is evaluated as a unit.
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | ((ref) - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
/* Access db_ref..db_struioflag (dblk) or b_band.. (mblk) as one 32-bit word */
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
1517c478bd9Sstevel@tonic-gate 
/*
 * Buffer sizes for which a dedicated dblk cache is created, in ascending
 * order.  The list is terminated by 0: streams_msg_init() walks it until
 * it reads a zero entry.  LP64 and non-LP64 kernels use different tables;
 * both end with DBLK_MAX_CACHE.  streams_msg_init() asserts that each
 * entry (plus sizeof (dblk_t) when the entry is not a multiple of the
 * page size) is a multiple of DBLK_CACHE_ALIGN.
 */
1527c478bd9Sstevel@tonic-gate static size_t dblk_sizes[] = {
1537c478bd9Sstevel@tonic-gate #ifdef _LP64
154*b5fca8f8Stomee 	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856,
155*b5fca8f8Stomee 	8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624,
156*b5fca8f8Stomee 	40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392,
1577c478bd9Sstevel@tonic-gate #else
158*b5fca8f8Stomee 	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904,
159*b5fca8f8Stomee 	8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672,
160*b5fca8f8Stomee 	40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440,
1617c478bd9Sstevel@tonic-gate #endif
1627c478bd9Sstevel@tonic-gate 	DBLK_MAX_CACHE, 0
1637c478bd9Sstevel@tonic-gate };
1647c478bd9Sstevel@tonic-gate 
/*
 * kmem caches used by the message allocator; all are created in
 * streams_msg_init().  dblk_cache[] maps a request size to its cache and
 * is indexed by (size - 1) >> DBLK_SIZE_SHIFT.
 */
1657c478bd9Sstevel@tonic-gate static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
1667c478bd9Sstevel@tonic-gate static struct kmem_cache *mblk_cache;
1677c478bd9Sstevel@tonic-gate static struct kmem_cache *dblk_esb_cache;
1687c478bd9Sstevel@tonic-gate static struct kmem_cache *fthdr_cache;
1697c478bd9Sstevel@tonic-gate static struct kmem_cache *ftblk_cache;
1707c478bd9Sstevel@tonic-gate 
/*
 * Forward declarations.  frnop is a free-routine descriptor whose
 * function is frnop_func().
 * NOTE(review): allocb_tryhard_fails is not referenced in this part of
 * the file; presumably a failure counter for allocb_tryhard() -- confirm.
 */
1717c478bd9Sstevel@tonic-gate static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1727c478bd9Sstevel@tonic-gate static mblk_t *allocb_oversize(size_t size, int flags);
1737c478bd9Sstevel@tonic-gate static int allocb_tryhard_fails;
1747c478bd9Sstevel@tonic-gate static void frnop_func(void *arg);
1757c478bd9Sstevel@tonic-gate frtn_t frnop = { frnop_func };
1767c478bd9Sstevel@tonic-gate static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate static boolean_t rwnext_enter(queue_t *qp);
1797c478bd9Sstevel@tonic-gate static void rwnext_exit(queue_t *qp);
1807c478bd9Sstevel@tonic-gate 
1817c478bd9Sstevel@tonic-gate /*
1827c478bd9Sstevel@tonic-gate  * Patchable mblk/dblk kmem_cache flags.
1837c478bd9Sstevel@tonic-gate  */
/* Passed as the final (flags) argument to kmem_cache_create() in streams_msg_init() */
1847c478bd9Sstevel@tonic-gate int dblk_kmem_flags = 0;
1857c478bd9Sstevel@tonic-gate int mblk_kmem_flags = 0;
1867c478bd9Sstevel@tonic-gate 
1877c478bd9Sstevel@tonic-gate 
1887c478bd9Sstevel@tonic-gate static int
1897c478bd9Sstevel@tonic-gate dblk_constructor(void *buf, void *cdrarg, int kmflags)
1907c478bd9Sstevel@tonic-gate {
1917c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
1927c478bd9Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
1937c478bd9Sstevel@tonic-gate 	size_t index;
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	ASSERT(msg_size != 0);
1967c478bd9Sstevel@tonic-gate 
1977c478bd9Sstevel@tonic-gate 	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;
1987c478bd9Sstevel@tonic-gate 
199e4506d67Smeem 	ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2027c478bd9Sstevel@tonic-gate 		return (-1);
2037c478bd9Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2047c478bd9Sstevel@tonic-gate 		dbp->db_base = kmem_alloc(msg_size, kmflags);
2057c478bd9Sstevel@tonic-gate 		if (dbp->db_base == NULL) {
2067c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, dbp->db_mblk);
2077c478bd9Sstevel@tonic-gate 			return (-1);
2087c478bd9Sstevel@tonic-gate 		}
2097c478bd9Sstevel@tonic-gate 	} else {
2107c478bd9Sstevel@tonic-gate 		dbp->db_base = (unsigned char *)&dbp[1];
2117c478bd9Sstevel@tonic-gate 	}
2127c478bd9Sstevel@tonic-gate 
2137c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2147c478bd9Sstevel@tonic-gate 	dbp->db_cache = dblk_cache[index];
2157c478bd9Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + msg_size;
2167c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
2177c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2187c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2197c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2207c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2217c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2227c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2237c478bd9Sstevel@tonic-gate 	return (0);
2247c478bd9Sstevel@tonic-gate }
2257c478bd9Sstevel@tonic-gate 
2267c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2277c478bd9Sstevel@tonic-gate static int
2287c478bd9Sstevel@tonic-gate dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
2297c478bd9Sstevel@tonic-gate {
2307c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2337c478bd9Sstevel@tonic-gate 		return (-1);
2347c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2357c478bd9Sstevel@tonic-gate 	dbp->db_cache = dblk_esb_cache;
2367c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2377c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2387c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2397c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2407c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2417c478bd9Sstevel@tonic-gate 	return (0);
2427c478bd9Sstevel@tonic-gate }
2437c478bd9Sstevel@tonic-gate 
2447c478bd9Sstevel@tonic-gate static int
2457c478bd9Sstevel@tonic-gate bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
2467c478bd9Sstevel@tonic-gate {
2477c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2487c478bd9Sstevel@tonic-gate 	bcache_t *bcp = (bcache_t *)cdrarg;
2497c478bd9Sstevel@tonic-gate 
2507c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2517c478bd9Sstevel@tonic-gate 		return (-1);
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate 	if ((dbp->db_base = (unsigned char *)kmem_cache_alloc(bcp->buffer_cache,
2547c478bd9Sstevel@tonic-gate 	    kmflags)) == NULL) {
2557c478bd9Sstevel@tonic-gate 		kmem_cache_free(mblk_cache, dbp->db_mblk);
2567c478bd9Sstevel@tonic-gate 		return (-1);
2577c478bd9Sstevel@tonic-gate 	}
2587c478bd9Sstevel@tonic-gate 
2597c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2607c478bd9Sstevel@tonic-gate 	dbp->db_cache = (void *)bcp;
2617c478bd9Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + bcp->size;
2627c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
2637c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2647c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2657c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2667c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2677c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2687c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2697c478bd9Sstevel@tonic-gate 	return (0);
2707c478bd9Sstevel@tonic-gate }
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2737c478bd9Sstevel@tonic-gate static void
2747c478bd9Sstevel@tonic-gate dblk_destructor(void *buf, void *cdrarg)
2757c478bd9Sstevel@tonic-gate {
2767c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2777c478bd9Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
2807c478bd9Sstevel@tonic-gate 
2817c478bd9Sstevel@tonic-gate 	ASSERT(msg_size != 0);
2827c478bd9Sstevel@tonic-gate 
2837c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
2847c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2877c478bd9Sstevel@tonic-gate 		kmem_free(dbp->db_base, msg_size);
2887c478bd9Sstevel@tonic-gate 	}
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
2917c478bd9Sstevel@tonic-gate }
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate static void
2947c478bd9Sstevel@tonic-gate bcache_dblk_destructor(void *buf, void *cdrarg)
2957c478bd9Sstevel@tonic-gate {
2967c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2977c478bd9Sstevel@tonic-gate 	bcache_t *bcp = (bcache_t *)cdrarg;
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate 	kmem_cache_free(bcp->buffer_cache, dbp->db_base);
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
3047c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
3057c478bd9Sstevel@tonic-gate 
3067c478bd9Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
3077c478bd9Sstevel@tonic-gate }
3087c478bd9Sstevel@tonic-gate 
3097c478bd9Sstevel@tonic-gate void
3107c478bd9Sstevel@tonic-gate streams_msg_init(void)
3117c478bd9Sstevel@tonic-gate {
3127c478bd9Sstevel@tonic-gate 	char name[40];
3137c478bd9Sstevel@tonic-gate 	size_t size;
3147c478bd9Sstevel@tonic-gate 	size_t lastsize = DBLK_MIN_SIZE;
3157c478bd9Sstevel@tonic-gate 	size_t *sizep;
3167c478bd9Sstevel@tonic-gate 	struct kmem_cache *cp;
3177c478bd9Sstevel@tonic-gate 	size_t tot_size;
3187c478bd9Sstevel@tonic-gate 	int offset;
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 	mblk_cache = kmem_cache_create("streams_mblk",
3217c478bd9Sstevel@tonic-gate 	    sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
3227c478bd9Sstevel@tonic-gate 	    mblk_kmem_flags);
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
3257c478bd9Sstevel@tonic-gate 
3267c478bd9Sstevel@tonic-gate 		if ((offset = (size & PAGEOFFSET)) != 0) {
3277c478bd9Sstevel@tonic-gate 			/*
3287c478bd9Sstevel@tonic-gate 			 * We are in the middle of a page, dblk should
3297c478bd9Sstevel@tonic-gate 			 * be allocated on the same page
3307c478bd9Sstevel@tonic-gate 			 */
3317c478bd9Sstevel@tonic-gate 			tot_size = size + sizeof (dblk_t);
3327c478bd9Sstevel@tonic-gate 			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
3337c478bd9Sstevel@tonic-gate 			    < PAGESIZE);
3347c478bd9Sstevel@tonic-gate 			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
3357c478bd9Sstevel@tonic-gate 
3367c478bd9Sstevel@tonic-gate 		} else {
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate 			/*
3397c478bd9Sstevel@tonic-gate 			 * buf size is multiple of page size, dblk and
3407c478bd9Sstevel@tonic-gate 			 * buffer are allocated separately.
3417c478bd9Sstevel@tonic-gate 			 */
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate 			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
3447c478bd9Sstevel@tonic-gate 			tot_size = sizeof (dblk_t);
3457c478bd9Sstevel@tonic-gate 		}
3467c478bd9Sstevel@tonic-gate 
3477c478bd9Sstevel@tonic-gate 		(void) sprintf(name, "streams_dblk_%ld", size);
3487c478bd9Sstevel@tonic-gate 		cp = kmem_cache_create(name, tot_size,
3497c478bd9Sstevel@tonic-gate 		    DBLK_CACHE_ALIGN, dblk_constructor,
3507c478bd9Sstevel@tonic-gate 		    dblk_destructor, NULL,
3517c478bd9Sstevel@tonic-gate 		    (void *)(size), NULL, dblk_kmem_flags);
3527c478bd9Sstevel@tonic-gate 
3537c478bd9Sstevel@tonic-gate 		while (lastsize <= size) {
3547c478bd9Sstevel@tonic-gate 			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
3557c478bd9Sstevel@tonic-gate 			lastsize += DBLK_MIN_SIZE;
3567c478bd9Sstevel@tonic-gate 		}
3577c478bd9Sstevel@tonic-gate 	}
3587c478bd9Sstevel@tonic-gate 
3597c478bd9Sstevel@tonic-gate 	dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
3607c478bd9Sstevel@tonic-gate 	    sizeof (dblk_t), DBLK_CACHE_ALIGN,
3617c478bd9Sstevel@tonic-gate 	    dblk_esb_constructor, dblk_destructor, NULL,
3627c478bd9Sstevel@tonic-gate 	    (void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
3637c478bd9Sstevel@tonic-gate 	fthdr_cache = kmem_cache_create("streams_fthdr",
3647c478bd9Sstevel@tonic-gate 	    sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
3657c478bd9Sstevel@tonic-gate 	ftblk_cache = kmem_cache_create("streams_ftblk",
3667c478bd9Sstevel@tonic-gate 	    sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
3677c478bd9Sstevel@tonic-gate 
3687c478bd9Sstevel@tonic-gate 	/* Initialize Multidata caches */
3697c478bd9Sstevel@tonic-gate 	mmd_init();
370e7d4b76fSss146032 
371e7d4b76fSss146032 	/* initialize throttling queue for esballoc */
372e7d4b76fSss146032 	esballoc_queue_init();
3737c478bd9Sstevel@tonic-gate }
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate /*ARGSUSED*/
3767c478bd9Sstevel@tonic-gate mblk_t *
3777c478bd9Sstevel@tonic-gate allocb(size_t size, uint_t pri)
3787c478bd9Sstevel@tonic-gate {
3797c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
3807c478bd9Sstevel@tonic-gate 	mblk_t *mp;
3817c478bd9Sstevel@tonic-gate 	size_t index;
3827c478bd9Sstevel@tonic-gate 
3837c478bd9Sstevel@tonic-gate 	index =  (size - 1)  >> DBLK_SIZE_SHIFT;
3847c478bd9Sstevel@tonic-gate 
3857c478bd9Sstevel@tonic-gate 	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
3867c478bd9Sstevel@tonic-gate 		if (size != 0) {
3877c478bd9Sstevel@tonic-gate 			mp = allocb_oversize(size, KM_NOSLEEP);
3887c478bd9Sstevel@tonic-gate 			goto out;
3897c478bd9Sstevel@tonic-gate 		}
3907c478bd9Sstevel@tonic-gate 		index = 0;
3917c478bd9Sstevel@tonic-gate 	}
3927c478bd9Sstevel@tonic-gate 
3937c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
3947c478bd9Sstevel@tonic-gate 		mp = NULL;
3957c478bd9Sstevel@tonic-gate 		goto out;
3967c478bd9Sstevel@tonic-gate 	}
3977c478bd9Sstevel@tonic-gate 
3987c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
3997c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
4007c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
4017c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
4027c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
4037c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
4047c478bd9Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
4057c478bd9Sstevel@tonic-gate out:
4067c478bd9Sstevel@tonic-gate 	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);
4077c478bd9Sstevel@tonic-gate 
4087c478bd9Sstevel@tonic-gate 	return (mp);
4097c478bd9Sstevel@tonic-gate }
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate mblk_t *
4127c478bd9Sstevel@tonic-gate allocb_tmpl(size_t size, const mblk_t *tmpl)
4137c478bd9Sstevel@tonic-gate {
4147c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4157c478bd9Sstevel@tonic-gate 
4167c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
4177c478bd9Sstevel@tonic-gate 		cred_t *cr = DB_CRED(tmpl);
4187c478bd9Sstevel@tonic-gate 		if (cr != NULL)
4197c478bd9Sstevel@tonic-gate 			crhold(mp->b_datap->db_credp = cr);
4207c478bd9Sstevel@tonic-gate 		DB_CPID(mp) = DB_CPID(tmpl);
4217c478bd9Sstevel@tonic-gate 		DB_TYPE(mp) = DB_TYPE(tmpl);
4227c478bd9Sstevel@tonic-gate 	}
4237c478bd9Sstevel@tonic-gate 	return (mp);
4247c478bd9Sstevel@tonic-gate }
4257c478bd9Sstevel@tonic-gate 
4267c478bd9Sstevel@tonic-gate mblk_t *
4277c478bd9Sstevel@tonic-gate allocb_cred(size_t size, cred_t *cr)
4287c478bd9Sstevel@tonic-gate {
4297c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4307c478bd9Sstevel@tonic-gate 
4317c478bd9Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
4327c478bd9Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate 	return (mp);
4357c478bd9Sstevel@tonic-gate }
4367c478bd9Sstevel@tonic-gate 
4377c478bd9Sstevel@tonic-gate mblk_t *
4387c478bd9Sstevel@tonic-gate allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr)
4397c478bd9Sstevel@tonic-gate {
4407c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb_wait(size, 0, flags, error);
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
4437c478bd9Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
4447c478bd9Sstevel@tonic-gate 
4457c478bd9Sstevel@tonic-gate 	return (mp);
4467c478bd9Sstevel@tonic-gate }
4477c478bd9Sstevel@tonic-gate 
4487c478bd9Sstevel@tonic-gate void
4497c478bd9Sstevel@tonic-gate freeb(mblk_t *mp)
4507c478bd9Sstevel@tonic-gate {
4517c478bd9Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_ref > 0);
4547c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
4557c478bd9Sstevel@tonic-gate 	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);
4567c478bd9Sstevel@tonic-gate 
4577c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
4587c478bd9Sstevel@tonic-gate 
4597c478bd9Sstevel@tonic-gate 	dbp->db_free(mp, dbp);
4607c478bd9Sstevel@tonic-gate }
4617c478bd9Sstevel@tonic-gate 
4627c478bd9Sstevel@tonic-gate void
4637c478bd9Sstevel@tonic-gate freemsg(mblk_t *mp)
4647c478bd9Sstevel@tonic-gate {
4657c478bd9Sstevel@tonic-gate 	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
4667c478bd9Sstevel@tonic-gate 	while (mp) {
4677c478bd9Sstevel@tonic-gate 		dblk_t *dbp = mp->b_datap;
4687c478bd9Sstevel@tonic-gate 		mblk_t *mp_cont = mp->b_cont;
4697c478bd9Sstevel@tonic-gate 
4707c478bd9Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
4717c478bd9Sstevel@tonic-gate 		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
4727c478bd9Sstevel@tonic-gate 
4737c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
4747c478bd9Sstevel@tonic-gate 
4757c478bd9Sstevel@tonic-gate 		dbp->db_free(mp, dbp);
4767c478bd9Sstevel@tonic-gate 		mp = mp_cont;
4777c478bd9Sstevel@tonic-gate 	}
4787c478bd9Sstevel@tonic-gate }
4797c478bd9Sstevel@tonic-gate 
4807c478bd9Sstevel@tonic-gate /*
4817c478bd9Sstevel@tonic-gate  * Reallocate a block for another use.  Try hard to use the old block.
4827c478bd9Sstevel@tonic-gate  * If the old data is wanted (copy), leave b_wptr at the end of the data,
4837c478bd9Sstevel@tonic-gate  * otherwise return b_wptr = b_rptr.
4847c478bd9Sstevel@tonic-gate  *
4857c478bd9Sstevel@tonic-gate  * This routine is private and unstable.
4867c478bd9Sstevel@tonic-gate  */
4877c478bd9Sstevel@tonic-gate mblk_t	*
4887c478bd9Sstevel@tonic-gate reallocb(mblk_t *mp, size_t size, uint_t copy)
4897c478bd9Sstevel@tonic-gate {
4907c478bd9Sstevel@tonic-gate 	mblk_t		*mp1;
4917c478bd9Sstevel@tonic-gate 	unsigned char	*old_rptr;
4927c478bd9Sstevel@tonic-gate 	ptrdiff_t	cur_size;
4937c478bd9Sstevel@tonic-gate 
4947c478bd9Sstevel@tonic-gate 	if (mp == NULL)
4957c478bd9Sstevel@tonic-gate 		return (allocb(size, BPRI_HI));
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 	cur_size = mp->b_wptr - mp->b_rptr;
4987c478bd9Sstevel@tonic-gate 	old_rptr = mp->b_rptr;
4997c478bd9Sstevel@tonic-gate 
5007c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref != 0);
5017c478bd9Sstevel@tonic-gate 
5027c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
5037c478bd9Sstevel@tonic-gate 		/*
5047c478bd9Sstevel@tonic-gate 		 * If the data is wanted and it will fit where it is, no
5057c478bd9Sstevel@tonic-gate 		 * work is required.
5067c478bd9Sstevel@tonic-gate 		 */
5077c478bd9Sstevel@tonic-gate 		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
5087c478bd9Sstevel@tonic-gate 			return (mp);
5097c478bd9Sstevel@tonic-gate 
5107c478bd9Sstevel@tonic-gate 		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
5117c478bd9Sstevel@tonic-gate 		mp1 = mp;
5127c478bd9Sstevel@tonic-gate 	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
5137c478bd9Sstevel@tonic-gate 		/* XXX other mp state could be copied too, db_flags ... ? */
5147c478bd9Sstevel@tonic-gate 		mp1->b_cont = mp->b_cont;
5157c478bd9Sstevel@tonic-gate 	} else {
5167c478bd9Sstevel@tonic-gate 		return (NULL);
5177c478bd9Sstevel@tonic-gate 	}
5187c478bd9Sstevel@tonic-gate 
5197c478bd9Sstevel@tonic-gate 	if (copy) {
5207c478bd9Sstevel@tonic-gate 		bcopy(old_rptr, mp1->b_rptr, cur_size);
5217c478bd9Sstevel@tonic-gate 		mp1->b_wptr = mp1->b_rptr + cur_size;
5227c478bd9Sstevel@tonic-gate 	}
5237c478bd9Sstevel@tonic-gate 
5247c478bd9Sstevel@tonic-gate 	if (mp != mp1)
5257c478bd9Sstevel@tonic-gate 		freeb(mp);
5267c478bd9Sstevel@tonic-gate 
5277c478bd9Sstevel@tonic-gate 	return (mp1);
5287c478bd9Sstevel@tonic-gate }
5297c478bd9Sstevel@tonic-gate 
5307c478bd9Sstevel@tonic-gate static void
5317c478bd9Sstevel@tonic-gate dblk_lastfree(mblk_t *mp, dblk_t *dbp)
5327c478bd9Sstevel@tonic-gate {
5337c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
5347c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
5357c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
5367c478bd9Sstevel@tonic-gate 
5377c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
5387c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
5397c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
5407c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
5417c478bd9Sstevel@tonic-gate 	}
5427c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
5437c478bd9Sstevel@tonic-gate 
5447c478bd9Sstevel@tonic-gate 	/* Reset the struioflag and the checksum flag fields */
5457c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
5467c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
5477c478bd9Sstevel@tonic-gate 
54817169044Sbrutus 	/* and the COOKED and/or UIOA flag(s) */
54917169044Sbrutus 	dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);
550c28749e9Skais 
5517c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
5527c478bd9Sstevel@tonic-gate }
5537c478bd9Sstevel@tonic-gate 
5547c478bd9Sstevel@tonic-gate static void
5557c478bd9Sstevel@tonic-gate dblk_decref(mblk_t *mp, dblk_t *dbp)
5567c478bd9Sstevel@tonic-gate {
5577c478bd9Sstevel@tonic-gate 	if (dbp->db_ref != 1) {
5587c478bd9Sstevel@tonic-gate 		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
5597c478bd9Sstevel@tonic-gate 		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
5607c478bd9Sstevel@tonic-gate 		/*
5617c478bd9Sstevel@tonic-gate 		 * atomic_add_32_nv() just decremented db_ref, so we no longer
5627c478bd9Sstevel@tonic-gate 		 * have a reference to the dblk, which means another thread
5637c478bd9Sstevel@tonic-gate 		 * could free it.  Therefore we cannot examine the dblk to
5647c478bd9Sstevel@tonic-gate 		 * determine whether ours was the last reference.  Instead,
5657c478bd9Sstevel@tonic-gate 		 * we extract the new and minimum reference counts from rtfu.
5667c478bd9Sstevel@tonic-gate 		 * Note that all we're really saying is "if (ref != refmin)".
5677c478bd9Sstevel@tonic-gate 		 */
5687c478bd9Sstevel@tonic-gate 		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
5697c478bd9Sstevel@tonic-gate 		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
5707c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, mp);
5717c478bd9Sstevel@tonic-gate 			return;
5727c478bd9Sstevel@tonic-gate 		}
5737c478bd9Sstevel@tonic-gate 	}
5747c478bd9Sstevel@tonic-gate 	dbp->db_mblk = mp;
5757c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree;
5767c478bd9Sstevel@tonic-gate 	dbp->db_lastfree(mp, dbp);
5777c478bd9Sstevel@tonic-gate }
5787c478bd9Sstevel@tonic-gate 
5797c478bd9Sstevel@tonic-gate mblk_t *
5807c478bd9Sstevel@tonic-gate dupb(mblk_t *mp)
5817c478bd9Sstevel@tonic-gate {
5827c478bd9Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
5837c478bd9Sstevel@tonic-gate 	mblk_t *new_mp;
5847c478bd9Sstevel@tonic-gate 	uint32_t oldrtfu, newrtfu;
5857c478bd9Sstevel@tonic-gate 
5867c478bd9Sstevel@tonic-gate 	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
5877c478bd9Sstevel@tonic-gate 		goto out;
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
5907c478bd9Sstevel@tonic-gate 	new_mp->b_rptr = mp->b_rptr;
5917c478bd9Sstevel@tonic-gate 	new_mp->b_wptr = mp->b_wptr;
5927c478bd9Sstevel@tonic-gate 	new_mp->b_datap = dbp;
5937c478bd9Sstevel@tonic-gate 	new_mp->b_queue = NULL;
5947c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);
5957c478bd9Sstevel@tonic-gate 
5967c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);
5977c478bd9Sstevel@tonic-gate 
5987c478bd9Sstevel@tonic-gate 	dbp->db_free = dblk_decref;
5997c478bd9Sstevel@tonic-gate 	do {
6007c478bd9Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
6017c478bd9Sstevel@tonic-gate 		oldrtfu = DBLK_RTFU_WORD(dbp);
6027c478bd9Sstevel@tonic-gate 		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
6037c478bd9Sstevel@tonic-gate 		/*
6047c478bd9Sstevel@tonic-gate 		 * If db_ref is maxed out we can't dup this message anymore.
6057c478bd9Sstevel@tonic-gate 		 */
6067c478bd9Sstevel@tonic-gate 		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
6077c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, new_mp);
6087c478bd9Sstevel@tonic-gate 			new_mp = NULL;
6097c478bd9Sstevel@tonic-gate 			goto out;
6107c478bd9Sstevel@tonic-gate 		}
6117c478bd9Sstevel@tonic-gate 	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);
6127c478bd9Sstevel@tonic-gate 
6137c478bd9Sstevel@tonic-gate out:
6147c478bd9Sstevel@tonic-gate 	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
6157c478bd9Sstevel@tonic-gate 	return (new_mp);
6167c478bd9Sstevel@tonic-gate }
6177c478bd9Sstevel@tonic-gate 
6187c478bd9Sstevel@tonic-gate static void
6197c478bd9Sstevel@tonic-gate dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
6207c478bd9Sstevel@tonic-gate {
6217c478bd9Sstevel@tonic-gate 	frtn_t *frp = dbp->db_frtnp;
6227c478bd9Sstevel@tonic-gate 
6237c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
6247c478bd9Sstevel@tonic-gate 	frp->free_func(frp->free_arg);
6257c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
6267c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
6297c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
6307c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
6317c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
6327c478bd9Sstevel@tonic-gate 	}
6337c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
6347c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
6357c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
6367c478bd9Sstevel@tonic-gate 
6377c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
6387c478bd9Sstevel@tonic-gate }
6397c478bd9Sstevel@tonic-gate 
/*
 * Free routine that deliberately does nothing.
 * NOTE(review): presumably the free_func of the 'frnop' frtn_t that
 * allocb_oversize() passes to gesballoc() -- confirm at its definition.
 */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
	/* intentionally empty */
}
6457c478bd9Sstevel@tonic-gate 
6467c478bd9Sstevel@tonic-gate /*
6477c478bd9Sstevel@tonic-gate  * Generic esballoc used to implement the four flavors: [d]esballoc[a].
6487c478bd9Sstevel@tonic-gate  */
6497c478bd9Sstevel@tonic-gate static mblk_t *
6507c478bd9Sstevel@tonic-gate gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
6517c478bd9Sstevel@tonic-gate 	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
6527c478bd9Sstevel@tonic-gate {
6537c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
6547c478bd9Sstevel@tonic-gate 	mblk_t *mp;
6557c478bd9Sstevel@tonic-gate 
6567c478bd9Sstevel@tonic-gate 	ASSERT(base != NULL && frp != NULL);
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
6597c478bd9Sstevel@tonic-gate 		mp = NULL;
6607c478bd9Sstevel@tonic-gate 		goto out;
6617c478bd9Sstevel@tonic-gate 	}
6627c478bd9Sstevel@tonic-gate 
6637c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
6647c478bd9Sstevel@tonic-gate 	dbp->db_base = base;
6657c478bd9Sstevel@tonic-gate 	dbp->db_lim = base + size;
6667c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = lastfree;
6677c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = frp;
6687c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = db_rtfu;
6697c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
6707c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = base;
6717c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
6727c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
6737c478bd9Sstevel@tonic-gate 
6747c478bd9Sstevel@tonic-gate out:
6757c478bd9Sstevel@tonic-gate 	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
6767c478bd9Sstevel@tonic-gate 	return (mp);
6777c478bd9Sstevel@tonic-gate }
6787c478bd9Sstevel@tonic-gate 
6797c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6807c478bd9Sstevel@tonic-gate mblk_t *
6817c478bd9Sstevel@tonic-gate esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
6827c478bd9Sstevel@tonic-gate {
6837c478bd9Sstevel@tonic-gate 	mblk_t *mp;
6847c478bd9Sstevel@tonic-gate 
6857c478bd9Sstevel@tonic-gate 	/*
6867c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
6877c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
6887c478bd9Sstevel@tonic-gate 	 * call optimization.
6897c478bd9Sstevel@tonic-gate 	 */
6907c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
6917c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
6927c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
6937c478bd9Sstevel@tonic-gate 
6947c478bd9Sstevel@tonic-gate 		if (mp != NULL)
6957c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
6967c478bd9Sstevel@tonic-gate 		return (mp);
6977c478bd9Sstevel@tonic-gate 	}
6987c478bd9Sstevel@tonic-gate 
6997c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7007c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
7017c478bd9Sstevel@tonic-gate }
7027c478bd9Sstevel@tonic-gate 
7037c478bd9Sstevel@tonic-gate /*
7047c478bd9Sstevel@tonic-gate  * Same as esballoc() but sleeps waiting for memory.
7057c478bd9Sstevel@tonic-gate  */
7067c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7077c478bd9Sstevel@tonic-gate mblk_t *
7087c478bd9Sstevel@tonic-gate esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7097c478bd9Sstevel@tonic-gate {
7107c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7117c478bd9Sstevel@tonic-gate 
7127c478bd9Sstevel@tonic-gate 	/*
7137c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7147c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7157c478bd9Sstevel@tonic-gate 	 * call optimization.
7167c478bd9Sstevel@tonic-gate 	 */
7177c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7187c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7197c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_SLEEP);
7207c478bd9Sstevel@tonic-gate 
7217c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
7227c478bd9Sstevel@tonic-gate 		return (mp);
7237c478bd9Sstevel@tonic-gate 	}
7247c478bd9Sstevel@tonic-gate 
7257c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7267c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_SLEEP));
7277c478bd9Sstevel@tonic-gate }
7287c478bd9Sstevel@tonic-gate 
7297c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7307c478bd9Sstevel@tonic-gate mblk_t *
7317c478bd9Sstevel@tonic-gate desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7327c478bd9Sstevel@tonic-gate {
7337c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7347c478bd9Sstevel@tonic-gate 
7357c478bd9Sstevel@tonic-gate 	/*
7367c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7377c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7387c478bd9Sstevel@tonic-gate 	 * call optimization.
7397c478bd9Sstevel@tonic-gate 	 */
7407c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7417c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7427c478bd9Sstevel@tonic-gate 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
7437c478bd9Sstevel@tonic-gate 
7447c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7457c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
7467c478bd9Sstevel@tonic-gate 		return (mp);
7477c478bd9Sstevel@tonic-gate 	}
7487c478bd9Sstevel@tonic-gate 
7497c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7507c478bd9Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
7517c478bd9Sstevel@tonic-gate }
7527c478bd9Sstevel@tonic-gate 
7537c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7547c478bd9Sstevel@tonic-gate mblk_t *
7557c478bd9Sstevel@tonic-gate esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7567c478bd9Sstevel@tonic-gate {
7577c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7587c478bd9Sstevel@tonic-gate 
7597c478bd9Sstevel@tonic-gate 	/*
7607c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7617c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7627c478bd9Sstevel@tonic-gate 	 * call optimization.
7637c478bd9Sstevel@tonic-gate 	 */
7647c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7657c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7667c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
7677c478bd9Sstevel@tonic-gate 
7687c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7697c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
7707c478bd9Sstevel@tonic-gate 		return (mp);
7717c478bd9Sstevel@tonic-gate 	}
7727c478bd9Sstevel@tonic-gate 
7737c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7747c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
7757c478bd9Sstevel@tonic-gate }
7767c478bd9Sstevel@tonic-gate 
7777c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7787c478bd9Sstevel@tonic-gate mblk_t *
7797c478bd9Sstevel@tonic-gate desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7807c478bd9Sstevel@tonic-gate {
7817c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7827c478bd9Sstevel@tonic-gate 
7837c478bd9Sstevel@tonic-gate 	/*
7847c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7857c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7867c478bd9Sstevel@tonic-gate 	 * call optimization.
7877c478bd9Sstevel@tonic-gate 	 */
7887c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7897c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7907c478bd9Sstevel@tonic-gate 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
7917c478bd9Sstevel@tonic-gate 
7927c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7937c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
7947c478bd9Sstevel@tonic-gate 		return (mp);
7957c478bd9Sstevel@tonic-gate 	}
7967c478bd9Sstevel@tonic-gate 
7977c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7987c478bd9Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
7997c478bd9Sstevel@tonic-gate }
8007c478bd9Sstevel@tonic-gate 
8017c478bd9Sstevel@tonic-gate static void
8027c478bd9Sstevel@tonic-gate bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
8037c478bd9Sstevel@tonic-gate {
8047c478bd9Sstevel@tonic-gate 	bcache_t *bcp = dbp->db_cache;
8057c478bd9Sstevel@tonic-gate 
8067c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
8077c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
8087c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
8097c478bd9Sstevel@tonic-gate 
8107c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
8117c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
8127c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
8137c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
8147c478bd9Sstevel@tonic-gate 	}
8157c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
8167c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
8177c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
8187c478bd9Sstevel@tonic-gate 
8197c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8207c478bd9Sstevel@tonic-gate 	kmem_cache_free(bcp->dblk_cache, dbp);
8217c478bd9Sstevel@tonic-gate 	bcp->alloc--;
8227c478bd9Sstevel@tonic-gate 
8237c478bd9Sstevel@tonic-gate 	if (bcp->alloc == 0 && bcp->destroy != 0) {
8247c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
8257c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
8267c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8277c478bd9Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
8287c478bd9Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
8297c478bd9Sstevel@tonic-gate 	} else {
8307c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8317c478bd9Sstevel@tonic-gate 	}
8327c478bd9Sstevel@tonic-gate }
8337c478bd9Sstevel@tonic-gate 
8347c478bd9Sstevel@tonic-gate bcache_t *
8357c478bd9Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align)
8367c478bd9Sstevel@tonic-gate {
8377c478bd9Sstevel@tonic-gate 	bcache_t *bcp;
8387c478bd9Sstevel@tonic-gate 	char buffer[255];
8397c478bd9Sstevel@tonic-gate 
8407c478bd9Sstevel@tonic-gate 	ASSERT((align & (align - 1)) == 0);
8417c478bd9Sstevel@tonic-gate 
8427c478bd9Sstevel@tonic-gate 	if ((bcp = (bcache_t *)kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) ==
8437c478bd9Sstevel@tonic-gate 	    NULL) {
8447c478bd9Sstevel@tonic-gate 		return (NULL);
8457c478bd9Sstevel@tonic-gate 	}
8467c478bd9Sstevel@tonic-gate 
8477c478bd9Sstevel@tonic-gate 	bcp->size = size;
8487c478bd9Sstevel@tonic-gate 	bcp->align = align;
8497c478bd9Sstevel@tonic-gate 	bcp->alloc = 0;
8507c478bd9Sstevel@tonic-gate 	bcp->destroy = 0;
8517c478bd9Sstevel@tonic-gate 
8527c478bd9Sstevel@tonic-gate 	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
8537c478bd9Sstevel@tonic-gate 
8547c478bd9Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_buffer_cache", name);
8557c478bd9Sstevel@tonic-gate 	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
8567c478bd9Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
8577c478bd9Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_dblk_cache", name);
8587c478bd9Sstevel@tonic-gate 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
8597c478bd9Sstevel@tonic-gate 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
8607c478bd9Sstevel@tonic-gate 	    NULL, (void *)bcp, NULL, 0);
8617c478bd9Sstevel@tonic-gate 
8627c478bd9Sstevel@tonic-gate 	return (bcp);
8637c478bd9Sstevel@tonic-gate }
8647c478bd9Sstevel@tonic-gate 
8657c478bd9Sstevel@tonic-gate void
8667c478bd9Sstevel@tonic-gate bcache_destroy(bcache_t *bcp)
8677c478bd9Sstevel@tonic-gate {
8687c478bd9Sstevel@tonic-gate 	ASSERT(bcp != NULL);
8697c478bd9Sstevel@tonic-gate 
8707c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8717c478bd9Sstevel@tonic-gate 	if (bcp->alloc == 0) {
8727c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
8737c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
8747c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8757c478bd9Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
8767c478bd9Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
8777c478bd9Sstevel@tonic-gate 	} else {
8787c478bd9Sstevel@tonic-gate 		bcp->destroy++;
8797c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8807c478bd9Sstevel@tonic-gate 	}
8817c478bd9Sstevel@tonic-gate }
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8847c478bd9Sstevel@tonic-gate mblk_t *
8857c478bd9Sstevel@tonic-gate bcache_allocb(bcache_t *bcp, uint_t pri)
8867c478bd9Sstevel@tonic-gate {
8877c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
8887c478bd9Sstevel@tonic-gate 	mblk_t *mp = NULL;
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate 	ASSERT(bcp != NULL);
8917c478bd9Sstevel@tonic-gate 
8927c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8937c478bd9Sstevel@tonic-gate 	if (bcp->destroy != 0) {
8947c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8957c478bd9Sstevel@tonic-gate 		goto out;
8967c478bd9Sstevel@tonic-gate 	}
8977c478bd9Sstevel@tonic-gate 
8987c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
8997c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
9007c478bd9Sstevel@tonic-gate 		goto out;
9017c478bd9Sstevel@tonic-gate 	}
9027c478bd9Sstevel@tonic-gate 	bcp->alloc++;
9037c478bd9Sstevel@tonic-gate 	mutex_exit(&bcp->mutex);
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
9067c478bd9Sstevel@tonic-gate 
9077c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
9087c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
9097c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
9107c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
9117c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
9127c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
9137c478bd9Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
9147c478bd9Sstevel@tonic-gate out:
9157c478bd9Sstevel@tonic-gate 	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
9167c478bd9Sstevel@tonic-gate 
9177c478bd9Sstevel@tonic-gate 	return (mp);
9187c478bd9Sstevel@tonic-gate }
9197c478bd9Sstevel@tonic-gate 
9207c478bd9Sstevel@tonic-gate static void
9217c478bd9Sstevel@tonic-gate dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
9227c478bd9Sstevel@tonic-gate {
9237c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
9247c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
9257c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
9267c478bd9Sstevel@tonic-gate 
9277c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
9287c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
9297c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
9307c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
9317c478bd9Sstevel@tonic-gate 	}
9327c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
9337c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
9347c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
9357c478bd9Sstevel@tonic-gate 
9367c478bd9Sstevel@tonic-gate 	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
9377c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
9387c478bd9Sstevel@tonic-gate }
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate static mblk_t *
9417c478bd9Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags)
9427c478bd9Sstevel@tonic-gate {
9437c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9447c478bd9Sstevel@tonic-gate 	void *buf;
9457c478bd9Sstevel@tonic-gate 
9467c478bd9Sstevel@tonic-gate 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
9477c478bd9Sstevel@tonic-gate 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
9487c478bd9Sstevel@tonic-gate 		return (NULL);
9497c478bd9Sstevel@tonic-gate 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
9507c478bd9Sstevel@tonic-gate 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
9517c478bd9Sstevel@tonic-gate 		kmem_free(buf, size);
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate 	if (mp != NULL)
9547c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
9557c478bd9Sstevel@tonic-gate 
9567c478bd9Sstevel@tonic-gate 	return (mp);
9577c478bd9Sstevel@tonic-gate }
9587c478bd9Sstevel@tonic-gate 
9597c478bd9Sstevel@tonic-gate mblk_t *
9607c478bd9Sstevel@tonic-gate allocb_tryhard(size_t target_size)
9617c478bd9Sstevel@tonic-gate {
9627c478bd9Sstevel@tonic-gate 	size_t size;
9637c478bd9Sstevel@tonic-gate 	mblk_t *bp;
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate 	for (size = target_size; size < target_size + 512;
9667c478bd9Sstevel@tonic-gate 	    size += DBLK_CACHE_ALIGN)
9677c478bd9Sstevel@tonic-gate 		if ((bp = allocb(size, BPRI_HI)) != NULL)
9687c478bd9Sstevel@tonic-gate 			return (bp);
9697c478bd9Sstevel@tonic-gate 	allocb_tryhard_fails++;
9707c478bd9Sstevel@tonic-gate 	return (NULL);
9717c478bd9Sstevel@tonic-gate }
9727c478bd9Sstevel@tonic-gate 
9737c478bd9Sstevel@tonic-gate /*
9747c478bd9Sstevel@tonic-gate  * This routine is consolidation private for STREAMS internal use
9757c478bd9Sstevel@tonic-gate  * This routine may only be called from sync routines (i.e., not
9767c478bd9Sstevel@tonic-gate  * from put or service procedures).  It is located here (rather
9777c478bd9Sstevel@tonic-gate  * than strsubr.c) so that we don't have to expose all of the
9787c478bd9Sstevel@tonic-gate  * allocb() implementation details in header files.
9797c478bd9Sstevel@tonic-gate  */
9807c478bd9Sstevel@tonic-gate mblk_t *
9817c478bd9Sstevel@tonic-gate allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
9827c478bd9Sstevel@tonic-gate {
9837c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
9847c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9857c478bd9Sstevel@tonic-gate 	size_t index;
9867c478bd9Sstevel@tonic-gate 
9877c478bd9Sstevel@tonic-gate 	index = (size -1) >> DBLK_SIZE_SHIFT;
9887c478bd9Sstevel@tonic-gate 
9897c478bd9Sstevel@tonic-gate 	if (flags & STR_NOSIG) {
9907c478bd9Sstevel@tonic-gate 		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
9917c478bd9Sstevel@tonic-gate 			if (size != 0) {
9927c478bd9Sstevel@tonic-gate 				mp = allocb_oversize(size, KM_SLEEP);
9937c478bd9Sstevel@tonic-gate 				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
9947c478bd9Sstevel@tonic-gate 				    (uintptr_t)mp);
9957c478bd9Sstevel@tonic-gate 				return (mp);
9967c478bd9Sstevel@tonic-gate 			}
9977c478bd9Sstevel@tonic-gate 			index = 0;
9987c478bd9Sstevel@tonic-gate 		}
9997c478bd9Sstevel@tonic-gate 
10007c478bd9Sstevel@tonic-gate 		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
10017c478bd9Sstevel@tonic-gate 		mp = dbp->db_mblk;
10027c478bd9Sstevel@tonic-gate 		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
10037c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = mp->b_cont = NULL;
10047c478bd9Sstevel@tonic-gate 		mp->b_rptr = mp->b_wptr = dbp->db_base;
10057c478bd9Sstevel@tonic-gate 		mp->b_queue = NULL;
10067c478bd9Sstevel@tonic-gate 		MBLK_BAND_FLAG_WORD(mp) = 0;
10077c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
10087c478bd9Sstevel@tonic-gate 
10097c478bd9Sstevel@tonic-gate 		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
10107c478bd9Sstevel@tonic-gate 
10117c478bd9Sstevel@tonic-gate 	} else {
10127c478bd9Sstevel@tonic-gate 		while ((mp = allocb(size, pri)) == NULL) {
10137c478bd9Sstevel@tonic-gate 			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
10147c478bd9Sstevel@tonic-gate 				return (NULL);
10157c478bd9Sstevel@tonic-gate 		}
10167c478bd9Sstevel@tonic-gate 	}
10177c478bd9Sstevel@tonic-gate 
10187c478bd9Sstevel@tonic-gate 	return (mp);
10197c478bd9Sstevel@tonic-gate }
10207c478bd9Sstevel@tonic-gate 
10217c478bd9Sstevel@tonic-gate /*
10227c478bd9Sstevel@tonic-gate  * Call function 'func' with 'arg' when a class zero block can
10237c478bd9Sstevel@tonic-gate  * be allocated with priority 'pri'.
10247c478bd9Sstevel@tonic-gate  */
10257c478bd9Sstevel@tonic-gate bufcall_id_t
10267c478bd9Sstevel@tonic-gate esbbcall(uint_t pri, void (*func)(void *), void *arg)
10277c478bd9Sstevel@tonic-gate {
10287c478bd9Sstevel@tonic-gate 	return (bufcall(1, pri, func, arg));
10297c478bd9Sstevel@tonic-gate }
10307c478bd9Sstevel@tonic-gate 
10317c478bd9Sstevel@tonic-gate /*
10327c478bd9Sstevel@tonic-gate  * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
10337c478bd9Sstevel@tonic-gate  * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
10347c478bd9Sstevel@tonic-gate  * This provides consistency for all internal allocators of ioctl.
10357c478bd9Sstevel@tonic-gate  */
10367c478bd9Sstevel@tonic-gate mblk_t *
10377c478bd9Sstevel@tonic-gate mkiocb(uint_t cmd)
10387c478bd9Sstevel@tonic-gate {
10397c478bd9Sstevel@tonic-gate 	struct iocblk	*ioc;
10407c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
10417c478bd9Sstevel@tonic-gate 
10427c478bd9Sstevel@tonic-gate 	/*
10437c478bd9Sstevel@tonic-gate 	 * Allocate enough space for any of the ioctl related messages.
10447c478bd9Sstevel@tonic-gate 	 */
10457c478bd9Sstevel@tonic-gate 	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
10467c478bd9Sstevel@tonic-gate 		return (NULL);
10477c478bd9Sstevel@tonic-gate 
10487c478bd9Sstevel@tonic-gate 	bzero(mp->b_rptr, sizeof (union ioctypes));
10497c478bd9Sstevel@tonic-gate 
10507c478bd9Sstevel@tonic-gate 	/*
10517c478bd9Sstevel@tonic-gate 	 * Set the mblk_t information and ptrs correctly.
10527c478bd9Sstevel@tonic-gate 	 */
10537c478bd9Sstevel@tonic-gate 	mp->b_wptr += sizeof (struct iocblk);
10547c478bd9Sstevel@tonic-gate 	mp->b_datap->db_type = M_IOCTL;
10557c478bd9Sstevel@tonic-gate 
10567c478bd9Sstevel@tonic-gate 	/*
10577c478bd9Sstevel@tonic-gate 	 * Fill in the fields.
10587c478bd9Sstevel@tonic-gate 	 */
10597c478bd9Sstevel@tonic-gate 	ioc		= (struct iocblk *)mp->b_rptr;
10607c478bd9Sstevel@tonic-gate 	ioc->ioc_cmd	= cmd;
10617c478bd9Sstevel@tonic-gate 	ioc->ioc_cr	= kcred;
10627c478bd9Sstevel@tonic-gate 	ioc->ioc_id	= getiocseqno();
10637c478bd9Sstevel@tonic-gate 	ioc->ioc_flag	= IOC_NATIVE;
10647c478bd9Sstevel@tonic-gate 	return (mp);
10657c478bd9Sstevel@tonic-gate }
10667c478bd9Sstevel@tonic-gate 
10677c478bd9Sstevel@tonic-gate /*
10687c478bd9Sstevel@tonic-gate  * test if block of given size can be allocated with a request of
10697c478bd9Sstevel@tonic-gate  * the given priority.
10707c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
10717c478bd9Sstevel@tonic-gate  */
10727c478bd9Sstevel@tonic-gate /* ARGSUSED */
10737c478bd9Sstevel@tonic-gate int
10747c478bd9Sstevel@tonic-gate testb(size_t size, uint_t pri)
10757c478bd9Sstevel@tonic-gate {
10767c478bd9Sstevel@tonic-gate 	return ((size + sizeof (dblk_t)) <= kmem_avail());
10777c478bd9Sstevel@tonic-gate }
10787c478bd9Sstevel@tonic-gate 
10797c478bd9Sstevel@tonic-gate /*
10807c478bd9Sstevel@tonic-gate  * Call function 'func' with argument 'arg' when there is a reasonably
10817c478bd9Sstevel@tonic-gate  * good chance that a block of size 'size' can be allocated.
10827c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
10837c478bd9Sstevel@tonic-gate  */
10847c478bd9Sstevel@tonic-gate /* ARGSUSED */
10857c478bd9Sstevel@tonic-gate bufcall_id_t
10867c478bd9Sstevel@tonic-gate bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
10877c478bd9Sstevel@tonic-gate {
10887c478bd9Sstevel@tonic-gate 	static long bid = 1;	/* always odd to save checking for zero */
10897c478bd9Sstevel@tonic-gate 	bufcall_id_t bc_id;
10907c478bd9Sstevel@tonic-gate 	struct strbufcall *bcp;
10917c478bd9Sstevel@tonic-gate 
10927c478bd9Sstevel@tonic-gate 	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
10937c478bd9Sstevel@tonic-gate 		return (0);
10947c478bd9Sstevel@tonic-gate 
10957c478bd9Sstevel@tonic-gate 	bcp->bc_func = func;
10967c478bd9Sstevel@tonic-gate 	bcp->bc_arg = arg;
10977c478bd9Sstevel@tonic-gate 	bcp->bc_size = size;
10987c478bd9Sstevel@tonic-gate 	bcp->bc_next = NULL;
10997c478bd9Sstevel@tonic-gate 	bcp->bc_executor = NULL;
11007c478bd9Sstevel@tonic-gate 
11017c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
11027c478bd9Sstevel@tonic-gate 	/*
11037c478bd9Sstevel@tonic-gate 	 * After bcp is linked into strbcalls and strbcall_lock is dropped there
11047c478bd9Sstevel@tonic-gate 	 * should be no references to bcp since it may be freed by
11057c478bd9Sstevel@tonic-gate 	 * runbufcalls(). Since bcp_id field is returned, we save its value in
11067c478bd9Sstevel@tonic-gate 	 * the local var.
11077c478bd9Sstevel@tonic-gate 	 */
11087c478bd9Sstevel@tonic-gate 	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 	/*
11117c478bd9Sstevel@tonic-gate 	 * add newly allocated stream event to existing
11127c478bd9Sstevel@tonic-gate 	 * linked list of events.
11137c478bd9Sstevel@tonic-gate 	 */
11147c478bd9Sstevel@tonic-gate 	if (strbcalls.bc_head == NULL) {
11157c478bd9Sstevel@tonic-gate 		strbcalls.bc_head = strbcalls.bc_tail = bcp;
11167c478bd9Sstevel@tonic-gate 	} else {
11177c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail->bc_next = bcp;
11187c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail = bcp;
11197c478bd9Sstevel@tonic-gate 	}
11207c478bd9Sstevel@tonic-gate 
11217c478bd9Sstevel@tonic-gate 	cv_signal(&strbcall_cv);
11227c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
11237c478bd9Sstevel@tonic-gate 	return (bc_id);
11247c478bd9Sstevel@tonic-gate }
11257c478bd9Sstevel@tonic-gate 
11267c478bd9Sstevel@tonic-gate /*
11277c478bd9Sstevel@tonic-gate  * Cancel a bufcall request.
11287c478bd9Sstevel@tonic-gate  */
11297c478bd9Sstevel@tonic-gate void
11307c478bd9Sstevel@tonic-gate unbufcall(bufcall_id_t id)
11317c478bd9Sstevel@tonic-gate {
11327c478bd9Sstevel@tonic-gate 	strbufcall_t *bcp, *pbcp;
11337c478bd9Sstevel@tonic-gate 
11347c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
11357c478bd9Sstevel@tonic-gate again:
11367c478bd9Sstevel@tonic-gate 	pbcp = NULL;
11377c478bd9Sstevel@tonic-gate 	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
11387c478bd9Sstevel@tonic-gate 		if (id == bcp->bc_id)
11397c478bd9Sstevel@tonic-gate 			break;
11407c478bd9Sstevel@tonic-gate 		pbcp = bcp;
11417c478bd9Sstevel@tonic-gate 	}
11427c478bd9Sstevel@tonic-gate 	if (bcp) {
11437c478bd9Sstevel@tonic-gate 		if (bcp->bc_executor != NULL) {
11447c478bd9Sstevel@tonic-gate 			if (bcp->bc_executor != curthread) {
11457c478bd9Sstevel@tonic-gate 				cv_wait(&bcall_cv, &strbcall_lock);
11467c478bd9Sstevel@tonic-gate 				goto again;
11477c478bd9Sstevel@tonic-gate 			}
11487c478bd9Sstevel@tonic-gate 		} else {
11497c478bd9Sstevel@tonic-gate 			if (pbcp)
11507c478bd9Sstevel@tonic-gate 				pbcp->bc_next = bcp->bc_next;
11517c478bd9Sstevel@tonic-gate 			else
11527c478bd9Sstevel@tonic-gate 				strbcalls.bc_head = bcp->bc_next;
11537c478bd9Sstevel@tonic-gate 			if (bcp == strbcalls.bc_tail)
11547c478bd9Sstevel@tonic-gate 				strbcalls.bc_tail = pbcp;
11557c478bd9Sstevel@tonic-gate 			kmem_free(bcp, sizeof (strbufcall_t));
11567c478bd9Sstevel@tonic-gate 		}
11577c478bd9Sstevel@tonic-gate 	}
11587c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
11597c478bd9Sstevel@tonic-gate }
11607c478bd9Sstevel@tonic-gate 
11617c478bd9Sstevel@tonic-gate /*
11627c478bd9Sstevel@tonic-gate  * Duplicate a message block by block (uses dupb), returning
11637c478bd9Sstevel@tonic-gate  * a pointer to the duplicate message.
11647c478bd9Sstevel@tonic-gate  * Returns a non-NULL value only if the entire message
11657c478bd9Sstevel@tonic-gate  * was dup'd.
11667c478bd9Sstevel@tonic-gate  */
11677c478bd9Sstevel@tonic-gate mblk_t *
11687c478bd9Sstevel@tonic-gate dupmsg(mblk_t *bp)
11697c478bd9Sstevel@tonic-gate {
11707c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
11717c478bd9Sstevel@tonic-gate 
11727c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = dupb(bp)))
11737c478bd9Sstevel@tonic-gate 		return (NULL);
11747c478bd9Sstevel@tonic-gate 
11757c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
11767c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
11777c478bd9Sstevel@tonic-gate 			freemsg(head);
11787c478bd9Sstevel@tonic-gate 			return (NULL);
11797c478bd9Sstevel@tonic-gate 		}
11807c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
11817c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
11827c478bd9Sstevel@tonic-gate 	}
11837c478bd9Sstevel@tonic-gate 	return (head);
11847c478bd9Sstevel@tonic-gate }
11857c478bd9Sstevel@tonic-gate 
/*
 * Duplicate one block: dupb() it unless its data block has STRUIO_ZC set
 * in db_struioflag, in which case copyb() is used instead.
 */
#define	DUPB_NOLOAN(bp) \
	((((bp)->b_datap->db_struioflag & STRUIO_ZC) == 0) ? \
	dupb((bp)) : copyb((bp)))
11897c478bd9Sstevel@tonic-gate 
11907c478bd9Sstevel@tonic-gate mblk_t *
11917c478bd9Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp)
11927c478bd9Sstevel@tonic-gate {
11937c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
11947c478bd9Sstevel@tonic-gate 
11957c478bd9Sstevel@tonic-gate 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
11967c478bd9Sstevel@tonic-gate 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
11977c478bd9Sstevel@tonic-gate 		return (NULL);
11987c478bd9Sstevel@tonic-gate 
11997c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
12007c478bd9Sstevel@tonic-gate 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
12017c478bd9Sstevel@tonic-gate 			freemsg(head);
12027c478bd9Sstevel@tonic-gate 			return (NULL);
12037c478bd9Sstevel@tonic-gate 		}
12047c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
12057c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
12067c478bd9Sstevel@tonic-gate 	}
12077c478bd9Sstevel@tonic-gate 	return (head);
12087c478bd9Sstevel@tonic-gate }
12097c478bd9Sstevel@tonic-gate 
12107c478bd9Sstevel@tonic-gate /*
12117c478bd9Sstevel@tonic-gate  * Copy data from message and data block to newly allocated message and
12127c478bd9Sstevel@tonic-gate  * data block. Returns new message block pointer, or NULL if error.
12137c478bd9Sstevel@tonic-gate  * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
12147c478bd9Sstevel@tonic-gate  * as in the original even when db_base is not word aligned. (bug 1052877)
12157c478bd9Sstevel@tonic-gate  */
12167c478bd9Sstevel@tonic-gate mblk_t *
12177c478bd9Sstevel@tonic-gate copyb(mblk_t *bp)
12187c478bd9Sstevel@tonic-gate {
12197c478bd9Sstevel@tonic-gate 	mblk_t	*nbp;
12207c478bd9Sstevel@tonic-gate 	dblk_t	*dp, *ndp;
12217c478bd9Sstevel@tonic-gate 	uchar_t *base;
12227c478bd9Sstevel@tonic-gate 	size_t	size;
12237c478bd9Sstevel@tonic-gate 	size_t	unaligned;
12247c478bd9Sstevel@tonic-gate 
12257c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_wptr >= bp->b_rptr);
12267c478bd9Sstevel@tonic-gate 
12277c478bd9Sstevel@tonic-gate 	dp = bp->b_datap;
12287c478bd9Sstevel@tonic-gate 	if (dp->db_fthdr != NULL)
12297c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
12307c478bd9Sstevel@tonic-gate 
12317c478bd9Sstevel@tonic-gate 	/*
12327c478bd9Sstevel@tonic-gate 	 * Special handling for Multidata message; this should be
12337c478bd9Sstevel@tonic-gate 	 * removed once a copy-callback routine is made available.
12347c478bd9Sstevel@tonic-gate 	 */
12357c478bd9Sstevel@tonic-gate 	if (dp->db_type == M_MULTIDATA) {
12367c478bd9Sstevel@tonic-gate 		cred_t *cr;
12377c478bd9Sstevel@tonic-gate 
12387c478bd9Sstevel@tonic-gate 		if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
12397c478bd9Sstevel@tonic-gate 			return (NULL);
12407c478bd9Sstevel@tonic-gate 
12417c478bd9Sstevel@tonic-gate 		nbp->b_flag = bp->b_flag;
12427c478bd9Sstevel@tonic-gate 		nbp->b_band = bp->b_band;
12437c478bd9Sstevel@tonic-gate 		ndp = nbp->b_datap;
12447c478bd9Sstevel@tonic-gate 
12457c478bd9Sstevel@tonic-gate 		/* See comments below on potential issues. */
12467c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
12477c478bd9Sstevel@tonic-gate 
12487c478bd9Sstevel@tonic-gate 		ASSERT(ndp->db_type == dp->db_type);
12497c478bd9Sstevel@tonic-gate 		cr = dp->db_credp;
12507c478bd9Sstevel@tonic-gate 		if (cr != NULL)
12517c478bd9Sstevel@tonic-gate 			crhold(ndp->db_credp = cr);
12527c478bd9Sstevel@tonic-gate 		ndp->db_cpid = dp->db_cpid;
12537c478bd9Sstevel@tonic-gate 		return (nbp);
12547c478bd9Sstevel@tonic-gate 	}
12557c478bd9Sstevel@tonic-gate 
12567c478bd9Sstevel@tonic-gate 	size = dp->db_lim - dp->db_base;
12577c478bd9Sstevel@tonic-gate 	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
12587c478bd9Sstevel@tonic-gate 	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
12597c478bd9Sstevel@tonic-gate 		return (NULL);
12607c478bd9Sstevel@tonic-gate 	nbp->b_flag = bp->b_flag;
12617c478bd9Sstevel@tonic-gate 	nbp->b_band = bp->b_band;
12627c478bd9Sstevel@tonic-gate 	ndp = nbp->b_datap;
12637c478bd9Sstevel@tonic-gate 
12647c478bd9Sstevel@tonic-gate 	/*
12657c478bd9Sstevel@tonic-gate 	 * Well, here is a potential issue.  If we are trying to
12667c478bd9Sstevel@tonic-gate 	 * trace a flow, and we copy the message, we might lose
12677c478bd9Sstevel@tonic-gate 	 * information about where this message might have been.
12687c478bd9Sstevel@tonic-gate 	 * So we should inherit the FT data.  On the other hand,
12697c478bd9Sstevel@tonic-gate 	 * a user might be interested only in alloc to free data.
12707c478bd9Sstevel@tonic-gate 	 * So I guess the real answer is to provide a tunable.
12717c478bd9Sstevel@tonic-gate 	 */
12727c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
12737c478bd9Sstevel@tonic-gate 
12747c478bd9Sstevel@tonic-gate 	base = ndp->db_base + unaligned;
12757c478bd9Sstevel@tonic-gate 	bcopy(dp->db_base, ndp->db_base + unaligned, size);
12767c478bd9Sstevel@tonic-gate 
12777c478bd9Sstevel@tonic-gate 	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
12787c478bd9Sstevel@tonic-gate 	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
12797c478bd9Sstevel@tonic-gate 
12807c478bd9Sstevel@tonic-gate 	return (nbp);
12817c478bd9Sstevel@tonic-gate }
12827c478bd9Sstevel@tonic-gate 
12837c478bd9Sstevel@tonic-gate /*
12847c478bd9Sstevel@tonic-gate  * Copy data from message to newly allocated message using new
12857c478bd9Sstevel@tonic-gate  * data blocks.  Returns a pointer to the new message, or NULL if error.
12867c478bd9Sstevel@tonic-gate  */
12877c478bd9Sstevel@tonic-gate mblk_t *
12887c478bd9Sstevel@tonic-gate copymsg(mblk_t *bp)
12897c478bd9Sstevel@tonic-gate {
12907c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
12917c478bd9Sstevel@tonic-gate 
12927c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = copyb(bp)))
12937c478bd9Sstevel@tonic-gate 		return (NULL);
12947c478bd9Sstevel@tonic-gate 
12957c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
12967c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
12977c478bd9Sstevel@tonic-gate 			freemsg(head);
12987c478bd9Sstevel@tonic-gate 			return (NULL);
12997c478bd9Sstevel@tonic-gate 		}
13007c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
13017c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
13027c478bd9Sstevel@tonic-gate 	}
13037c478bd9Sstevel@tonic-gate 	return (head);
13047c478bd9Sstevel@tonic-gate }
13057c478bd9Sstevel@tonic-gate 
13067c478bd9Sstevel@tonic-gate /*
13077c478bd9Sstevel@tonic-gate  * link a message block to tail of message
13087c478bd9Sstevel@tonic-gate  */
13097c478bd9Sstevel@tonic-gate void
13107c478bd9Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp)
13117c478bd9Sstevel@tonic-gate {
13127c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
13137c478bd9Sstevel@tonic-gate 
13147c478bd9Sstevel@tonic-gate 	for (; mp->b_cont; mp = mp->b_cont)
13157c478bd9Sstevel@tonic-gate 		;
13167c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;
13177c478bd9Sstevel@tonic-gate }
13187c478bd9Sstevel@tonic-gate 
13197c478bd9Sstevel@tonic-gate /*
13207c478bd9Sstevel@tonic-gate  * unlink a message block from head of message
13217c478bd9Sstevel@tonic-gate  * return pointer to new message.
13227c478bd9Sstevel@tonic-gate  * NULL if message becomes empty.
13237c478bd9Sstevel@tonic-gate  */
13247c478bd9Sstevel@tonic-gate mblk_t *
13257c478bd9Sstevel@tonic-gate unlinkb(mblk_t *bp)
13267c478bd9Sstevel@tonic-gate {
13277c478bd9Sstevel@tonic-gate 	mblk_t *bp1;
13287c478bd9Sstevel@tonic-gate 
13297c478bd9Sstevel@tonic-gate 	bp1 = bp->b_cont;
13307c478bd9Sstevel@tonic-gate 	bp->b_cont = NULL;
13317c478bd9Sstevel@tonic-gate 	return (bp1);
13327c478bd9Sstevel@tonic-gate }
13337c478bd9Sstevel@tonic-gate 
13347c478bd9Sstevel@tonic-gate /*
13357c478bd9Sstevel@tonic-gate  * remove a message block "bp" from message "mp"
13367c478bd9Sstevel@tonic-gate  *
13377c478bd9Sstevel@tonic-gate  * Return pointer to new message or NULL if no message remains.
13387c478bd9Sstevel@tonic-gate  * Return -1 if bp is not found in message.
13397c478bd9Sstevel@tonic-gate  */
13407c478bd9Sstevel@tonic-gate mblk_t *
13417c478bd9Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp)
13427c478bd9Sstevel@tonic-gate {
13437c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
13447c478bd9Sstevel@tonic-gate 	mblk_t *lastp = NULL;
13457c478bd9Sstevel@tonic-gate 
13467c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
13477c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
13487c478bd9Sstevel@tonic-gate 		if (tmp == bp) {
13497c478bd9Sstevel@tonic-gate 			if (lastp)
13507c478bd9Sstevel@tonic-gate 				lastp->b_cont = tmp->b_cont;
13517c478bd9Sstevel@tonic-gate 			else
13527c478bd9Sstevel@tonic-gate 				mp = tmp->b_cont;
13537c478bd9Sstevel@tonic-gate 			tmp->b_cont = NULL;
13547c478bd9Sstevel@tonic-gate 			return (mp);
13557c478bd9Sstevel@tonic-gate 		}
13567c478bd9Sstevel@tonic-gate 		lastp = tmp;
13577c478bd9Sstevel@tonic-gate 	}
13587c478bd9Sstevel@tonic-gate 	return ((mblk_t *)-1);
13597c478bd9Sstevel@tonic-gate }
13607c478bd9Sstevel@tonic-gate 
13617c478bd9Sstevel@tonic-gate /*
13627c478bd9Sstevel@tonic-gate  * Concatenate and align first len bytes of common
13637c478bd9Sstevel@tonic-gate  * message type.  Len == -1, means concat everything.
13647c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure
13657c478bd9Sstevel@tonic-gate  * After the pullup, mp points to the pulled up data.
13667c478bd9Sstevel@tonic-gate  */
13677c478bd9Sstevel@tonic-gate int
13687c478bd9Sstevel@tonic-gate pullupmsg(mblk_t *mp, ssize_t len)
13697c478bd9Sstevel@tonic-gate {
13707c478bd9Sstevel@tonic-gate 	mblk_t *bp, *b_cont;
13717c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
13727c478bd9Sstevel@tonic-gate 	ssize_t n;
13737c478bd9Sstevel@tonic-gate 
13747c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref > 0);
13757c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
13767c478bd9Sstevel@tonic-gate 
13777c478bd9Sstevel@tonic-gate 	/*
13787c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
13797c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
13807c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
13817c478bd9Sstevel@tonic-gate 	 */
13827c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
13837c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
13847c478bd9Sstevel@tonic-gate 		return (0);
13857c478bd9Sstevel@tonic-gate 
13867c478bd9Sstevel@tonic-gate 	if (len == -1) {
13877c478bd9Sstevel@tonic-gate 		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
13887c478bd9Sstevel@tonic-gate 			return (1);
13897c478bd9Sstevel@tonic-gate 		len = xmsgsize(mp);
13907c478bd9Sstevel@tonic-gate 	} else {
13917c478bd9Sstevel@tonic-gate 		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
13927c478bd9Sstevel@tonic-gate 		ASSERT(first_mblk_len >= 0);
13937c478bd9Sstevel@tonic-gate 		/*
13947c478bd9Sstevel@tonic-gate 		 * If the length is less than that of the first mblk,
13957c478bd9Sstevel@tonic-gate 		 * we want to pull up the message into an aligned mblk.
13967c478bd9Sstevel@tonic-gate 		 * Though not part of the spec, some callers assume it.
13977c478bd9Sstevel@tonic-gate 		 */
13987c478bd9Sstevel@tonic-gate 		if (len <= first_mblk_len) {
13997c478bd9Sstevel@tonic-gate 			if (str_aligned(mp->b_rptr))
14007c478bd9Sstevel@tonic-gate 				return (1);
14017c478bd9Sstevel@tonic-gate 			len = first_mblk_len;
14027c478bd9Sstevel@tonic-gate 		} else if (xmsgsize(mp) < len)
14037c478bd9Sstevel@tonic-gate 			return (0);
14047c478bd9Sstevel@tonic-gate 	}
14057c478bd9Sstevel@tonic-gate 
14067c478bd9Sstevel@tonic-gate 	if ((bp = allocb_tmpl(len, mp)) == NULL)
14077c478bd9Sstevel@tonic-gate 		return (0);
14087c478bd9Sstevel@tonic-gate 
14097c478bd9Sstevel@tonic-gate 	dbp = bp->b_datap;
14107c478bd9Sstevel@tonic-gate 	*bp = *mp;		/* swap mblks so bp heads the old msg... */
14117c478bd9Sstevel@tonic-gate 	mp->b_datap = dbp;	/* ... and mp heads the new message */
14127c478bd9Sstevel@tonic-gate 	mp->b_datap->db_mblk = mp;
14137c478bd9Sstevel@tonic-gate 	bp->b_datap->db_mblk = bp;
14147c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
14157c478bd9Sstevel@tonic-gate 
14167c478bd9Sstevel@tonic-gate 	do {
14177c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_datap->db_ref > 0);
14187c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_wptr >= bp->b_rptr);
14197c478bd9Sstevel@tonic-gate 		n = MIN(bp->b_wptr - bp->b_rptr, len);
14207c478bd9Sstevel@tonic-gate 		bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
14217c478bd9Sstevel@tonic-gate 		mp->b_wptr += n;
14227c478bd9Sstevel@tonic-gate 		bp->b_rptr += n;
14237c478bd9Sstevel@tonic-gate 		len -= n;
14247c478bd9Sstevel@tonic-gate 		if (bp->b_rptr != bp->b_wptr)
14257c478bd9Sstevel@tonic-gate 			break;
14267c478bd9Sstevel@tonic-gate 		b_cont = bp->b_cont;
14277c478bd9Sstevel@tonic-gate 		freeb(bp);
14287c478bd9Sstevel@tonic-gate 		bp = b_cont;
14297c478bd9Sstevel@tonic-gate 	} while (len && bp);
14307c478bd9Sstevel@tonic-gate 
14317c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */
14327c478bd9Sstevel@tonic-gate 
14337c478bd9Sstevel@tonic-gate 	return (1);
14347c478bd9Sstevel@tonic-gate }
14357c478bd9Sstevel@tonic-gate 
14367c478bd9Sstevel@tonic-gate /*
14377c478bd9Sstevel@tonic-gate  * Concatenate and align at least the first len bytes of common message
14387c478bd9Sstevel@tonic-gate  * type.  Len == -1 means concatenate everything.  The original message is
14397c478bd9Sstevel@tonic-gate  * unaltered.  Returns a pointer to a new message on success, otherwise
14407c478bd9Sstevel@tonic-gate  * returns NULL.
14417c478bd9Sstevel@tonic-gate  */
14427c478bd9Sstevel@tonic-gate mblk_t *
14437c478bd9Sstevel@tonic-gate msgpullup(mblk_t *mp, ssize_t len)
14447c478bd9Sstevel@tonic-gate {
14457c478bd9Sstevel@tonic-gate 	mblk_t	*newmp;
14467c478bd9Sstevel@tonic-gate 	ssize_t	totlen;
14477c478bd9Sstevel@tonic-gate 	ssize_t	n;
14487c478bd9Sstevel@tonic-gate 
14497c478bd9Sstevel@tonic-gate 	/*
14507c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
14517c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
14527c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
14537c478bd9Sstevel@tonic-gate 	 */
14547c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
14557c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
14567c478bd9Sstevel@tonic-gate 		return (NULL);
14577c478bd9Sstevel@tonic-gate 
14587c478bd9Sstevel@tonic-gate 	totlen = xmsgsize(mp);
14597c478bd9Sstevel@tonic-gate 
14607c478bd9Sstevel@tonic-gate 	if ((len > 0) && (len > totlen))
14617c478bd9Sstevel@tonic-gate 		return (NULL);
14627c478bd9Sstevel@tonic-gate 
14637c478bd9Sstevel@tonic-gate 	/*
14647c478bd9Sstevel@tonic-gate 	 * Copy all of the first msg type into one new mblk, then dupmsg
14657c478bd9Sstevel@tonic-gate 	 * and link the rest onto this.
14667c478bd9Sstevel@tonic-gate 	 */
14677c478bd9Sstevel@tonic-gate 
14687c478bd9Sstevel@tonic-gate 	len = totlen;
14697c478bd9Sstevel@tonic-gate 
14707c478bd9Sstevel@tonic-gate 	if ((newmp = allocb_tmpl(len, mp)) == NULL)
14717c478bd9Sstevel@tonic-gate 		return (NULL);
14727c478bd9Sstevel@tonic-gate 
14737c478bd9Sstevel@tonic-gate 	newmp->b_flag = mp->b_flag;
14747c478bd9Sstevel@tonic-gate 	newmp->b_band = mp->b_band;
14757c478bd9Sstevel@tonic-gate 
14767c478bd9Sstevel@tonic-gate 	while (len > 0) {
14777c478bd9Sstevel@tonic-gate 		n = mp->b_wptr - mp->b_rptr;
14787c478bd9Sstevel@tonic-gate 		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
14797c478bd9Sstevel@tonic-gate 		if (n > 0)
14807c478bd9Sstevel@tonic-gate 			bcopy(mp->b_rptr, newmp->b_wptr, n);
14817c478bd9Sstevel@tonic-gate 		newmp->b_wptr += n;
14827c478bd9Sstevel@tonic-gate 		len -= n;
14837c478bd9Sstevel@tonic-gate 		mp = mp->b_cont;
14847c478bd9Sstevel@tonic-gate 	}
14857c478bd9Sstevel@tonic-gate 
14867c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
14877c478bd9Sstevel@tonic-gate 		newmp->b_cont = dupmsg(mp);
14887c478bd9Sstevel@tonic-gate 		if (newmp->b_cont == NULL) {
14897c478bd9Sstevel@tonic-gate 			freemsg(newmp);
14907c478bd9Sstevel@tonic-gate 			return (NULL);
14917c478bd9Sstevel@tonic-gate 		}
14927c478bd9Sstevel@tonic-gate 	}
14937c478bd9Sstevel@tonic-gate 
14947c478bd9Sstevel@tonic-gate 	return (newmp);
14957c478bd9Sstevel@tonic-gate }
14967c478bd9Sstevel@tonic-gate 
14977c478bd9Sstevel@tonic-gate /*
14987c478bd9Sstevel@tonic-gate  * Trim bytes from message
14997c478bd9Sstevel@tonic-gate  *  len > 0, trim from head
15007c478bd9Sstevel@tonic-gate  *  len < 0, trim from tail
15017c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
15027c478bd9Sstevel@tonic-gate  */
15037c478bd9Sstevel@tonic-gate int
15047c478bd9Sstevel@tonic-gate adjmsg(mblk_t *mp, ssize_t len)
15057c478bd9Sstevel@tonic-gate {
15067c478bd9Sstevel@tonic-gate 	mblk_t *bp;
15077c478bd9Sstevel@tonic-gate 	mblk_t *save_bp = NULL;
15087c478bd9Sstevel@tonic-gate 	mblk_t *prev_bp;
15097c478bd9Sstevel@tonic-gate 	mblk_t *bcont;
15107c478bd9Sstevel@tonic-gate 	unsigned char type;
15117c478bd9Sstevel@tonic-gate 	ssize_t n;
15127c478bd9Sstevel@tonic-gate 	int fromhead;
15137c478bd9Sstevel@tonic-gate 	int first;
15147c478bd9Sstevel@tonic-gate 
15157c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
15167c478bd9Sstevel@tonic-gate 	/*
15177c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
15187c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
15197c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
15207c478bd9Sstevel@tonic-gate 	 */
15217c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
15227c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
15237c478bd9Sstevel@tonic-gate 		return (0);
15247c478bd9Sstevel@tonic-gate 
15257c478bd9Sstevel@tonic-gate 	if (len < 0) {
15267c478bd9Sstevel@tonic-gate 		fromhead = 0;
15277c478bd9Sstevel@tonic-gate 		len = -len;
15287c478bd9Sstevel@tonic-gate 	} else {
15297c478bd9Sstevel@tonic-gate 		fromhead = 1;
15307c478bd9Sstevel@tonic-gate 	}
15317c478bd9Sstevel@tonic-gate 
15327c478bd9Sstevel@tonic-gate 	if (xmsgsize(mp) < len)
15337c478bd9Sstevel@tonic-gate 		return (0);
15347c478bd9Sstevel@tonic-gate 
15357c478bd9Sstevel@tonic-gate 
15367c478bd9Sstevel@tonic-gate 	if (fromhead) {
15377c478bd9Sstevel@tonic-gate 		first = 1;
15387c478bd9Sstevel@tonic-gate 		while (len) {
15397c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_wptr >= mp->b_rptr);
15407c478bd9Sstevel@tonic-gate 			n = MIN(mp->b_wptr - mp->b_rptr, len);
15417c478bd9Sstevel@tonic-gate 			mp->b_rptr += n;
15427c478bd9Sstevel@tonic-gate 			len -= n;
15437c478bd9Sstevel@tonic-gate 
15447c478bd9Sstevel@tonic-gate 			/*
15457c478bd9Sstevel@tonic-gate 			 * If this is not the first zero length
15467c478bd9Sstevel@tonic-gate 			 * message remove it
15477c478bd9Sstevel@tonic-gate 			 */
15487c478bd9Sstevel@tonic-gate 			if (!first && (mp->b_wptr == mp->b_rptr)) {
15497c478bd9Sstevel@tonic-gate 				bcont = mp->b_cont;
15507c478bd9Sstevel@tonic-gate 				freeb(mp);
15517c478bd9Sstevel@tonic-gate 				mp = save_bp->b_cont = bcont;
15527c478bd9Sstevel@tonic-gate 			} else {
15537c478bd9Sstevel@tonic-gate 				save_bp = mp;
15547c478bd9Sstevel@tonic-gate 				mp = mp->b_cont;
15557c478bd9Sstevel@tonic-gate 			}
15567c478bd9Sstevel@tonic-gate 			first = 0;
15577c478bd9Sstevel@tonic-gate 		}
15587c478bd9Sstevel@tonic-gate 	} else {
15597c478bd9Sstevel@tonic-gate 		type = mp->b_datap->db_type;
15607c478bd9Sstevel@tonic-gate 		while (len) {
15617c478bd9Sstevel@tonic-gate 			bp = mp;
15627c478bd9Sstevel@tonic-gate 			save_bp = NULL;
15637c478bd9Sstevel@tonic-gate 
15647c478bd9Sstevel@tonic-gate 			/*
15657c478bd9Sstevel@tonic-gate 			 * Find the last message of same type
15667c478bd9Sstevel@tonic-gate 			 */
15677c478bd9Sstevel@tonic-gate 
15687c478bd9Sstevel@tonic-gate 			while (bp && bp->b_datap->db_type == type) {
15697c478bd9Sstevel@tonic-gate 				ASSERT(bp->b_wptr >= bp->b_rptr);
15707c478bd9Sstevel@tonic-gate 				prev_bp = save_bp;
15717c478bd9Sstevel@tonic-gate 				save_bp = bp;
15727c478bd9Sstevel@tonic-gate 				bp = bp->b_cont;
15737c478bd9Sstevel@tonic-gate 			}
15747c478bd9Sstevel@tonic-gate 			if (save_bp == NULL)
15757c478bd9Sstevel@tonic-gate 				break;
15767c478bd9Sstevel@tonic-gate 			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
15777c478bd9Sstevel@tonic-gate 			save_bp->b_wptr -= n;
15787c478bd9Sstevel@tonic-gate 			len -= n;
15797c478bd9Sstevel@tonic-gate 
15807c478bd9Sstevel@tonic-gate 			/*
15817c478bd9Sstevel@tonic-gate 			 * If this is not the first message
15827c478bd9Sstevel@tonic-gate 			 * and we have taken away everything
15837c478bd9Sstevel@tonic-gate 			 * from this message, remove it
15847c478bd9Sstevel@tonic-gate 			 */
15857c478bd9Sstevel@tonic-gate 
15867c478bd9Sstevel@tonic-gate 			if ((save_bp != mp) &&
15877c478bd9Sstevel@tonic-gate 			    (save_bp->b_wptr == save_bp->b_rptr)) {
15887c478bd9Sstevel@tonic-gate 				bcont = save_bp->b_cont;
15897c478bd9Sstevel@tonic-gate 				freeb(save_bp);
15907c478bd9Sstevel@tonic-gate 				prev_bp->b_cont = bcont;
15917c478bd9Sstevel@tonic-gate 			}
15927c478bd9Sstevel@tonic-gate 		}
15937c478bd9Sstevel@tonic-gate 	}
15947c478bd9Sstevel@tonic-gate 	return (1);
15957c478bd9Sstevel@tonic-gate }
15967c478bd9Sstevel@tonic-gate 
15977c478bd9Sstevel@tonic-gate /*
15987c478bd9Sstevel@tonic-gate  * get number of data bytes in message
15997c478bd9Sstevel@tonic-gate  */
16007c478bd9Sstevel@tonic-gate size_t
16017c478bd9Sstevel@tonic-gate msgdsize(mblk_t *bp)
16027c478bd9Sstevel@tonic-gate {
16037c478bd9Sstevel@tonic-gate 	size_t count = 0;
16047c478bd9Sstevel@tonic-gate 
16057c478bd9Sstevel@tonic-gate 	for (; bp; bp = bp->b_cont)
16067c478bd9Sstevel@tonic-gate 		if (bp->b_datap->db_type == M_DATA) {
16077c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_wptr >= bp->b_rptr);
16087c478bd9Sstevel@tonic-gate 			count += bp->b_wptr - bp->b_rptr;
16097c478bd9Sstevel@tonic-gate 		}
16107c478bd9Sstevel@tonic-gate 	return (count);
16117c478bd9Sstevel@tonic-gate }
16127c478bd9Sstevel@tonic-gate 
16137c478bd9Sstevel@tonic-gate /*
16147c478bd9Sstevel@tonic-gate  * Get a message off head of queue
16157c478bd9Sstevel@tonic-gate  *
16167c478bd9Sstevel@tonic-gate  * If queue has no buffers then mark queue
16177c478bd9Sstevel@tonic-gate  * with QWANTR. (queue wants to be read by
16187c478bd9Sstevel@tonic-gate  * someone when data becomes available)
16197c478bd9Sstevel@tonic-gate  *
16207c478bd9Sstevel@tonic-gate  * If there is something to take off then do so.
16217c478bd9Sstevel@tonic-gate  * If queue falls below hi water mark turn off QFULL
16227c478bd9Sstevel@tonic-gate  * flag.  Decrement weighted count of queue.
16237c478bd9Sstevel@tonic-gate  * Also turn off QWANTR because queue is being read.
16247c478bd9Sstevel@tonic-gate  *
16257c478bd9Sstevel@tonic-gate  * The queue count is maintained on a per-band basis.
16267c478bd9Sstevel@tonic-gate  * Priority band 0 (normal messages) uses q_count,
16277c478bd9Sstevel@tonic-gate  * q_lowat, etc.  Non-zero priority bands use the
16287c478bd9Sstevel@tonic-gate  * fields in their respective qband structures
16297c478bd9Sstevel@tonic-gate  * (qb_count, qb_lowat, etc.)  All messages appear
16307c478bd9Sstevel@tonic-gate  * on the same list, linked via their b_next pointers.
16317c478bd9Sstevel@tonic-gate  * q_first is the head of the list.  q_count does
16327c478bd9Sstevel@tonic-gate  * not reflect the size of all the messages on the
16337c478bd9Sstevel@tonic-gate  * queue.  It only reflects those messages in the
16347c478bd9Sstevel@tonic-gate  * normal band of flow.  The one exception to this
16357c478bd9Sstevel@tonic-gate  * deals with high priority messages.  They are in
16367c478bd9Sstevel@tonic-gate  * their own conceptual "band", but are accounted
16377c478bd9Sstevel@tonic-gate  * against q_count.
16387c478bd9Sstevel@tonic-gate  *
16397c478bd9Sstevel@tonic-gate  * If queue count is below the lo water mark and QWANTW
16407c478bd9Sstevel@tonic-gate  * is set, enable the closest backq which has a service
16417c478bd9Sstevel@tonic-gate  * procedure and turn off the QWANTW flag.
16427c478bd9Sstevel@tonic-gate  *
16437c478bd9Sstevel@tonic-gate  * getq could be built on top of rmvq, but isn't because
16447c478bd9Sstevel@tonic-gate  * of performance considerations.
16457c478bd9Sstevel@tonic-gate  *
16467c478bd9Sstevel@tonic-gate  * A note on the use of q_count and q_mblkcnt:
16477c478bd9Sstevel@tonic-gate  *   q_count is the traditional byte count for messages that
16487c478bd9Sstevel@tonic-gate  *   have been put on a queue.  Documentation tells us that
16497c478bd9Sstevel@tonic-gate  *   we shouldn't rely on that count, but some drivers/modules
16507c478bd9Sstevel@tonic-gate  *   do.  What was needed, however, is a mechanism to prevent
16517c478bd9Sstevel@tonic-gate  *   runaway streams from consuming all of the resources,
16527c478bd9Sstevel@tonic-gate  *   and particularly be able to flow control zero-length
16537c478bd9Sstevel@tonic-gate  *   messages.  q_mblkcnt is used for this purpose.  It
16547c478bd9Sstevel@tonic-gate  *   counts the number of mblk's that are being put on
16557c478bd9Sstevel@tonic-gate  *   the queue.  The intention here, is that each mblk should
16567c478bd9Sstevel@tonic-gate  *   contain one byte of data and, for the purpose of
16577c478bd9Sstevel@tonic-gate  *   flow-control, logically does.  A queue will become
16587c478bd9Sstevel@tonic-gate  *   full when EITHER of these values (q_count and q_mblkcnt)
16597c478bd9Sstevel@tonic-gate  *   reach the highwater mark.  It will clear when BOTH
16607c478bd9Sstevel@tonic-gate  *   of them drop below the highwater mark.  And it will
16617c478bd9Sstevel@tonic-gate  *   backenable when BOTH of them drop below the lowwater
16627c478bd9Sstevel@tonic-gate  *   mark.
16637c478bd9Sstevel@tonic-gate  *   With this algorithm, a driver/module might be able
16647c478bd9Sstevel@tonic-gate  *   to find a reasonably accurate q_count, and the
16657c478bd9Sstevel@tonic-gate  *   framework can still try and limit resource usage.
16667c478bd9Sstevel@tonic-gate  */
16677c478bd9Sstevel@tonic-gate mblk_t *
16687c478bd9Sstevel@tonic-gate getq(queue_t *q)
16697c478bd9Sstevel@tonic-gate {
16707c478bd9Sstevel@tonic-gate 	mblk_t *bp;
1671116094b2Smicheng 	uchar_t band = 0;
16727c478bd9Sstevel@tonic-gate 
16737c478bd9Sstevel@tonic-gate 	bp = getq_noenab(q);
16747c478bd9Sstevel@tonic-gate 	if (bp != NULL)
16757c478bd9Sstevel@tonic-gate 		band = bp->b_band;
16767c478bd9Sstevel@tonic-gate 
16777c478bd9Sstevel@tonic-gate 	/*
16787c478bd9Sstevel@tonic-gate 	 * Inlined from qbackenable().
16797c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
16807c478bd9Sstevel@tonic-gate 	 */
16817c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
16827c478bd9Sstevel@tonic-gate 		return (bp);
16837c478bd9Sstevel@tonic-gate 
16847c478bd9Sstevel@tonic-gate 	qbackenable(q, band);
16857c478bd9Sstevel@tonic-gate 	return (bp);
16867c478bd9Sstevel@tonic-gate }
16877c478bd9Sstevel@tonic-gate 
/*
 * Calculate number of data bytes in a single data message block taking
 * multidata messages into account.
 *
 * mp   - message block to measure
 * size - lvalue accumulator; the block's byte count is added to it
 *
 * For anything but M_MULTIDATA the count is MBLKL(mp); for M_MULTIDATA
 * it is the value mmd_getsize() stores through its third argument.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely (followed by a semicolon)
 * as the unbraced body of an if/else.  Note that mp is evaluated more
 * than once, so it must not have side effects.
 */

#define	ADD_MBLK_SIZE(mp, size)						\
	do {								\
		if (DB_TYPE(mp) != M_MULTIDATA) {			\
			(size) += MBLKL(mp);				\
		} else {						\
			uint_t	pinuse;					\
									\
			mmd_getsize(mmd_getmultidata(mp), NULL,		\
			    &pinuse);					\
			(size) += pinuse;				\
		}							\
	} while (0)
1702ff550d0eSmasputra 
1703ff550d0eSmasputra /*
17047c478bd9Sstevel@tonic-gate  * Like getq() but does not backenable.  This is used by the stream
17057c478bd9Sstevel@tonic-gate  * head when a putback() is likely.  The caller must call qbackenable()
17067c478bd9Sstevel@tonic-gate  * after it is done with accessing the queue.
17077c478bd9Sstevel@tonic-gate  */
17087c478bd9Sstevel@tonic-gate mblk_t *
17097c478bd9Sstevel@tonic-gate getq_noenab(queue_t *q)
17107c478bd9Sstevel@tonic-gate {
17117c478bd9Sstevel@tonic-gate 	mblk_t *bp;
17127c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
17137c478bd9Sstevel@tonic-gate 	qband_t *qbp;
17147c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
17157c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
17167c478bd9Sstevel@tonic-gate 
17177c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
17187c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
17197c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
17207c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
17217c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
17227c478bd9Sstevel@tonic-gate 	} else
17237c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
17247c478bd9Sstevel@tonic-gate 
17257c478bd9Sstevel@tonic-gate 	if ((bp = q->q_first) == 0) {
17267c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTR;
17277c478bd9Sstevel@tonic-gate 	} else {
17287c478bd9Sstevel@tonic-gate 		if ((q->q_first = bp->b_next) == NULL)
17297c478bd9Sstevel@tonic-gate 			q->q_last = NULL;
17307c478bd9Sstevel@tonic-gate 		else
17317c478bd9Sstevel@tonic-gate 			q->q_first->b_prev = NULL;
17327c478bd9Sstevel@tonic-gate 
17337c478bd9Sstevel@tonic-gate 		/* Get message byte count for q_count accounting */
17347c478bd9Sstevel@tonic-gate 		for (tmp = bp; tmp; tmp = tmp->b_cont) {
1735ff550d0eSmasputra 			ADD_MBLK_SIZE(tmp, bytecnt);
17367c478bd9Sstevel@tonic-gate 			mblkcnt++;
17377c478bd9Sstevel@tonic-gate 		}
17387c478bd9Sstevel@tonic-gate 
17397c478bd9Sstevel@tonic-gate 		if (bp->b_band == 0) {
17407c478bd9Sstevel@tonic-gate 			q->q_count -= bytecnt;
17417c478bd9Sstevel@tonic-gate 			q->q_mblkcnt -= mblkcnt;
1742ba464308Srk129064 			if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
1743ba464308Srk129064 			    (q->q_mblkcnt < q->q_hiwat))) {
17447c478bd9Sstevel@tonic-gate 				q->q_flag &= ~QFULL;
17457c478bd9Sstevel@tonic-gate 			}
17467c478bd9Sstevel@tonic-gate 		} else {
17477c478bd9Sstevel@tonic-gate 			int i;
17487c478bd9Sstevel@tonic-gate 
17497c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_band <= q->q_nband);
17507c478bd9Sstevel@tonic-gate 			ASSERT(q->q_bandp != NULL);
17517c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(QLOCK(q)));
17527c478bd9Sstevel@tonic-gate 			qbp = q->q_bandp;
17537c478bd9Sstevel@tonic-gate 			i = bp->b_band;
17547c478bd9Sstevel@tonic-gate 			while (--i > 0)
17557c478bd9Sstevel@tonic-gate 				qbp = qbp->qb_next;
17567c478bd9Sstevel@tonic-gate 			if (qbp->qb_first == qbp->qb_last) {
17577c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
17587c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
17597c478bd9Sstevel@tonic-gate 			} else {
17607c478bd9Sstevel@tonic-gate 				qbp->qb_first = bp->b_next;
17617c478bd9Sstevel@tonic-gate 			}
17627c478bd9Sstevel@tonic-gate 			qbp->qb_count -= bytecnt;
17637c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt -= mblkcnt;
1764ba464308Srk129064 			if (qbp->qb_mblkcnt == 0 ||
1765ba464308Srk129064 			    ((qbp->qb_count < qbp->qb_hiwat) &&
1766ba464308Srk129064 			    (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
17677c478bd9Sstevel@tonic-gate 				qbp->qb_flag &= ~QB_FULL;
17687c478bd9Sstevel@tonic-gate 			}
17697c478bd9Sstevel@tonic-gate 		}
17707c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTR;
17717c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
17727c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
17737c478bd9Sstevel@tonic-gate 	}
17747c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
17757c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
17767c478bd9Sstevel@tonic-gate 
17777c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);
17787c478bd9Sstevel@tonic-gate 
17797c478bd9Sstevel@tonic-gate 	return (bp);
17807c478bd9Sstevel@tonic-gate }
17817c478bd9Sstevel@tonic-gate 
17827c478bd9Sstevel@tonic-gate /*
17837c478bd9Sstevel@tonic-gate  * Determine if a backenable is needed after removing a message in the
17847c478bd9Sstevel@tonic-gate  * specified band.
17857c478bd9Sstevel@tonic-gate  * NOTE: This routine assumes that something like getq_noenab() has been
17867c478bd9Sstevel@tonic-gate  * already called.
17877c478bd9Sstevel@tonic-gate  *
17887c478bd9Sstevel@tonic-gate  * For the read side it is ok to hold sd_lock across calling this (and the
17897c478bd9Sstevel@tonic-gate  * stream head often does).
17907c478bd9Sstevel@tonic-gate  * But for the write side strwakeq might be invoked and it acquires sd_lock.
17917c478bd9Sstevel@tonic-gate  */
17927c478bd9Sstevel@tonic-gate void
1793116094b2Smicheng qbackenable(queue_t *q, uchar_t band)
17947c478bd9Sstevel@tonic-gate {
17957c478bd9Sstevel@tonic-gate 	int backenab = 0;
17967c478bd9Sstevel@tonic-gate 	qband_t *qbp;
17977c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
17987c478bd9Sstevel@tonic-gate 
17997c478bd9Sstevel@tonic-gate 	ASSERT(q);
18007c478bd9Sstevel@tonic-gate 	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));
18017c478bd9Sstevel@tonic-gate 
18027c478bd9Sstevel@tonic-gate 	/*
18037c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
18047c478bd9Sstevel@tonic-gate 	 * OK since after getq() has lowered the q_count these flags
18057c478bd9Sstevel@tonic-gate 	 * would not change unless either the qbackenable() is done by
18067c478bd9Sstevel@tonic-gate 	 * another thread (which is ok) or the queue has gotten QFULL
18077c478bd9Sstevel@tonic-gate 	 * in which case another backenable will take place when the queue
18087c478bd9Sstevel@tonic-gate 	 * drops below q_lowat.
18097c478bd9Sstevel@tonic-gate 	 */
18107c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
18117c478bd9Sstevel@tonic-gate 		return;
18127c478bd9Sstevel@tonic-gate 
18137c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
18147c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
18157c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
18167c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
18177c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
18187c478bd9Sstevel@tonic-gate 	} else
18197c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
18207c478bd9Sstevel@tonic-gate 
18217c478bd9Sstevel@tonic-gate 	if (band == 0) {
18227c478bd9Sstevel@tonic-gate 		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
18237c478bd9Sstevel@tonic-gate 		    q->q_mblkcnt < q->q_lowat)) {
18247c478bd9Sstevel@tonic-gate 			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
18257c478bd9Sstevel@tonic-gate 		}
18267c478bd9Sstevel@tonic-gate 	} else {
18277c478bd9Sstevel@tonic-gate 		int i;
18287c478bd9Sstevel@tonic-gate 
18297c478bd9Sstevel@tonic-gate 		ASSERT((unsigned)band <= q->q_nband);
18307c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
18317c478bd9Sstevel@tonic-gate 
18327c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
18337c478bd9Sstevel@tonic-gate 		i = band;
18347c478bd9Sstevel@tonic-gate 		while (--i > 0)
18357c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
18367c478bd9Sstevel@tonic-gate 
18377c478bd9Sstevel@tonic-gate 		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
18387c478bd9Sstevel@tonic-gate 		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
18397c478bd9Sstevel@tonic-gate 			backenab = qbp->qb_flag & QB_WANTW;
18407c478bd9Sstevel@tonic-gate 		}
18417c478bd9Sstevel@tonic-gate 	}
18427c478bd9Sstevel@tonic-gate 
18437c478bd9Sstevel@tonic-gate 	if (backenab == 0) {
18447c478bd9Sstevel@tonic-gate 		if (freezer != curthread)
18457c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
18467c478bd9Sstevel@tonic-gate 		return;
18477c478bd9Sstevel@tonic-gate 	}
18487c478bd9Sstevel@tonic-gate 
18497c478bd9Sstevel@tonic-gate 	/* Have to drop the lock across strwakeq and backenable */
18507c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
18517c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTWSYNC;
18527c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW)) {
18537c478bd9Sstevel@tonic-gate 		if (band != 0)
18547c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
18557c478bd9Sstevel@tonic-gate 		else {
18567c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
18577c478bd9Sstevel@tonic-gate 		}
18587c478bd9Sstevel@tonic-gate 	}
18597c478bd9Sstevel@tonic-gate 
18607c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
18617c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
18627c478bd9Sstevel@tonic-gate 
18637c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
18647c478bd9Sstevel@tonic-gate 		strwakeq(q, QWANTWSYNC);
18657c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW))
18667c478bd9Sstevel@tonic-gate 		backenable(q, band);
18677c478bd9Sstevel@tonic-gate }
18687c478bd9Sstevel@tonic-gate 
18697c478bd9Sstevel@tonic-gate /*
18707c478bd9Sstevel@tonic-gate  * Remove a message from a queue.  The queue count and other
18717c478bd9Sstevel@tonic-gate  * flow control parameters are adjusted and the back queue
18727c478bd9Sstevel@tonic-gate  * enabled if necessary.
18737c478bd9Sstevel@tonic-gate  *
18747c478bd9Sstevel@tonic-gate  * rmvq can be called with the stream frozen, but other utility functions
18757c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
18767c478bd9Sstevel@tonic-gate  */
18777c478bd9Sstevel@tonic-gate void
18787c478bd9Sstevel@tonic-gate rmvq(queue_t *q, mblk_t *mp)
18797c478bd9Sstevel@tonic-gate {
18807c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
18817c478bd9Sstevel@tonic-gate 
18827c478bd9Sstevel@tonic-gate 	rmvq_noenab(q, mp);
18837c478bd9Sstevel@tonic-gate 	if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
18847c478bd9Sstevel@tonic-gate 		/*
18857c478bd9Sstevel@tonic-gate 		 * qbackenable can handle a frozen stream but not a "random"
18867c478bd9Sstevel@tonic-gate 		 * qlock being held. Drop lock across qbackenable.
18877c478bd9Sstevel@tonic-gate 		 */
18887c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
18897c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
18907c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
18917c478bd9Sstevel@tonic-gate 	} else {
18927c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
18937c478bd9Sstevel@tonic-gate 	}
18947c478bd9Sstevel@tonic-gate }
18957c478bd9Sstevel@tonic-gate 
18967c478bd9Sstevel@tonic-gate /*
18977c478bd9Sstevel@tonic-gate  * Like rmvq() but without any backenabling.
18987c478bd9Sstevel@tonic-gate  * This exists to handle SR_CONSOL_DATA in strrput().
18997c478bd9Sstevel@tonic-gate  */
19007c478bd9Sstevel@tonic-gate void
19017c478bd9Sstevel@tonic-gate rmvq_noenab(queue_t *q, mblk_t *mp)
19027c478bd9Sstevel@tonic-gate {
19037c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
19047c478bd9Sstevel@tonic-gate 	int i;
19057c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
19067c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
19077c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
19087c478bd9Sstevel@tonic-gate 
19097c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
19107c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
19117c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
19127c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
19137c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
19147c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
19157c478bd9Sstevel@tonic-gate 		freezer = curthread;
19167c478bd9Sstevel@tonic-gate 	} else
19177c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
19187c478bd9Sstevel@tonic-gate 
19197c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_band <= q->q_nband);
19207c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {		/* Adjust band pointers */
19217c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
19227c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
19237c478bd9Sstevel@tonic-gate 		i = mp->b_band;
19247c478bd9Sstevel@tonic-gate 		while (--i > 0)
19257c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
19267c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_first) {
19277c478bd9Sstevel@tonic-gate 			if (mp->b_next && mp->b_band == mp->b_next->b_band)
19287c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp->b_next;
19297c478bd9Sstevel@tonic-gate 			else
19307c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
19317c478bd9Sstevel@tonic-gate 		}
19327c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_last) {
19337c478bd9Sstevel@tonic-gate 			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
19347c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp->b_prev;
19357c478bd9Sstevel@tonic-gate 			else
19367c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
19377c478bd9Sstevel@tonic-gate 		}
19387c478bd9Sstevel@tonic-gate 	}
19397c478bd9Sstevel@tonic-gate 
19407c478bd9Sstevel@tonic-gate 	/*
19417c478bd9Sstevel@tonic-gate 	 * Remove the message from the list.
19427c478bd9Sstevel@tonic-gate 	 */
19437c478bd9Sstevel@tonic-gate 	if (mp->b_prev)
19447c478bd9Sstevel@tonic-gate 		mp->b_prev->b_next = mp->b_next;
19457c478bd9Sstevel@tonic-gate 	else
19467c478bd9Sstevel@tonic-gate 		q->q_first = mp->b_next;
19477c478bd9Sstevel@tonic-gate 	if (mp->b_next)
19487c478bd9Sstevel@tonic-gate 		mp->b_next->b_prev = mp->b_prev;
19497c478bd9Sstevel@tonic-gate 	else
19507c478bd9Sstevel@tonic-gate 		q->q_last = mp->b_prev;
19517c478bd9Sstevel@tonic-gate 	mp->b_next = NULL;
19527c478bd9Sstevel@tonic-gate 	mp->b_prev = NULL;
19537c478bd9Sstevel@tonic-gate 
19547c478bd9Sstevel@tonic-gate 	/* Get the size of the message for q_count accounting */
19557c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
1956ff550d0eSmasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
19577c478bd9Sstevel@tonic-gate 		mblkcnt++;
19587c478bd9Sstevel@tonic-gate 	}
19597c478bd9Sstevel@tonic-gate 
19607c478bd9Sstevel@tonic-gate 	if (mp->b_band == 0) {		/* Perform q_count accounting */
19617c478bd9Sstevel@tonic-gate 		q->q_count -= bytecnt;
19627c478bd9Sstevel@tonic-gate 		q->q_mblkcnt -= mblkcnt;
1963ba464308Srk129064 		if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
1964ba464308Srk129064 		    (q->q_mblkcnt < q->q_hiwat))) {
19657c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QFULL;
19667c478bd9Sstevel@tonic-gate 		}
19677c478bd9Sstevel@tonic-gate 	} else {			/* Perform qb_count accounting */
19687c478bd9Sstevel@tonic-gate 		qbp->qb_count -= bytecnt;
19697c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt -= mblkcnt;
1970ba464308Srk129064 		if (qbp->qb_mblkcnt == 0 || ((qbp->qb_count < qbp->qb_hiwat) &&
1971ba464308Srk129064 		    (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
19727c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
19737c478bd9Sstevel@tonic-gate 		}
19747c478bd9Sstevel@tonic-gate 	}
19757c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
19767c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
19777c478bd9Sstevel@tonic-gate 
19787c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
19797c478bd9Sstevel@tonic-gate }
19807c478bd9Sstevel@tonic-gate 
19817c478bd9Sstevel@tonic-gate /*
19827c478bd9Sstevel@tonic-gate  * Empty a queue.
19837c478bd9Sstevel@tonic-gate  * If flag is set, remove all messages.  Otherwise, remove
19847c478bd9Sstevel@tonic-gate  * only non-control messages.  If queue falls below its low
19857c478bd9Sstevel@tonic-gate  * water mark, and QWANTW is set, enable the nearest upstream
19867c478bd9Sstevel@tonic-gate  * service procedure.
19877c478bd9Sstevel@tonic-gate  *
19887c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
19897c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushq did not have a check
19907c478bd9Sstevel@tonic-gate  * for q_lowat == 0 in the backenabling test.
19917c478bd9Sstevel@tonic-gate  *
19927c478bd9Sstevel@tonic-gate  * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
19937c478bd9Sstevel@tonic-gate  * if one exists on the queue.
19947c478bd9Sstevel@tonic-gate  */
19957c478bd9Sstevel@tonic-gate void
19967c478bd9Sstevel@tonic-gate flushq_common(queue_t *q, int flag, int pcproto_flag)
19977c478bd9Sstevel@tonic-gate {
19987c478bd9Sstevel@tonic-gate 	mblk_t *mp, *nmp;
19997c478bd9Sstevel@tonic-gate 	qband_t *qbp;
20007c478bd9Sstevel@tonic-gate 	int backenab = 0;
20017c478bd9Sstevel@tonic-gate 	unsigned char bpri;
20027c478bd9Sstevel@tonic-gate 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
20037c478bd9Sstevel@tonic-gate 
20047c478bd9Sstevel@tonic-gate 	if (q->q_first == NULL)
20057c478bd9Sstevel@tonic-gate 		return;
20067c478bd9Sstevel@tonic-gate 
20077c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
20087c478bd9Sstevel@tonic-gate 	mp = q->q_first;
20097c478bd9Sstevel@tonic-gate 	q->q_first = NULL;
20107c478bd9Sstevel@tonic-gate 	q->q_last = NULL;
20117c478bd9Sstevel@tonic-gate 	q->q_count = 0;
20127c478bd9Sstevel@tonic-gate 	q->q_mblkcnt = 0;
20137c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
20147c478bd9Sstevel@tonic-gate 		qbp->qb_first = NULL;
20157c478bd9Sstevel@tonic-gate 		qbp->qb_last = NULL;
20167c478bd9Sstevel@tonic-gate 		qbp->qb_count = 0;
20177c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt = 0;
20187c478bd9Sstevel@tonic-gate 		qbp->qb_flag &= ~QB_FULL;
20197c478bd9Sstevel@tonic-gate 	}
20207c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QFULL;
20217c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
20227c478bd9Sstevel@tonic-gate 	while (mp) {
20237c478bd9Sstevel@tonic-gate 		nmp = mp->b_next;
20247c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = NULL;
20257c478bd9Sstevel@tonic-gate 
20267c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);
20277c478bd9Sstevel@tonic-gate 
20287c478bd9Sstevel@tonic-gate 		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
20297c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
20307c478bd9Sstevel@tonic-gate 		else if (flag || datamsg(mp->b_datap->db_type))
20317c478bd9Sstevel@tonic-gate 			freemsg(mp);
20327c478bd9Sstevel@tonic-gate 		else
20337c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
20347c478bd9Sstevel@tonic-gate 		mp = nmp;
20357c478bd9Sstevel@tonic-gate 	}
20367c478bd9Sstevel@tonic-gate 	bpri = 1;
20377c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
20387c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
20397c478bd9Sstevel@tonic-gate 		if ((qbp->qb_flag & QB_WANTW) &&
20407c478bd9Sstevel@tonic-gate 		    (((qbp->qb_count < qbp->qb_lowat) &&
20417c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
20427c478bd9Sstevel@tonic-gate 		    qbp->qb_lowat == 0)) {
20437c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
20447c478bd9Sstevel@tonic-gate 			backenab = 1;
20457c478bd9Sstevel@tonic-gate 			qbf[bpri] = 1;
20467c478bd9Sstevel@tonic-gate 		} else
20477c478bd9Sstevel@tonic-gate 			qbf[bpri] = 0;
20487c478bd9Sstevel@tonic-gate 		bpri++;
20497c478bd9Sstevel@tonic-gate 	}
20507c478bd9Sstevel@tonic-gate 	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
20517c478bd9Sstevel@tonic-gate 	if ((q->q_flag & QWANTW) &&
20527c478bd9Sstevel@tonic-gate 	    (((q->q_count < q->q_lowat) &&
20537c478bd9Sstevel@tonic-gate 	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
20547c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTW;
20557c478bd9Sstevel@tonic-gate 		backenab = 1;
20567c478bd9Sstevel@tonic-gate 		qbf[0] = 1;
20577c478bd9Sstevel@tonic-gate 	} else
20587c478bd9Sstevel@tonic-gate 		qbf[0] = 0;
20597c478bd9Sstevel@tonic-gate 
20607c478bd9Sstevel@tonic-gate 	/*
20617c478bd9Sstevel@tonic-gate 	 * If any band can now be written to, and there is a writer
20627c478bd9Sstevel@tonic-gate 	 * for that band, then backenable the closest service procedure.
20637c478bd9Sstevel@tonic-gate 	 */
20647c478bd9Sstevel@tonic-gate 	if (backenab) {
20657c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
20667c478bd9Sstevel@tonic-gate 		for (bpri = q->q_nband; bpri != 0; bpri--)
20677c478bd9Sstevel@tonic-gate 			if (qbf[bpri])
2068116094b2Smicheng 				backenable(q, bpri);
20697c478bd9Sstevel@tonic-gate 		if (qbf[0])
20707c478bd9Sstevel@tonic-gate 			backenable(q, 0);
20717c478bd9Sstevel@tonic-gate 	} else
20727c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
20737c478bd9Sstevel@tonic-gate }
20747c478bd9Sstevel@tonic-gate 
20757c478bd9Sstevel@tonic-gate /*
20767c478bd9Sstevel@tonic-gate  * The real flushing takes place in flushq_common. This is done so that
20777c478bd9Sstevel@tonic-gate  * a flag which specifies whether or not M_PCPROTO messages should be flushed
20787c478bd9Sstevel@tonic-gate  * or not. Currently the only place that uses this flag is the stream head.
20797c478bd9Sstevel@tonic-gate  */
20807c478bd9Sstevel@tonic-gate void
20817c478bd9Sstevel@tonic-gate flushq(queue_t *q, int flag)
20827c478bd9Sstevel@tonic-gate {
20837c478bd9Sstevel@tonic-gate 	flushq_common(q, flag, 0);
20847c478bd9Sstevel@tonic-gate }
20857c478bd9Sstevel@tonic-gate 
20867c478bd9Sstevel@tonic-gate /*
20877c478bd9Sstevel@tonic-gate  * Flush the queue of messages of the given priority band.
20887c478bd9Sstevel@tonic-gate  * There is some duplication of code between flushq and flushband.
20897c478bd9Sstevel@tonic-gate  * This is because we want to optimize the code as much as possible.
20907c478bd9Sstevel@tonic-gate  * The assumption is that there will be more messages in the normal
20917c478bd9Sstevel@tonic-gate  * (priority 0) band than in any other.
20927c478bd9Sstevel@tonic-gate  *
20937c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
20947c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushband had an extra check for
20957c478bd9Sstevel@tonic-gate  * did not have a check for (mp->b_datap->db_type < QPCTL) in the band 0
20967c478bd9Sstevel@tonic-gate  * case. That check does not match the man page for flushband and was not
20977c478bd9Sstevel@tonic-gate  * in the strrput flush code hence it was removed.
20987c478bd9Sstevel@tonic-gate  */
20997c478bd9Sstevel@tonic-gate void
21007c478bd9Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag)
21017c478bd9Sstevel@tonic-gate {
21027c478bd9Sstevel@tonic-gate 	mblk_t *mp;
21037c478bd9Sstevel@tonic-gate 	mblk_t *nmp;
21047c478bd9Sstevel@tonic-gate 	mblk_t *last;
21057c478bd9Sstevel@tonic-gate 	qband_t *qbp;
21067c478bd9Sstevel@tonic-gate 	int band;
21077c478bd9Sstevel@tonic-gate 
21087c478bd9Sstevel@tonic-gate 	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
21097c478bd9Sstevel@tonic-gate 	if (pri > q->q_nband) {
21107c478bd9Sstevel@tonic-gate 		return;
21117c478bd9Sstevel@tonic-gate 	}
21127c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
21137c478bd9Sstevel@tonic-gate 	if (pri == 0) {
21147c478bd9Sstevel@tonic-gate 		mp = q->q_first;
21157c478bd9Sstevel@tonic-gate 		q->q_first = NULL;
21167c478bd9Sstevel@tonic-gate 		q->q_last = NULL;
21177c478bd9Sstevel@tonic-gate 		q->q_count = 0;
21187c478bd9Sstevel@tonic-gate 		q->q_mblkcnt = 0;
21197c478bd9Sstevel@tonic-gate 		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
21207c478bd9Sstevel@tonic-gate 			qbp->qb_first = NULL;
21217c478bd9Sstevel@tonic-gate 			qbp->qb_last = NULL;
21227c478bd9Sstevel@tonic-gate 			qbp->qb_count = 0;
21237c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt = 0;
21247c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
21257c478bd9Sstevel@tonic-gate 		}
21267c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QFULL;
21277c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
21287c478bd9Sstevel@tonic-gate 		while (mp) {
21297c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
21307c478bd9Sstevel@tonic-gate 			mp->b_next = mp->b_prev = NULL;
21317c478bd9Sstevel@tonic-gate 			if ((mp->b_band == 0) &&
21327c478bd9Sstevel@tonic-gate 			    ((flag == FLUSHALL) ||
21337c478bd9Sstevel@tonic-gate 			    datamsg(mp->b_datap->db_type)))
21347c478bd9Sstevel@tonic-gate 				freemsg(mp);
21357c478bd9Sstevel@tonic-gate 			else
21367c478bd9Sstevel@tonic-gate 				(void) putq(q, mp);
21377c478bd9Sstevel@tonic-gate 			mp = nmp;
21387c478bd9Sstevel@tonic-gate 		}
21397c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
21407c478bd9Sstevel@tonic-gate 		if ((q->q_flag & QWANTW) &&
21417c478bd9Sstevel@tonic-gate 		    (((q->q_count < q->q_lowat) &&
21427c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
21437c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
21447c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21457c478bd9Sstevel@tonic-gate 
2146116094b2Smicheng 			backenable(q, pri);
21477c478bd9Sstevel@tonic-gate 		} else
21487c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21497c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
21507c478bd9Sstevel@tonic-gate 		boolean_t flushed = B_FALSE;
21517c478bd9Sstevel@tonic-gate 		band = pri;
21527c478bd9Sstevel@tonic-gate 
21537c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
21547c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
21557c478bd9Sstevel@tonic-gate 		while (--band > 0)
21567c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
21577c478bd9Sstevel@tonic-gate 		mp = qbp->qb_first;
21587c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
21597c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21607c478bd9Sstevel@tonic-gate 			return;
21617c478bd9Sstevel@tonic-gate 		}
21627c478bd9Sstevel@tonic-gate 		last = qbp->qb_last->b_next;
21637c478bd9Sstevel@tonic-gate 		/*
21647c478bd9Sstevel@tonic-gate 		 * rmvq_noenab() and freemsg() are called for each mblk that
21657c478bd9Sstevel@tonic-gate 		 * meets the criteria.  The loop is executed until the last
21667c478bd9Sstevel@tonic-gate 		 * mblk has been processed.
21677c478bd9Sstevel@tonic-gate 		 */
21687c478bd9Sstevel@tonic-gate 		while (mp != last) {
21697c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_band == pri);
21707c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
21717c478bd9Sstevel@tonic-gate 			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
21727c478bd9Sstevel@tonic-gate 				rmvq_noenab(q, mp);
21737c478bd9Sstevel@tonic-gate 				freemsg(mp);
21747c478bd9Sstevel@tonic-gate 				flushed = B_TRUE;
21757c478bd9Sstevel@tonic-gate 			}
21767c478bd9Sstevel@tonic-gate 			mp = nmp;
21777c478bd9Sstevel@tonic-gate 		}
21787c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
21797c478bd9Sstevel@tonic-gate 
21807c478bd9Sstevel@tonic-gate 		/*
21817c478bd9Sstevel@tonic-gate 		 * If any mblk(s) has been freed, we know that qbackenable()
21827c478bd9Sstevel@tonic-gate 		 * will need to be called.
21837c478bd9Sstevel@tonic-gate 		 */
21847c478bd9Sstevel@tonic-gate 		if (flushed)
2185116094b2Smicheng 			qbackenable(q, pri);
21867c478bd9Sstevel@tonic-gate 	}
21877c478bd9Sstevel@tonic-gate }
21887c478bd9Sstevel@tonic-gate 
21897c478bd9Sstevel@tonic-gate /*
21907c478bd9Sstevel@tonic-gate  * Return 1 if the queue is not full.  If the queue is full, return
21917c478bd9Sstevel@tonic-gate  * 0 (may not put message) and set QWANTW flag (caller wants to write
21927c478bd9Sstevel@tonic-gate  * to the queue).
21937c478bd9Sstevel@tonic-gate  */
21947c478bd9Sstevel@tonic-gate int
21957c478bd9Sstevel@tonic-gate canput(queue_t *q)
21967c478bd9Sstevel@tonic-gate {
21977c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
21987c478bd9Sstevel@tonic-gate 
21997c478bd9Sstevel@tonic-gate 	/* this is for loopback transports, they should not do a canput */
22007c478bd9Sstevel@tonic-gate 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
22017c478bd9Sstevel@tonic-gate 
22027c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
22037c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
22047c478bd9Sstevel@tonic-gate 
22057c478bd9Sstevel@tonic-gate 	if (!(q->q_flag & QFULL)) {
22067c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
22077c478bd9Sstevel@tonic-gate 		return (1);
22087c478bd9Sstevel@tonic-gate 	}
22097c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
22107c478bd9Sstevel@tonic-gate 	if (q->q_flag & QFULL) {
22117c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTW;
22127c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
22137c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
22147c478bd9Sstevel@tonic-gate 		return (0);
22157c478bd9Sstevel@tonic-gate 	}
22167c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
22177c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
22187c478bd9Sstevel@tonic-gate 	return (1);
22197c478bd9Sstevel@tonic-gate }
22207c478bd9Sstevel@tonic-gate 
22217c478bd9Sstevel@tonic-gate /*
22227c478bd9Sstevel@tonic-gate  * This is the new canput for use with priority bands.  Return 1 if the
22237c478bd9Sstevel@tonic-gate  * band is not full.  If the band is full, return 0 (may not put message)
22247c478bd9Sstevel@tonic-gate  * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to
22257c478bd9Sstevel@tonic-gate  * write to the queue).
22267c478bd9Sstevel@tonic-gate  */
22277c478bd9Sstevel@tonic-gate int
22287c478bd9Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri)
22297c478bd9Sstevel@tonic-gate {
22307c478bd9Sstevel@tonic-gate 	qband_t *qbp;
22317c478bd9Sstevel@tonic-gate 
22327c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
22337c478bd9Sstevel@tonic-gate 	if (!q)
22347c478bd9Sstevel@tonic-gate 		return (0);
22357c478bd9Sstevel@tonic-gate 
22367c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
22377c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
22387c478bd9Sstevel@tonic-gate 
22397c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
22407c478bd9Sstevel@tonic-gate 	if (pri == 0) {
22417c478bd9Sstevel@tonic-gate 		if (q->q_flag & QFULL) {
22427c478bd9Sstevel@tonic-gate 			q->q_flag |= QWANTW;
22437c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22447c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22457c478bd9Sstevel@tonic-gate 			    "bcanput:%p %X %d", q, pri, 0);
22467c478bd9Sstevel@tonic-gate 			return (0);
22477c478bd9Sstevel@tonic-gate 		}
22487c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
22497c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
22507c478bd9Sstevel@tonic-gate 			/*
22517c478bd9Sstevel@tonic-gate 			 * No band exists yet, so return success.
22527c478bd9Sstevel@tonic-gate 			 */
22537c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22547c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22557c478bd9Sstevel@tonic-gate 			    "bcanput:%p %X %d", q, pri, 1);
22567c478bd9Sstevel@tonic-gate 			return (1);
22577c478bd9Sstevel@tonic-gate 		}
22587c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
22597c478bd9Sstevel@tonic-gate 		while (--pri)
22607c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
22617c478bd9Sstevel@tonic-gate 		if (qbp->qb_flag & QB_FULL) {
22627c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_WANTW;
22637c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22647c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22657c478bd9Sstevel@tonic-gate 			    "bcanput:%p %X %d", q, pri, 0);
22667c478bd9Sstevel@tonic-gate 			return (0);
22677c478bd9Sstevel@tonic-gate 		}
22687c478bd9Sstevel@tonic-gate 	}
22697c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
22707c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22717c478bd9Sstevel@tonic-gate 	    "bcanput:%p %X %d", q, pri, 1);
22727c478bd9Sstevel@tonic-gate 	return (1);
22737c478bd9Sstevel@tonic-gate }
22747c478bd9Sstevel@tonic-gate 
22757c478bd9Sstevel@tonic-gate /*
22767c478bd9Sstevel@tonic-gate  * Put a message on a queue.
22777c478bd9Sstevel@tonic-gate  *
22787c478bd9Sstevel@tonic-gate  * Messages are enqueued on a priority basis.  The priority classes
22797c478bd9Sstevel@tonic-gate  * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
22807c478bd9Sstevel@tonic-gate  * and B_NORMAL (type < QPCTL && band == 0).
22817c478bd9Sstevel@tonic-gate  *
22827c478bd9Sstevel@tonic-gate  * Add appropriate weighted data block sizes to queue count.
22837c478bd9Sstevel@tonic-gate  * If queue hits high water mark then set QFULL flag.
22847c478bd9Sstevel@tonic-gate  *
22857c478bd9Sstevel@tonic-gate  * If QNOENAB is not set (putq is allowed to enable the queue),
22867c478bd9Sstevel@tonic-gate  * enable the queue only if the message is PRIORITY,
 * or the QWANTR flag is set (indicating that the service procedure
 * is ready to read the queue).  This implies that a service
22897c478bd9Sstevel@tonic-gate  * procedure must NEVER put a high priority message back on its own
22907c478bd9Sstevel@tonic-gate  * queue, as this would result in an infinite loop (!).
22917c478bd9Sstevel@tonic-gate  */
22927c478bd9Sstevel@tonic-gate int
22937c478bd9Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp)
22947c478bd9Sstevel@tonic-gate {
22957c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
22967c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
22977c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
22987c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
22997c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
23007c478bd9Sstevel@tonic-gate 
23017c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
23027c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
23037c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
23047c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
23057c478bd9Sstevel@tonic-gate 	} else
23067c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
23077c478bd9Sstevel@tonic-gate 
23087c478bd9Sstevel@tonic-gate 	/*
23097c478bd9Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
23107c478bd9Sstevel@tonic-gate 	 * allocated, do so.
23117c478bd9Sstevel@tonic-gate 	 */
23127c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
23137c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
23147c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
23157c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
23167c478bd9Sstevel@tonic-gate 		int i;
23177c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
23187c478bd9Sstevel@tonic-gate 
23197c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
23207c478bd9Sstevel@tonic-gate 
23217c478bd9Sstevel@tonic-gate 			/*
23227c478bd9Sstevel@tonic-gate 			 * The qband structure for this priority band is
23237c478bd9Sstevel@tonic-gate 			 * not on the queue yet, so we have to allocate
23247c478bd9Sstevel@tonic-gate 			 * one on the fly.  It would be wasteful to
23257c478bd9Sstevel@tonic-gate 			 * associate the qband structures with every
23267c478bd9Sstevel@tonic-gate 			 * queue when the queues are allocated.  This is
23277c478bd9Sstevel@tonic-gate 			 * because most queues will only need the normal
23287c478bd9Sstevel@tonic-gate 			 * band of flow which can be described entirely
23297c478bd9Sstevel@tonic-gate 			 * by the queue itself.
23307c478bd9Sstevel@tonic-gate 			 */
23317c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
23327c478bd9Sstevel@tonic-gate 			while (*qbpp)
23337c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
23347c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
23357c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
23367c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
23377c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
23387c478bd9Sstevel@tonic-gate 					return (0);
23397c478bd9Sstevel@tonic-gate 				}
23407c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
23417c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
23427c478bd9Sstevel@tonic-gate 				q->q_nband++;
23437c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
23447c478bd9Sstevel@tonic-gate 			}
23457c478bd9Sstevel@tonic-gate 		}
23467c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
23477c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
23487c478bd9Sstevel@tonic-gate 		i = bp->b_band;
23497c478bd9Sstevel@tonic-gate 		while (--i)
23507c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
23517c478bd9Sstevel@tonic-gate 	}
23527c478bd9Sstevel@tonic-gate 
23537c478bd9Sstevel@tonic-gate 	/*
23547c478bd9Sstevel@tonic-gate 	 * If queue is empty, add the message and initialize the pointers.
23557c478bd9Sstevel@tonic-gate 	 * Otherwise, adjust message pointers and queue pointers based on
23567c478bd9Sstevel@tonic-gate 	 * the type of the message and where it belongs on the queue.  Some
23577c478bd9Sstevel@tonic-gate 	 * code is duplicated to minimize the number of conditionals and
23587c478bd9Sstevel@tonic-gate 	 * hopefully minimize the amount of time this routine takes.
23597c478bd9Sstevel@tonic-gate 	 */
23607c478bd9Sstevel@tonic-gate 	if (!q->q_first) {
23617c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
23627c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
23637c478bd9Sstevel@tonic-gate 		q->q_first = bp;
23647c478bd9Sstevel@tonic-gate 		q->q_last = bp;
23657c478bd9Sstevel@tonic-gate 		if (qbp) {
23667c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
23677c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
23687c478bd9Sstevel@tonic-gate 		}
23697c478bd9Sstevel@tonic-gate 	} else if (!qbp) {	/* bp->b_band == 0 */
23707c478bd9Sstevel@tonic-gate 
23717c478bd9Sstevel@tonic-gate 		/*
23727c478bd9Sstevel@tonic-gate 		 * If queue class of message is less than or equal to
23737c478bd9Sstevel@tonic-gate 		 * that of the last one on the queue, tack on to the end.
23747c478bd9Sstevel@tonic-gate 		 */
23757c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
23767c478bd9Sstevel@tonic-gate 		if (mcls <= (int)queclass(tmp)) {
23777c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
23787c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
23797c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
23807c478bd9Sstevel@tonic-gate 			q->q_last = bp;
23817c478bd9Sstevel@tonic-gate 		} else {
23827c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
23837c478bd9Sstevel@tonic-gate 			while ((int)queclass(tmp) >= mcls)
23847c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
23857c478bd9Sstevel@tonic-gate 
23867c478bd9Sstevel@tonic-gate 			/*
23877c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
23887c478bd9Sstevel@tonic-gate 			 */
23897c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
23907c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
23917c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
23927c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
23937c478bd9Sstevel@tonic-gate 			else
23947c478bd9Sstevel@tonic-gate 				q->q_first = bp;
23957c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
23967c478bd9Sstevel@tonic-gate 		}
23977c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band != 0 */
23987c478bd9Sstevel@tonic-gate 		if (qbp->qb_first) {
23997c478bd9Sstevel@tonic-gate 			tmp = qbp->qb_last;
24007c478bd9Sstevel@tonic-gate 
24017c478bd9Sstevel@tonic-gate 			/*
24027c478bd9Sstevel@tonic-gate 			 * Insert bp after the last message in this band.
24037c478bd9Sstevel@tonic-gate 			 */
24047c478bd9Sstevel@tonic-gate 			bp->b_next = tmp->b_next;
24057c478bd9Sstevel@tonic-gate 			if (tmp->b_next)
24067c478bd9Sstevel@tonic-gate 				tmp->b_next->b_prev = bp;
24077c478bd9Sstevel@tonic-gate 			else
24087c478bd9Sstevel@tonic-gate 				q->q_last = bp;
24097c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
24107c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
24117c478bd9Sstevel@tonic-gate 		} else {
24127c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
24137c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
24147c478bd9Sstevel@tonic-gate 			    (bp->b_band <= tmp->b_band)) {
24157c478bd9Sstevel@tonic-gate 
24167c478bd9Sstevel@tonic-gate 				/*
24177c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
24187c478bd9Sstevel@tonic-gate 				 */
24197c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
24207c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
24217c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
24227c478bd9Sstevel@tonic-gate 				q->q_last = bp;
24237c478bd9Sstevel@tonic-gate 			} else {
24247c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
24257c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
24267c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
24277c478bd9Sstevel@tonic-gate 				while (tmp->b_band >= bp->b_band)
24287c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
24297c478bd9Sstevel@tonic-gate 
24307c478bd9Sstevel@tonic-gate 				/*
24317c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
24327c478bd9Sstevel@tonic-gate 				 */
24337c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
24347c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
24357c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
24367c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
24377c478bd9Sstevel@tonic-gate 				else
24387c478bd9Sstevel@tonic-gate 					q->q_first = bp;
24397c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
24407c478bd9Sstevel@tonic-gate 			}
24417c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
24427c478bd9Sstevel@tonic-gate 		}
24437c478bd9Sstevel@tonic-gate 		qbp->qb_last = bp;
24447c478bd9Sstevel@tonic-gate 	}
24457c478bd9Sstevel@tonic-gate 
24467c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
24477c478bd9Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
2448ff550d0eSmasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
24497c478bd9Sstevel@tonic-gate 		mblkcnt++;
24507c478bd9Sstevel@tonic-gate 	}
2451ff550d0eSmasputra 
24527c478bd9Sstevel@tonic-gate 	if (qbp) {
24537c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
24547c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
24557c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
24567c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
24577c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
24587c478bd9Sstevel@tonic-gate 		}
24597c478bd9Sstevel@tonic-gate 	} else {
24607c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
24617c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
24627c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
24637c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
24647c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
24657c478bd9Sstevel@tonic-gate 		}
24667c478bd9Sstevel@tonic-gate 	}
24677c478bd9Sstevel@tonic-gate 
24687c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);
24697c478bd9Sstevel@tonic-gate 
24707c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) ||
24717c478bd9Sstevel@tonic-gate 	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
24727c478bd9Sstevel@tonic-gate 		qenable_locked(q);
24737c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
24747c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
24757c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
24767c478bd9Sstevel@tonic-gate 
24777c478bd9Sstevel@tonic-gate 	return (1);
24787c478bd9Sstevel@tonic-gate }
24797c478bd9Sstevel@tonic-gate 
24807c478bd9Sstevel@tonic-gate /*
24817c478bd9Sstevel@tonic-gate  * Put stuff back at beginning of Q according to priority order.
24827c478bd9Sstevel@tonic-gate  * See comment on putq above for details.
24837c478bd9Sstevel@tonic-gate  */
24847c478bd9Sstevel@tonic-gate int
24857c478bd9Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp)
24867c478bd9Sstevel@tonic-gate {
24877c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
24887c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
24897c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
24907c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
24917c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
24927c478bd9Sstevel@tonic-gate 
24937c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
24947c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_next == NULL);
24957c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
24967c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
24977c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
24987c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
24997c478bd9Sstevel@tonic-gate 	} else
25007c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
25017c478bd9Sstevel@tonic-gate 
25027c478bd9Sstevel@tonic-gate 	/*
25037c478bd9Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
25047c478bd9Sstevel@tonic-gate 	 * allocated, do so.
25057c478bd9Sstevel@tonic-gate 	 */
25067c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
25077c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
25087c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
25097c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
25107c478bd9Sstevel@tonic-gate 		int i;
25117c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
25127c478bd9Sstevel@tonic-gate 
25137c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
25147c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
25157c478bd9Sstevel@tonic-gate 			while (*qbpp)
25167c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
25177c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
25187c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
25197c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
25207c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
25217c478bd9Sstevel@tonic-gate 					return (0);
25227c478bd9Sstevel@tonic-gate 				}
25237c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
25247c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
25257c478bd9Sstevel@tonic-gate 				q->q_nband++;
25267c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
25277c478bd9Sstevel@tonic-gate 			}
25287c478bd9Sstevel@tonic-gate 		}
25297c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
25307c478bd9Sstevel@tonic-gate 		i = bp->b_band;
25317c478bd9Sstevel@tonic-gate 		while (--i)
25327c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
25337c478bd9Sstevel@tonic-gate 	}
25347c478bd9Sstevel@tonic-gate 
25357c478bd9Sstevel@tonic-gate 	/*
25367c478bd9Sstevel@tonic-gate 	 * If queue is empty or if message is high priority,
25377c478bd9Sstevel@tonic-gate 	 * place on the front of the queue.
25387c478bd9Sstevel@tonic-gate 	 */
25397c478bd9Sstevel@tonic-gate 	tmp = q->q_first;
25407c478bd9Sstevel@tonic-gate 	if ((!tmp) || (mcls == QPCTL)) {
25417c478bd9Sstevel@tonic-gate 		bp->b_next = tmp;
25427c478bd9Sstevel@tonic-gate 		if (tmp)
25437c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
25447c478bd9Sstevel@tonic-gate 		else
25457c478bd9Sstevel@tonic-gate 			q->q_last = bp;
25467c478bd9Sstevel@tonic-gate 		q->q_first = bp;
25477c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
25487c478bd9Sstevel@tonic-gate 		if (qbp) {
25497c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
25507c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
25517c478bd9Sstevel@tonic-gate 		}
25527c478bd9Sstevel@tonic-gate 	} else if (qbp) {	/* bp->b_band != 0 */
25537c478bd9Sstevel@tonic-gate 		tmp = qbp->qb_first;
25547c478bd9Sstevel@tonic-gate 		if (tmp) {
25557c478bd9Sstevel@tonic-gate 
25567c478bd9Sstevel@tonic-gate 			/*
25577c478bd9Sstevel@tonic-gate 			 * Insert bp before the first message in this band.
25587c478bd9Sstevel@tonic-gate 			 */
25597c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
25607c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
25617c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
25627c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
25637c478bd9Sstevel@tonic-gate 			else
25647c478bd9Sstevel@tonic-gate 				q->q_first = bp;
25657c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
25667c478bd9Sstevel@tonic-gate 		} else {
25677c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
25687c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
25697c478bd9Sstevel@tonic-gate 			    (bp->b_band < tmp->b_band)) {
25707c478bd9Sstevel@tonic-gate 
25717c478bd9Sstevel@tonic-gate 				/*
25727c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
25737c478bd9Sstevel@tonic-gate 				 */
25747c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
25757c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
25767c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
25777c478bd9Sstevel@tonic-gate 				q->q_last = bp;
25787c478bd9Sstevel@tonic-gate 			} else {
25797c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
25807c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
25817c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
25827c478bd9Sstevel@tonic-gate 				while (tmp->b_band > bp->b_band)
25837c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
25847c478bd9Sstevel@tonic-gate 
25857c478bd9Sstevel@tonic-gate 				/*
25867c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
25877c478bd9Sstevel@tonic-gate 				 */
25887c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
25897c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
25907c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
25917c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
25927c478bd9Sstevel@tonic-gate 				else
25937c478bd9Sstevel@tonic-gate 					q->q_first = bp;
25947c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
25957c478bd9Sstevel@tonic-gate 			}
25967c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
25977c478bd9Sstevel@tonic-gate 		}
25987c478bd9Sstevel@tonic-gate 		qbp->qb_first = bp;
25997c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band == 0 && !QPCTL */
26007c478bd9Sstevel@tonic-gate 
26017c478bd9Sstevel@tonic-gate 		/*
26027c478bd9Sstevel@tonic-gate 		 * If the queue class or band is less than that of the last
26037c478bd9Sstevel@tonic-gate 		 * message on the queue, tack bp on the end of the queue.
26047c478bd9Sstevel@tonic-gate 		 */
26057c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
26067c478bd9Sstevel@tonic-gate 		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
26077c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
26087c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
26097c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
26107c478bd9Sstevel@tonic-gate 			q->q_last = bp;
26117c478bd9Sstevel@tonic-gate 		} else {
26127c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
26137c478bd9Sstevel@tonic-gate 			while (tmp->b_datap->db_type >= QPCTL)
26147c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
26157c478bd9Sstevel@tonic-gate 			while (tmp->b_band > bp->b_band)
26167c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
26177c478bd9Sstevel@tonic-gate 
26187c478bd9Sstevel@tonic-gate 			/*
26197c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
26207c478bd9Sstevel@tonic-gate 			 */
26217c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
26227c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
26237c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
26247c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
26257c478bd9Sstevel@tonic-gate 			else
26267c478bd9Sstevel@tonic-gate 				q->q_first = bp;
26277c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
26287c478bd9Sstevel@tonic-gate 		}
26297c478bd9Sstevel@tonic-gate 	}
26307c478bd9Sstevel@tonic-gate 
26317c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
26327c478bd9Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
2633ff550d0eSmasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
26347c478bd9Sstevel@tonic-gate 		mblkcnt++;
26357c478bd9Sstevel@tonic-gate 	}
26367c478bd9Sstevel@tonic-gate 	if (qbp) {
26377c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
26387c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
26397c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
26407c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
26417c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
26427c478bd9Sstevel@tonic-gate 		}
26437c478bd9Sstevel@tonic-gate 	} else {
26447c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
26457c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
26467c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
26477c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
26487c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
26497c478bd9Sstevel@tonic-gate 		}
26507c478bd9Sstevel@tonic-gate 	}
26517c478bd9Sstevel@tonic-gate 
26527c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);
26537c478bd9Sstevel@tonic-gate 
26547c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
26557c478bd9Sstevel@tonic-gate 		qenable_locked(q);
26567c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
26577c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
26587c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
26597c478bd9Sstevel@tonic-gate 
26607c478bd9Sstevel@tonic-gate 	return (1);
26617c478bd9Sstevel@tonic-gate }
26627c478bd9Sstevel@tonic-gate 
26637c478bd9Sstevel@tonic-gate /*
26647c478bd9Sstevel@tonic-gate  * Insert a message before an existing message on the queue.  If the
26657c478bd9Sstevel@tonic-gate  * existing message is NULL, the new messages is placed on the end of
26667c478bd9Sstevel@tonic-gate  * the queue.  The queue class of the new message is ignored.  However,
26677c478bd9Sstevel@tonic-gate  * the priority band of the new message must adhere to the following
26687c478bd9Sstevel@tonic-gate  * ordering:
26697c478bd9Sstevel@tonic-gate  *
26707c478bd9Sstevel@tonic-gate  *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
26717c478bd9Sstevel@tonic-gate  *
26727c478bd9Sstevel@tonic-gate  * All flow control parameters are updated.
26737c478bd9Sstevel@tonic-gate  *
26747c478bd9Sstevel@tonic-gate  * insq can be called with the stream frozen, but other utility functions
26757c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
26767c478bd9Sstevel@tonic-gate  */
26777c478bd9Sstevel@tonic-gate int
26787c478bd9Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp)
26797c478bd9Sstevel@tonic-gate {
26807c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
26817c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
26827c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(mp);
26837c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
26847c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
26857c478bd9Sstevel@tonic-gate 
26867c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
26877c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
26887c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
26897c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
26907c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
26917c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
26927c478bd9Sstevel@tonic-gate 		freezer = curthread;
26937c478bd9Sstevel@tonic-gate 	} else
26947c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
26957c478bd9Sstevel@tonic-gate 
26967c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
26977c478bd9Sstevel@tonic-gate 		if (mp->b_band != 0)
26987c478bd9Sstevel@tonic-gate 			mp->b_band = 0;		/* force to be correct */
26997c478bd9Sstevel@tonic-gate 		if (emp && emp->b_prev &&
27007c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_datap->db_type < QPCTL))
27017c478bd9Sstevel@tonic-gate 			goto badord;
27027c478bd9Sstevel@tonic-gate 	}
27037c478bd9Sstevel@tonic-gate 	if (emp) {
27047c478bd9Sstevel@tonic-gate 		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
27057c478bd9Sstevel@tonic-gate 		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
27067c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_band < mp->b_band))) {
27077c478bd9Sstevel@tonic-gate 			goto badord;
27087c478bd9Sstevel@tonic-gate 		}
27097c478bd9Sstevel@tonic-gate 	} else {
27107c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
27117c478bd9Sstevel@tonic-gate 		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
27127c478bd9Sstevel@tonic-gate badord:
27137c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
27147c478bd9Sstevel@tonic-gate 			    "insq: attempt to insert message out of order "
27157c478bd9Sstevel@tonic-gate 			    "on q %p", (void *)q);
27167c478bd9Sstevel@tonic-gate 			if (freezer != curthread)
27177c478bd9Sstevel@tonic-gate 				mutex_exit(QLOCK(q));
27187c478bd9Sstevel@tonic-gate 			return (0);
27197c478bd9Sstevel@tonic-gate 		}
27207c478bd9Sstevel@tonic-gate 	}
27217c478bd9Sstevel@tonic-gate 
27227c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {
27237c478bd9Sstevel@tonic-gate 		int i;
27247c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
27257c478bd9Sstevel@tonic-gate 
27267c478bd9Sstevel@tonic-gate 		if (mp->b_band > q->q_nband) {
27277c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
27287c478bd9Sstevel@tonic-gate 			while (*qbpp)
27297c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
27307c478bd9Sstevel@tonic-gate 			while (mp->b_band > q->q_nband) {
27317c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
27327c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
27337c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
27347c478bd9Sstevel@tonic-gate 					return (0);
27357c478bd9Sstevel@tonic-gate 				}
27367c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
27377c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
27387c478bd9Sstevel@tonic-gate 				q->q_nband++;
27397c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
27407c478bd9Sstevel@tonic-gate 			}
27417c478bd9Sstevel@tonic-gate 		}
27427c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
27437c478bd9Sstevel@tonic-gate 		i = mp->b_band;
27447c478bd9Sstevel@tonic-gate 		while (--i)
27457c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
27467c478bd9Sstevel@tonic-gate 	}
27477c478bd9Sstevel@tonic-gate 
27487c478bd9Sstevel@tonic-gate 	if ((mp->b_next = emp) != NULL) {
27497c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = emp->b_prev) != NULL)
27507c478bd9Sstevel@tonic-gate 			emp->b_prev->b_next = mp;
27517c478bd9Sstevel@tonic-gate 		else
27527c478bd9Sstevel@tonic-gate 			q->q_first = mp;
27537c478bd9Sstevel@tonic-gate 		emp->b_prev = mp;
27547c478bd9Sstevel@tonic-gate 	} else {
27557c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = q->q_last) != NULL)
27567c478bd9Sstevel@tonic-gate 			q->q_last->b_next = mp;
27577c478bd9Sstevel@tonic-gate 		else
27587c478bd9Sstevel@tonic-gate 			q->q_first = mp;
27597c478bd9Sstevel@tonic-gate 		q->q_last = mp;
27607c478bd9Sstevel@tonic-gate 	}
27617c478bd9Sstevel@tonic-gate 
27627c478bd9Sstevel@tonic-gate 	/* Get mblk and byte count for q_count accounting */
27637c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
2764ff550d0eSmasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
27657c478bd9Sstevel@tonic-gate 		mblkcnt++;
27667c478bd9Sstevel@tonic-gate 	}
27677c478bd9Sstevel@tonic-gate 
27687c478bd9Sstevel@tonic-gate 	if (qbp) {	/* adjust qband pointers and count */
27697c478bd9Sstevel@tonic-gate 		if (!qbp->qb_first) {
27707c478bd9Sstevel@tonic-gate 			qbp->qb_first = mp;
27717c478bd9Sstevel@tonic-gate 			qbp->qb_last = mp;
27727c478bd9Sstevel@tonic-gate 		} else {
27737c478bd9Sstevel@tonic-gate 			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
27747c478bd9Sstevel@tonic-gate 			    (mp->b_prev->b_band != mp->b_band)))
27757c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp;
27767c478bd9Sstevel@tonic-gate 			else if (mp->b_next == NULL || (mp->b_next != NULL &&
27777c478bd9Sstevel@tonic-gate 			    (mp->b_next->b_band != mp->b_band)))
27787c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp;
27797c478bd9Sstevel@tonic-gate 		}
27807c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
27817c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
27827c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
27837c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
27847c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
27857c478bd9Sstevel@tonic-gate 		}
27867c478bd9Sstevel@tonic-gate 	} else {
27877c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
27887c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
27897c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
27907c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
27917c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
27927c478bd9Sstevel@tonic-gate 		}
27937c478bd9Sstevel@tonic-gate 	}
27947c478bd9Sstevel@tonic-gate 
27957c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL);
27967c478bd9Sstevel@tonic-gate 
27977c478bd9Sstevel@tonic-gate 	if (canenable(q) && (q->q_flag & QWANTR))
27987c478bd9Sstevel@tonic-gate 		qenable_locked(q);
27997c478bd9Sstevel@tonic-gate 
28007c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
28017c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
28027c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
28037c478bd9Sstevel@tonic-gate 
28047c478bd9Sstevel@tonic-gate 	return (1);
28057c478bd9Sstevel@tonic-gate }
28067c478bd9Sstevel@tonic-gate 
28077c478bd9Sstevel@tonic-gate /*
28087c478bd9Sstevel@tonic-gate  * Create and put a control message on queue.
28097c478bd9Sstevel@tonic-gate  */
28107c478bd9Sstevel@tonic-gate int
28117c478bd9Sstevel@tonic-gate putctl(queue_t *q, int type)
28127c478bd9Sstevel@tonic-gate {
28137c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28147c478bd9Sstevel@tonic-gate 
28157c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28167c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(0)) == NULL)
28177c478bd9Sstevel@tonic-gate 		return (0);
28187c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char) type;
28197c478bd9Sstevel@tonic-gate 
28207c478bd9Sstevel@tonic-gate 	put(q, bp);
28217c478bd9Sstevel@tonic-gate 
28227c478bd9Sstevel@tonic-gate 	return (1);
28237c478bd9Sstevel@tonic-gate }
28247c478bd9Sstevel@tonic-gate 
28257c478bd9Sstevel@tonic-gate /*
28267c478bd9Sstevel@tonic-gate  * Control message with a single-byte parameter
28277c478bd9Sstevel@tonic-gate  */
28287c478bd9Sstevel@tonic-gate int
28297c478bd9Sstevel@tonic-gate putctl1(queue_t *q, int type, int param)
28307c478bd9Sstevel@tonic-gate {
28317c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28327c478bd9Sstevel@tonic-gate 
28337c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28347c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(1)) == NULL)
28357c478bd9Sstevel@tonic-gate 		return (0);
28367c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28377c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
28387c478bd9Sstevel@tonic-gate 
28397c478bd9Sstevel@tonic-gate 	put(q, bp);
28407c478bd9Sstevel@tonic-gate 
28417c478bd9Sstevel@tonic-gate 	return (1);
28427c478bd9Sstevel@tonic-gate }
28437c478bd9Sstevel@tonic-gate 
28447c478bd9Sstevel@tonic-gate int
28457c478bd9Sstevel@tonic-gate putnextctl1(queue_t *q, int type, int param)
28467c478bd9Sstevel@tonic-gate {
28477c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28487c478bd9Sstevel@tonic-gate 
28497c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28507c478bd9Sstevel@tonic-gate 	    ((bp = allocb_tryhard(1)) == NULL))
28517c478bd9Sstevel@tonic-gate 		return (0);
28527c478bd9Sstevel@tonic-gate 
28537c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28547c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
28557c478bd9Sstevel@tonic-gate 
28567c478bd9Sstevel@tonic-gate 	putnext(q, bp);
28577c478bd9Sstevel@tonic-gate 
28587c478bd9Sstevel@tonic-gate 	return (1);
28597c478bd9Sstevel@tonic-gate }
28607c478bd9Sstevel@tonic-gate 
28617c478bd9Sstevel@tonic-gate int
28627c478bd9Sstevel@tonic-gate putnextctl(queue_t *q, int type)
28637c478bd9Sstevel@tonic-gate {
28647c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28657c478bd9Sstevel@tonic-gate 
28667c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28677c478bd9Sstevel@tonic-gate 	    ((bp = allocb_tryhard(0)) == NULL))
28687c478bd9Sstevel@tonic-gate 		return (0);
28697c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28707c478bd9Sstevel@tonic-gate 
28717c478bd9Sstevel@tonic-gate 	putnext(q, bp);
28727c478bd9Sstevel@tonic-gate 
28737c478bd9Sstevel@tonic-gate 	return (1);
28747c478bd9Sstevel@tonic-gate }
28757c478bd9Sstevel@tonic-gate 
28767c478bd9Sstevel@tonic-gate /*
28777c478bd9Sstevel@tonic-gate  * Return the queue upstream from this one
28787c478bd9Sstevel@tonic-gate  */
28797c478bd9Sstevel@tonic-gate queue_t *
28807c478bd9Sstevel@tonic-gate backq(queue_t *q)
28817c478bd9Sstevel@tonic-gate {
28827c478bd9Sstevel@tonic-gate 	q = _OTHERQ(q);
28837c478bd9Sstevel@tonic-gate 	if (q->q_next) {
28847c478bd9Sstevel@tonic-gate 		q = q->q_next;
28857c478bd9Sstevel@tonic-gate 		return (_OTHERQ(q));
28867c478bd9Sstevel@tonic-gate 	}
28877c478bd9Sstevel@tonic-gate 	return (NULL);
28887c478bd9Sstevel@tonic-gate }
28897c478bd9Sstevel@tonic-gate 
28907c478bd9Sstevel@tonic-gate /*
28917c478bd9Sstevel@tonic-gate  * Send a block back up the queue in reverse from this
28927c478bd9Sstevel@tonic-gate  * one (e.g. to respond to ioctls)
28937c478bd9Sstevel@tonic-gate  */
28947c478bd9Sstevel@tonic-gate void
28957c478bd9Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp)
28967c478bd9Sstevel@tonic-gate {
28977c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
28987c478bd9Sstevel@tonic-gate 
28997c478bd9Sstevel@tonic-gate 	putnext(_OTHERQ(q), bp);
29007c478bd9Sstevel@tonic-gate }
29017c478bd9Sstevel@tonic-gate 
29027c478bd9Sstevel@tonic-gate /*
29037c478bd9Sstevel@tonic-gate  * Streams Queue Scheduling
29047c478bd9Sstevel@tonic-gate  *
29057c478bd9Sstevel@tonic-gate  * Queues are enabled through qenable() when they have messages to
29067c478bd9Sstevel@tonic-gate  * process.  They are serviced by queuerun(), which runs each enabled
29077c478bd9Sstevel@tonic-gate  * queue's service procedure.  The call to queuerun() is processor
29087c478bd9Sstevel@tonic-gate  * dependent - the general principle is that it be run whenever a queue
29097c478bd9Sstevel@tonic-gate  * is enabled but before returning to user level.  For system calls,
29107c478bd9Sstevel@tonic-gate  * the function runqueues() is called if their action causes a queue
29117c478bd9Sstevel@tonic-gate  * to be enabled.  For device interrupts, queuerun() should be
29127c478bd9Sstevel@tonic-gate  * called before returning from the last level of interrupt.  Beyond
29137c478bd9Sstevel@tonic-gate  * this, no timing assumptions should be made about queue scheduling.
29147c478bd9Sstevel@tonic-gate  */
29157c478bd9Sstevel@tonic-gate 
29167c478bd9Sstevel@tonic-gate /*
29177c478bd9Sstevel@tonic-gate  * Enable a queue: put it on list of those whose service procedures are
29187c478bd9Sstevel@tonic-gate  * ready to run and set up the scheduling mechanism.
29197c478bd9Sstevel@tonic-gate  * The broadcast is done outside the mutex -> to avoid the woken thread
29207c478bd9Sstevel@tonic-gate  * from contending with the mutex. This is OK 'cos the queue has been
29217c478bd9Sstevel@tonic-gate  * enqueued on the runlist and flagged safely at this point.
29227c478bd9Sstevel@tonic-gate  */
29237c478bd9Sstevel@tonic-gate void
29247c478bd9Sstevel@tonic-gate qenable(queue_t *q)
29257c478bd9Sstevel@tonic-gate {
29267c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29277c478bd9Sstevel@tonic-gate 	qenable_locked(q);
29287c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29297c478bd9Sstevel@tonic-gate }
29307c478bd9Sstevel@tonic-gate /*
29317c478bd9Sstevel@tonic-gate  * Return number of messages on queue
29327c478bd9Sstevel@tonic-gate  */
29337c478bd9Sstevel@tonic-gate int
29347c478bd9Sstevel@tonic-gate qsize(queue_t *qp)
29357c478bd9Sstevel@tonic-gate {
29367c478bd9Sstevel@tonic-gate 	int count = 0;
29377c478bd9Sstevel@tonic-gate 	mblk_t *mp;
29387c478bd9Sstevel@tonic-gate 
29397c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
29407c478bd9Sstevel@tonic-gate 	for (mp = qp->q_first; mp; mp = mp->b_next)
29417c478bd9Sstevel@tonic-gate 		count++;
29427c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
29437c478bd9Sstevel@tonic-gate 	return (count);
29447c478bd9Sstevel@tonic-gate }
29457c478bd9Sstevel@tonic-gate 
29467c478bd9Sstevel@tonic-gate /*
29477c478bd9Sstevel@tonic-gate  * noenable - set queue so that putq() will not enable it.
29487c478bd9Sstevel@tonic-gate  * enableok - set queue so that putq() can enable it.
29497c478bd9Sstevel@tonic-gate  */
29507c478bd9Sstevel@tonic-gate void
29517c478bd9Sstevel@tonic-gate noenable(queue_t *q)
29527c478bd9Sstevel@tonic-gate {
29537c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29547c478bd9Sstevel@tonic-gate 	q->q_flag |= QNOENB;
29557c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29567c478bd9Sstevel@tonic-gate }
29577c478bd9Sstevel@tonic-gate 
29587c478bd9Sstevel@tonic-gate void
29597c478bd9Sstevel@tonic-gate enableok(queue_t *q)
29607c478bd9Sstevel@tonic-gate {
29617c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29627c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QNOENB;
29637c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29647c478bd9Sstevel@tonic-gate }
29657c478bd9Sstevel@tonic-gate 
29667c478bd9Sstevel@tonic-gate /*
29677c478bd9Sstevel@tonic-gate  * Set queue fields.
29687c478bd9Sstevel@tonic-gate  */
29697c478bd9Sstevel@tonic-gate int
29707c478bd9Sstevel@tonic-gate strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
29717c478bd9Sstevel@tonic-gate {
29727c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
29737c478bd9Sstevel@tonic-gate 	queue_t	*wrq;
29747c478bd9Sstevel@tonic-gate 	int error = 0;
29757c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
29767c478bd9Sstevel@tonic-gate 
29777c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
29787c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
29797c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
29807c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
29817c478bd9Sstevel@tonic-gate 	} else
29827c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
29837c478bd9Sstevel@tonic-gate 
29847c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
29857c478bd9Sstevel@tonic-gate 		error = EINVAL;
29867c478bd9Sstevel@tonic-gate 		goto done;
29877c478bd9Sstevel@tonic-gate 	}
29887c478bd9Sstevel@tonic-gate 	if (pri != 0) {
29897c478bd9Sstevel@tonic-gate 		int i;
29907c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
29917c478bd9Sstevel@tonic-gate 
29927c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
29937c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
29947c478bd9Sstevel@tonic-gate 			while (*qbpp)
29957c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
29967c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
29977c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
29987c478bd9Sstevel@tonic-gate 					error = EAGAIN;
29997c478bd9Sstevel@tonic-gate 					goto done;
30007c478bd9Sstevel@tonic-gate 				}
30017c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
30027c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
30037c478bd9Sstevel@tonic-gate 				q->q_nband++;
30047c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
30057c478bd9Sstevel@tonic-gate 			}
30067c478bd9Sstevel@tonic-gate 		}
30077c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
30087c478bd9Sstevel@tonic-gate 		i = pri;
30097c478bd9Sstevel@tonic-gate 		while (--i)
30107c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
30117c478bd9Sstevel@tonic-gate 	}
30127c478bd9Sstevel@tonic-gate 	switch (what) {
30137c478bd9Sstevel@tonic-gate 
30147c478bd9Sstevel@tonic-gate 	case QHIWAT:
30157c478bd9Sstevel@tonic-gate 		if (qbp)
30167c478bd9Sstevel@tonic-gate 			qbp->qb_hiwat = (size_t)val;
30177c478bd9Sstevel@tonic-gate 		else
30187c478bd9Sstevel@tonic-gate 			q->q_hiwat = (size_t)val;
30197c478bd9Sstevel@tonic-gate 		break;
30207c478bd9Sstevel@tonic-gate 
30217c478bd9Sstevel@tonic-gate 	case QLOWAT:
30227c478bd9Sstevel@tonic-gate 		if (qbp)
30237c478bd9Sstevel@tonic-gate 			qbp->qb_lowat = (size_t)val;
30247c478bd9Sstevel@tonic-gate 		else
30257c478bd9Sstevel@tonic-gate 			q->q_lowat = (size_t)val;
30267c478bd9Sstevel@tonic-gate 		break;
30277c478bd9Sstevel@tonic-gate 
30287c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
30297c478bd9Sstevel@tonic-gate 		if (qbp)
30307c478bd9Sstevel@tonic-gate 			error = EINVAL;
30317c478bd9Sstevel@tonic-gate 		else
30327c478bd9Sstevel@tonic-gate 			q->q_maxpsz = (ssize_t)val;
30337c478bd9Sstevel@tonic-gate 
30347c478bd9Sstevel@tonic-gate 		/*
30357c478bd9Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
30367c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
30377c478bd9Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
30387c478bd9Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
30397c478bd9Sstevel@tonic-gate 		 */
30407c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
30417c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
30427c478bd9Sstevel@tonic-gate 			break;
30437c478bd9Sstevel@tonic-gate 
30447c478bd9Sstevel@tonic-gate 		/*
30457c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
30467c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
30477c478bd9Sstevel@tonic-gate 		 */
30487c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30497c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
30507c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
30517c478bd9Sstevel@tonic-gate 		}
30527c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(wrq)));
30537c478bd9Sstevel@tonic-gate 
30547c478bd9Sstevel@tonic-gate 		if (strmsgsz != 0) {
30557c478bd9Sstevel@tonic-gate 			if (val == INFPSZ)
30567c478bd9Sstevel@tonic-gate 				val = strmsgsz;
30577c478bd9Sstevel@tonic-gate 			else  {
30587c478bd9Sstevel@tonic-gate 				if (STREAM(q)->sd_vnode->v_type == VFIFO)
30597c478bd9Sstevel@tonic-gate 					val = MIN(PIPE_BUF, val);
30607c478bd9Sstevel@tonic-gate 				else
30617c478bd9Sstevel@tonic-gate 					val = MIN(strmsgsz, val);
30627c478bd9Sstevel@tonic-gate 			}
30637c478bd9Sstevel@tonic-gate 		}
30647c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_maxpsz = val;
30657c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30667c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
30677c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
30687c478bd9Sstevel@tonic-gate 		}
30697c478bd9Sstevel@tonic-gate 		break;
30707c478bd9Sstevel@tonic-gate 
30717c478bd9Sstevel@tonic-gate 	case QMINPSZ:
30727c478bd9Sstevel@tonic-gate 		if (qbp)
30737c478bd9Sstevel@tonic-gate 			error = EINVAL;
30747c478bd9Sstevel@tonic-gate 		else
30757c478bd9Sstevel@tonic-gate 			q->q_minpsz = (ssize_t)val;
30767c478bd9Sstevel@tonic-gate 
30777c478bd9Sstevel@tonic-gate 		/*
30787c478bd9Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
30797c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
30807c478bd9Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
30817c478bd9Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
30827c478bd9Sstevel@tonic-gate 		 */
30837c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
30847c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
30857c478bd9Sstevel@tonic-gate 			break;
30867c478bd9Sstevel@tonic-gate 
30877c478bd9Sstevel@tonic-gate 		/*
30887c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
30897c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
30907c478bd9Sstevel@tonic-gate 		 */
30917c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30927c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
30937c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
30947c478bd9Sstevel@tonic-gate 		}
30957c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_minpsz = (ssize_t)val;
30967c478bd9Sstevel@tonic-gate 
30977c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30987c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
30997c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
31007c478bd9Sstevel@tonic-gate 		}
31017c478bd9Sstevel@tonic-gate 		break;
31027c478bd9Sstevel@tonic-gate 
31037c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
31047c478bd9Sstevel@tonic-gate 		if (qbp)
31057c478bd9Sstevel@tonic-gate 			error = EINVAL;
31067c478bd9Sstevel@tonic-gate 		else
31077c478bd9Sstevel@tonic-gate 			q->q_struiot = (ushort_t)val;
31087c478bd9Sstevel@tonic-gate 		break;
31097c478bd9Sstevel@tonic-gate 
31107c478bd9Sstevel@tonic-gate 	case QCOUNT:
31117c478bd9Sstevel@tonic-gate 	case QFIRST:
31127c478bd9Sstevel@tonic-gate 	case QLAST:
31137c478bd9Sstevel@tonic-gate 	case QFLAG:
31147c478bd9Sstevel@tonic-gate 		error = EPERM;
31157c478bd9Sstevel@tonic-gate 		break;
31167c478bd9Sstevel@tonic-gate 
31177c478bd9Sstevel@tonic-gate 	default:
31187c478bd9Sstevel@tonic-gate 		error = EINVAL;
31197c478bd9Sstevel@tonic-gate 		break;
31207c478bd9Sstevel@tonic-gate 	}
31217c478bd9Sstevel@tonic-gate done:
31227c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
31237c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
31247c478bd9Sstevel@tonic-gate 	return (error);
31257c478bd9Sstevel@tonic-gate }
31267c478bd9Sstevel@tonic-gate 
31277c478bd9Sstevel@tonic-gate /*
31287c478bd9Sstevel@tonic-gate  * Get queue fields.
31297c478bd9Sstevel@tonic-gate  */
31307c478bd9Sstevel@tonic-gate int
31317c478bd9Sstevel@tonic-gate strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
31327c478bd9Sstevel@tonic-gate {
31337c478bd9Sstevel@tonic-gate 	qband_t 	*qbp = NULL;
31347c478bd9Sstevel@tonic-gate 	int 		error = 0;
31357c478bd9Sstevel@tonic-gate 	kthread_id_t 	freezer;
31367c478bd9Sstevel@tonic-gate 
31377c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
31387c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
31397c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
31407c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
31417c478bd9Sstevel@tonic-gate 	} else
31427c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
31437c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
31447c478bd9Sstevel@tonic-gate 		error = EINVAL;
31457c478bd9Sstevel@tonic-gate 		goto done;
31467c478bd9Sstevel@tonic-gate 	}
31477c478bd9Sstevel@tonic-gate 	if (pri != 0) {
31487c478bd9Sstevel@tonic-gate 		int i;
31497c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
31507c478bd9Sstevel@tonic-gate 
31517c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
31527c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
31537c478bd9Sstevel@tonic-gate 			while (*qbpp)
31547c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
31557c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
31567c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
31577c478bd9Sstevel@tonic-gate 					error = EAGAIN;
31587c478bd9Sstevel@tonic-gate 					goto done;
31597c478bd9Sstevel@tonic-gate 				}
31607c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
31617c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
31627c478bd9Sstevel@tonic-gate 				q->q_nband++;
31637c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
31647c478bd9Sstevel@tonic-gate 			}
31657c478bd9Sstevel@tonic-gate 		}
31667c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
31677c478bd9Sstevel@tonic-gate 		i = pri;
31687c478bd9Sstevel@tonic-gate 		while (--i)
31697c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
31707c478bd9Sstevel@tonic-gate 	}
31717c478bd9Sstevel@tonic-gate 	switch (what) {
31727c478bd9Sstevel@tonic-gate 	case QHIWAT:
31737c478bd9Sstevel@tonic-gate 		if (qbp)
31747c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_hiwat;
31757c478bd9Sstevel@tonic-gate 		else
31767c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_hiwat;
31777c478bd9Sstevel@tonic-gate 		break;
31787c478bd9Sstevel@tonic-gate 
31797c478bd9Sstevel@tonic-gate 	case QLOWAT:
31807c478bd9Sstevel@tonic-gate 		if (qbp)
31817c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_lowat;
31827c478bd9Sstevel@tonic-gate 		else
31837c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_lowat;
31847c478bd9Sstevel@tonic-gate 		break;
31857c478bd9Sstevel@tonic-gate 
31867c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
31877c478bd9Sstevel@tonic-gate 		if (qbp)
31887c478bd9Sstevel@tonic-gate 			error = EINVAL;
31897c478bd9Sstevel@tonic-gate 		else
31907c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_maxpsz;
31917c478bd9Sstevel@tonic-gate 		break;
31927c478bd9Sstevel@tonic-gate 
31937c478bd9Sstevel@tonic-gate 	case QMINPSZ:
31947c478bd9Sstevel@tonic-gate 		if (qbp)
31957c478bd9Sstevel@tonic-gate 			error = EINVAL;
31967c478bd9Sstevel@tonic-gate 		else
31977c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_minpsz;
31987c478bd9Sstevel@tonic-gate 		break;
31997c478bd9Sstevel@tonic-gate 
32007c478bd9Sstevel@tonic-gate 	case QCOUNT:
32017c478bd9Sstevel@tonic-gate 		if (qbp)
32027c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_count;
32037c478bd9Sstevel@tonic-gate 		else
32047c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_count;
32057c478bd9Sstevel@tonic-gate 		break;
32067c478bd9Sstevel@tonic-gate 
32077c478bd9Sstevel@tonic-gate 	case QFIRST:
32087c478bd9Sstevel@tonic-gate 		if (qbp)
32097c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_first;
32107c478bd9Sstevel@tonic-gate 		else
32117c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_first;
32127c478bd9Sstevel@tonic-gate 		break;
32137c478bd9Sstevel@tonic-gate 
32147c478bd9Sstevel@tonic-gate 	case QLAST:
32157c478bd9Sstevel@tonic-gate 		if (qbp)
32167c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_last;
32177c478bd9Sstevel@tonic-gate 		else
32187c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_last;
32197c478bd9Sstevel@tonic-gate 		break;
32207c478bd9Sstevel@tonic-gate 
32217c478bd9Sstevel@tonic-gate 	case QFLAG:
32227c478bd9Sstevel@tonic-gate 		if (qbp)
32237c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = qbp->qb_flag;
32247c478bd9Sstevel@tonic-gate 		else
32257c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = q->q_flag;
32267c478bd9Sstevel@tonic-gate 		break;
32277c478bd9Sstevel@tonic-gate 
32287c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
32297c478bd9Sstevel@tonic-gate 		if (qbp)
32307c478bd9Sstevel@tonic-gate 			error = EINVAL;
32317c478bd9Sstevel@tonic-gate 		else
32327c478bd9Sstevel@tonic-gate 			*(short *)valp = q->q_struiot;
32337c478bd9Sstevel@tonic-gate 		break;
32347c478bd9Sstevel@tonic-gate 
32357c478bd9Sstevel@tonic-gate 	default:
32367c478bd9Sstevel@tonic-gate 		error = EINVAL;
32377c478bd9Sstevel@tonic-gate 		break;
32387c478bd9Sstevel@tonic-gate 	}
32397c478bd9Sstevel@tonic-gate done:
32407c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
32417c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
32427c478bd9Sstevel@tonic-gate 	return (error);
32437c478bd9Sstevel@tonic-gate }
32447c478bd9Sstevel@tonic-gate 
32457c478bd9Sstevel@tonic-gate /*
32467c478bd9Sstevel@tonic-gate  * Function awakes all in cvwait/sigwait/pollwait, on one of:
32477c478bd9Sstevel@tonic-gate  *	QWANTWSYNC or QWANTR or QWANTW,
32487c478bd9Sstevel@tonic-gate  *
32497c478bd9Sstevel@tonic-gate  * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
32507c478bd9Sstevel@tonic-gate  *	 deferred wakeup will be done. Also if strpoll() in progress then a
32517c478bd9Sstevel@tonic-gate  *	 deferred pollwakeup will be done.
32527c478bd9Sstevel@tonic-gate  */
32537c478bd9Sstevel@tonic-gate void
32547c478bd9Sstevel@tonic-gate strwakeq(queue_t *q, int flag)
32557c478bd9Sstevel@tonic-gate {
32567c478bd9Sstevel@tonic-gate 	stdata_t 	*stp = STREAM(q);
32577c478bd9Sstevel@tonic-gate 	pollhead_t 	*pl;
32587c478bd9Sstevel@tonic-gate 
32597c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
32607c478bd9Sstevel@tonic-gate 	pl = &stp->sd_pollist;
32617c478bd9Sstevel@tonic-gate 	if (flag & QWANTWSYNC) {
32627c478bd9Sstevel@tonic-gate 		ASSERT(!(q->q_flag & QREADR));
32637c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
32647c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
32657c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
32667c478bd9Sstevel@tonic-gate 		} else {
32677c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= WSLEEP;
32687c478bd9Sstevel@tonic-gate 		}
32697c478bd9Sstevel@tonic-gate 
32707c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
32717c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
32727c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
32737c478bd9Sstevel@tonic-gate 
32747c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
32757c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
32767c478bd9Sstevel@tonic-gate 	} else if (flag & QWANTR) {
32777c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & RSLEEP) {
32787c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~RSLEEP;
32797c478bd9Sstevel@tonic-gate 			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
32807c478bd9Sstevel@tonic-gate 		} else {
32817c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= RSLEEP;
32827c478bd9Sstevel@tonic-gate 		}
32837c478bd9Sstevel@tonic-gate 
32847c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
32857c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLIN | POLLRDNORM);
32867c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
32877c478bd9Sstevel@tonic-gate 
32887c478bd9Sstevel@tonic-gate 		{
32897c478bd9Sstevel@tonic-gate 			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);
32907c478bd9Sstevel@tonic-gate 
32917c478bd9Sstevel@tonic-gate 			if (events)
32927c478bd9Sstevel@tonic-gate 				strsendsig(stp->sd_siglist, events, 0, 0);
32937c478bd9Sstevel@tonic-gate 		}
32947c478bd9Sstevel@tonic-gate 	} else {
32957c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
32967c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
32977c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
32987c478bd9Sstevel@tonic-gate 		}
32997c478bd9Sstevel@tonic-gate 
33007c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
33017c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
33027c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
33037c478bd9Sstevel@tonic-gate 
33047c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
33057c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
33067c478bd9Sstevel@tonic-gate 	}
33077c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
33087c478bd9Sstevel@tonic-gate }
33097c478bd9Sstevel@tonic-gate 
33107c478bd9Sstevel@tonic-gate int
33117c478bd9Sstevel@tonic-gate struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
33127c478bd9Sstevel@tonic-gate {
33137c478bd9Sstevel@tonic-gate 	stdata_t *stp = STREAM(q);
33147c478bd9Sstevel@tonic-gate 	int typ  = STRUIOT_STANDARD;
33157c478bd9Sstevel@tonic-gate 	uio_t	 *uiop = &dp->d_uio;
33167c478bd9Sstevel@tonic-gate 	dblk_t	 *dbp;
33177c478bd9Sstevel@tonic-gate 	ssize_t	 uiocnt;
33187c478bd9Sstevel@tonic-gate 	ssize_t	 cnt;
33197c478bd9Sstevel@tonic-gate 	unsigned char *ptr;
33207c478bd9Sstevel@tonic-gate 	ssize_t	 resid;
33217c478bd9Sstevel@tonic-gate 	int	 error = 0;
33227c478bd9Sstevel@tonic-gate 	on_trap_data_t otd;
33237c478bd9Sstevel@tonic-gate 	queue_t	*stwrq;
33247c478bd9Sstevel@tonic-gate 
33257c478bd9Sstevel@tonic-gate 	/*
33267c478bd9Sstevel@tonic-gate 	 * Plumbing may change while taking the type so store the
33277c478bd9Sstevel@tonic-gate 	 * queue in a temporary variable. It doesn't matter even
33287c478bd9Sstevel@tonic-gate 	 * if the we take the type from the previous plumbing,
33297c478bd9Sstevel@tonic-gate 	 * that's because if the plumbing has changed when we were
33307c478bd9Sstevel@tonic-gate 	 * holding the queue in a temporary variable, we can continue
33317c478bd9Sstevel@tonic-gate 	 * processing the message the way it would have been processed
33327c478bd9Sstevel@tonic-gate 	 * in the old plumbing, without any side effects but a bit
33337c478bd9Sstevel@tonic-gate 	 * extra processing for partial ip header checksum.
33347c478bd9Sstevel@tonic-gate 	 *
33357c478bd9Sstevel@tonic-gate 	 * This has been done to avoid holding the sd_lock which is
33367c478bd9Sstevel@tonic-gate 	 * very hot.
33377c478bd9Sstevel@tonic-gate 	 */
33387c478bd9Sstevel@tonic-gate 
33397c478bd9Sstevel@tonic-gate 	stwrq = stp->sd_struiowrq;
33407c478bd9Sstevel@tonic-gate 	if (stwrq)
33417c478bd9Sstevel@tonic-gate 		typ = stwrq->q_struiot;
33427c478bd9Sstevel@tonic-gate 
33437c478bd9Sstevel@tonic-gate 	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
33447c478bd9Sstevel@tonic-gate 		dbp = mp->b_datap;
33457c478bd9Sstevel@tonic-gate 		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
33467c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
33477c478bd9Sstevel@tonic-gate 		cnt = MIN(uiocnt, uiop->uio_resid);
33487c478bd9Sstevel@tonic-gate 		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
33497c478bd9Sstevel@tonic-gate 		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
33507c478bd9Sstevel@tonic-gate 			/*
33517c478bd9Sstevel@tonic-gate 			 * Either this mblk has already been processed
33527c478bd9Sstevel@tonic-gate 			 * or there is no more room in this mblk (?).
33537c478bd9Sstevel@tonic-gate 			 */
33547c478bd9Sstevel@tonic-gate 			continue;
33557c478bd9Sstevel@tonic-gate 		}
33567c478bd9Sstevel@tonic-gate 		switch (typ) {
33577c478bd9Sstevel@tonic-gate 		case STRUIOT_STANDARD:
33587c478bd9Sstevel@tonic-gate 			if (noblock) {
33597c478bd9Sstevel@tonic-gate 				if (on_trap(&otd, OT_DATA_ACCESS)) {
33607c478bd9Sstevel@tonic-gate 					no_trap();
33617c478bd9Sstevel@tonic-gate 					error = EWOULDBLOCK;
33627c478bd9Sstevel@tonic-gate 					goto out;
33637c478bd9Sstevel@tonic-gate 				}
33647c478bd9Sstevel@tonic-gate 			}
33657c478bd9Sstevel@tonic-gate 			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
33667c478bd9Sstevel@tonic-gate 				if (noblock)
33677c478bd9Sstevel@tonic-gate 					no_trap();
33687c478bd9Sstevel@tonic-gate 				goto out;
33697c478bd9Sstevel@tonic-gate 			}
33707c478bd9Sstevel@tonic-gate 			if (noblock)
33717c478bd9Sstevel@tonic-gate 				no_trap();
33727c478bd9Sstevel@tonic-gate 			break;
33737c478bd9Sstevel@tonic-gate 
33747c478bd9Sstevel@tonic-gate 		default:
33757c478bd9Sstevel@tonic-gate 			error = EIO;
33767c478bd9Sstevel@tonic-gate 			goto out;
33777c478bd9Sstevel@tonic-gate 		}
33787c478bd9Sstevel@tonic-gate 		dbp->db_struioflag |= STRUIO_DONE;
33797c478bd9Sstevel@tonic-gate 		dbp->db_cksumstuff += cnt;
33807c478bd9Sstevel@tonic-gate 	}
33817c478bd9Sstevel@tonic-gate out:
33827c478bd9Sstevel@tonic-gate 	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
33837c478bd9Sstevel@tonic-gate 		/*
33847c478bd9Sstevel@tonic-gate 		 * A fault has occured and some bytes were moved to the
33857c478bd9Sstevel@tonic-gate 		 * current mblk, the uio_t has already been updated by
33867c478bd9Sstevel@tonic-gate 		 * the appropriate uio routine, so also update the mblk
33877c478bd9Sstevel@tonic-gate 		 * to reflect this in case this same mblk chain is used
33887c478bd9Sstevel@tonic-gate 		 * again (after the fault has been handled).
33897c478bd9Sstevel@tonic-gate 		 */
33907c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
33917c478bd9Sstevel@tonic-gate 		if (uiocnt >= resid)
33927c478bd9Sstevel@tonic-gate 			dbp->db_cksumstuff += resid;
33937c478bd9Sstevel@tonic-gate 	}
33947c478bd9Sstevel@tonic-gate 	return (error);
33957c478bd9Sstevel@tonic-gate }
33967c478bd9Sstevel@tonic-gate 
33977c478bd9Sstevel@tonic-gate /*
33987c478bd9Sstevel@tonic-gate  * Try to enter queue synchronously. Any attempt to enter a closing queue will
33997c478bd9Sstevel@tonic-gate  * fails. The qp->q_rwcnt keeps track of the number of successful entries so
34007c478bd9Sstevel@tonic-gate  * that removeq() will not try to close the queue while a thread is inside the
34017c478bd9Sstevel@tonic-gate  * queue.
34027c478bd9Sstevel@tonic-gate  */
34037c478bd9Sstevel@tonic-gate static boolean_t
34047c478bd9Sstevel@tonic-gate rwnext_enter(queue_t *qp)
34057c478bd9Sstevel@tonic-gate {
34067c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
34077c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWCLOSE) {
34087c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
34097c478bd9Sstevel@tonic-gate 		return (B_FALSE);
34107c478bd9Sstevel@tonic-gate 	}
34117c478bd9Sstevel@tonic-gate 	qp->q_rwcnt++;
34127c478bd9Sstevel@tonic-gate 	ASSERT(qp->q_rwcnt != 0);
34137c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
34147c478bd9Sstevel@tonic-gate 	return (B_TRUE);
34157c478bd9Sstevel@tonic-gate }
34167c478bd9Sstevel@tonic-gate 
34177c478bd9Sstevel@tonic-gate /*
34187c478bd9Sstevel@tonic-gate  * Decrease the count of threads running in sync stream queue and wake up any
34197c478bd9Sstevel@tonic-gate  * threads blocked in removeq().
34207c478bd9Sstevel@tonic-gate  */
34217c478bd9Sstevel@tonic-gate static void
34227c478bd9Sstevel@tonic-gate rwnext_exit(queue_t *qp)
34237c478bd9Sstevel@tonic-gate {
34247c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
34257c478bd9Sstevel@tonic-gate 	qp->q_rwcnt--;
34267c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWANTRMQSYNC) {
34277c478bd9Sstevel@tonic-gate 		qp->q_flag &= ~QWANTRMQSYNC;
34287c478bd9Sstevel@tonic-gate 		cv_broadcast(&qp->q_wait);
34297c478bd9Sstevel@tonic-gate 	}
34307c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
34317c478bd9Sstevel@tonic-gate }
34327c478bd9Sstevel@tonic-gate 
34337c478bd9Sstevel@tonic-gate /*
34347c478bd9Sstevel@tonic-gate  * The purpose of rwnext() is to call the rw procedure of the next
34357c478bd9Sstevel@tonic-gate  * (downstream) modules queue.
34367c478bd9Sstevel@tonic-gate  *
34377c478bd9Sstevel@tonic-gate  * rwnext() is treated as a put entrypoint for perimeter synchronization.
34387c478bd9Sstevel@tonic-gate  *
34397c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
34407c478bd9Sstevel@tonic-gate  * sync queues). If it is CIPUT sync queue sq_count is incremented and it does
34417c478bd9Sstevel@tonic-gate  * not matter if any regular put entrypoints have been already entered. We
34427c478bd9Sstevel@tonic-gate  * can't increment one of the sq_putcounts (instead of sq_count) because
34437c478bd9Sstevel@tonic-gate  * qwait_rw won't know which counter to decrement.
34447c478bd9Sstevel@tonic-gate  *
34457c478bd9Sstevel@tonic-gate  * It would be reasonable to add the lockless FASTPUT logic.
34467c478bd9Sstevel@tonic-gate  */
34477c478bd9Sstevel@tonic-gate int
34487c478bd9Sstevel@tonic-gate rwnext(queue_t *qp, struiod_t *dp)
34497c478bd9Sstevel@tonic-gate {
34507c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
34517c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
34527c478bd9Sstevel@tonic-gate 	uint16_t	count;
34537c478bd9Sstevel@tonic-gate 	uint16_t	flags;
34547c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
34557c478bd9Sstevel@tonic-gate 	int		(*proc)();
34567c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
34577c478bd9Sstevel@tonic-gate 	int		isread;
34587c478bd9Sstevel@tonic-gate 	int		rval;
34597c478bd9Sstevel@tonic-gate 
34607c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
34617c478bd9Sstevel@tonic-gate 	/*
34627c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until acquiring
34637c478bd9Sstevel@tonic-gate 	 * SQLOCK. Note that a read-side rwnext from the streamhead will
34647c478bd9Sstevel@tonic-gate 	 * already have sd_lock acquired. In either case sd_lock is always
34657c478bd9Sstevel@tonic-gate 	 * released after acquiring SQLOCK.
34667c478bd9Sstevel@tonic-gate 	 *
34677c478bd9Sstevel@tonic-gate 	 * The streamhead read-side holding sd_lock when calling rwnext is
34687c478bd9Sstevel@tonic-gate 	 * required to prevent a race condition where M_DATA mblks flowing
34697c478bd9Sstevel@tonic-gate 	 * up the read-side of the stream could be bypassed by a rwnext()
34707c478bd9Sstevel@tonic-gate 	 * down-call. In this case sd_lock acts as the streamhead perimeter.
34717c478bd9Sstevel@tonic-gate 	 */
34727c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
34737c478bd9Sstevel@tonic-gate 		isread = 0;
34747c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
34757c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
34767c478bd9Sstevel@tonic-gate 	} else {
34777c478bd9Sstevel@tonic-gate 		isread = 1;
34787c478bd9Sstevel@tonic-gate 		if (nqp != stp->sd_wrq)
34797c478bd9Sstevel@tonic-gate 			/* Not streamhead */
34807c478bd9Sstevel@tonic-gate 			mutex_enter(&stp->sd_lock);
34817c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
34827c478bd9Sstevel@tonic-gate 	}
34837c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
34847c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
34857c478bd9Sstevel@tonic-gate 		/*
34867c478bd9Sstevel@tonic-gate 		 * Not a synchronous module or no r/w procedure for this
34877c478bd9Sstevel@tonic-gate 		 * queue, so just return EINVAL and let the caller handle it.
34887c478bd9Sstevel@tonic-gate 		 */
34897c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
34907c478bd9Sstevel@tonic-gate 		return (EINVAL);
34917c478bd9Sstevel@tonic-gate 	}
34927c478bd9Sstevel@tonic-gate 
34937c478bd9Sstevel@tonic-gate 	if (rwnext_enter(qp) == B_FALSE) {
34947c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
34957c478bd9Sstevel@tonic-gate 		return (EINVAL);
34967c478bd9Sstevel@tonic-gate 	}
34977c478bd9Sstevel@tonic-gate 
34987c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
34997c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
35007c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
35017c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
35027c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
35037c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
35047c478bd9Sstevel@tonic-gate 
35057c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
35067c478bd9Sstevel@tonic-gate 		/*
35077c478bd9Sstevel@tonic-gate 		 * if this queue is being closed, return.
35087c478bd9Sstevel@tonic-gate 		 */
35097c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QWCLOSE) {
35107c478bd9Sstevel@tonic-gate 			mutex_exit(SQLOCK(sq));
35117c478bd9Sstevel@tonic-gate 			rwnext_exit(qp);
35127c478bd9Sstevel@tonic-gate 			return (EINVAL);
35137c478bd9Sstevel@tonic-gate 		}
35147c478bd9Sstevel@tonic-gate 
35157c478bd9Sstevel@tonic-gate 		/*
35167c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
35177c478bd9Sstevel@tonic-gate 		 */
35187c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
35197c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
35207c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
35217c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
35227c478bd9Sstevel@tonic-gate 	}
35237c478bd9Sstevel@tonic-gate 
35247c478bd9Sstevel@tonic-gate 	if (isread == 0 && stp->sd_struiowrq == NULL ||
35257c478bd9Sstevel@tonic-gate 	    isread == 1 && stp->sd_struiordq == NULL) {
35267c478bd9Sstevel@tonic-gate 		/*
35277c478bd9Sstevel@tonic-gate 		 * Stream plumbing changed while waiting for inner perimeter
35287c478bd9Sstevel@tonic-gate 		 * so just return EINVAL and let the caller handle it.
35297c478bd9Sstevel@tonic-gate 		 */
35307c478bd9Sstevel@tonic-gate 		mutex_exit(SQLOCK(sq));
35317c478bd9Sstevel@tonic-gate 		rwnext_exit(qp);
35327c478bd9Sstevel@tonic-gate 		return (EINVAL);
35337c478bd9Sstevel@tonic-gate 	}
35347c478bd9Sstevel@tonic-gate 	if (!(flags & SQ_CIPUT))
35357c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
35367c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
35377c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
	/*
	 * Note: The only message ordering guarantee that rwnext() makes is
	 *	 for the write queue flow-control case. All others (r/w queue
	 *	 with q_count > 0 (or q_first != 0)) are the responsibility of
	 *	 the queue's rw procedure. This could be generalized here by
	 *	 running the queue's service procedure, but that wouldn't be
	 *	 the most efficient for all cases.
	 */
35467c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
35477c478bd9Sstevel@tonic-gate 	if (! isread && (qp->q_flag & QFULL)) {
35487c478bd9Sstevel@tonic-gate 		/*
35497c478bd9Sstevel@tonic-gate 		 * Write queue may be flow controlled. If so,
35507c478bd9Sstevel@tonic-gate 		 * mark the queue for wakeup when it's not.
35517c478bd9Sstevel@tonic-gate 		 */
35527c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(qp));
35537c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QFULL) {
35547c478bd9Sstevel@tonic-gate 			qp->q_flag |= QWANTWSYNC;
35557c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(qp));
35567c478bd9Sstevel@tonic-gate 			rval = EWOULDBLOCK;
35577c478bd9Sstevel@tonic-gate 			goto out;
35587c478bd9Sstevel@tonic-gate 		}
35597c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
35607c478bd9Sstevel@tonic-gate 	}
35617c478bd9Sstevel@tonic-gate 
35627c478bd9Sstevel@tonic-gate 	if (! isread && dp->d_mp)
35637c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
35647c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_datap->db_base);
35657c478bd9Sstevel@tonic-gate 
35667c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, dp);
35677c478bd9Sstevel@tonic-gate 
35687c478bd9Sstevel@tonic-gate 	if (isread && dp->d_mp)
35697c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
35707c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
35717c478bd9Sstevel@tonic-gate out:
35727c478bd9Sstevel@tonic-gate 	/*
35737c478bd9Sstevel@tonic-gate 	 * The queue is protected from being freed by sq_count, so it is
35747c478bd9Sstevel@tonic-gate 	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
35757c478bd9Sstevel@tonic-gate 	 */
35767c478bd9Sstevel@tonic-gate 	rwnext_exit(qp);
35777c478bd9Sstevel@tonic-gate 
35787c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
35797c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
35807c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
35817c478bd9Sstevel@tonic-gate 	sq->sq_count--;
35827c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
35837c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
35847c478bd9Sstevel@tonic-gate 		/*
35857c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
35867c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
35877c478bd9Sstevel@tonic-gate 		 */
35887c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
35897c478bd9Sstevel@tonic-gate 		return (rval);
35907c478bd9Sstevel@tonic-gate 	}
35917c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
35927c478bd9Sstevel@tonic-gate 	/*
35937c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
35947c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
35957c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
35967c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
35977c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
35987c478bd9Sstevel@tonic-gate 	 *
35997c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
36007c478bd9Sstevel@tonic-gate 	 *
36017c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
36027c478bd9Sstevel@tonic-gate 	 * 	sq->sq_count == 0);
36037c478bd9Sstevel@tonic-gate 	 *
36047c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
36057c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
36067c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
36077c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
36087c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
36097c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
36107c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
36117c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
36127c478bd9Sstevel@tonic-gate 	 * test invalid.
36137c478bd9Sstevel@tonic-gate 	 */
36147c478bd9Sstevel@tonic-gate 
36157c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
36167c478bd9Sstevel@tonic-gate 	if (sq->sq_flags & SQ_WANTWAKEUP) {
36177c478bd9Sstevel@tonic-gate 		sq->sq_flags &= ~SQ_WANTWAKEUP;
36187c478bd9Sstevel@tonic-gate 		cv_broadcast(&sq->sq_wait);
36197c478bd9Sstevel@tonic-gate 	}
36207c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
36217c478bd9Sstevel@tonic-gate 	return (rval);
36227c478bd9Sstevel@tonic-gate }
36237c478bd9Sstevel@tonic-gate 
36247c478bd9Sstevel@tonic-gate /*
36257c478bd9Sstevel@tonic-gate  * The purpose of infonext() is to call the info procedure of the next
36267c478bd9Sstevel@tonic-gate  * (downstream) modules queue.
36277c478bd9Sstevel@tonic-gate  *
36287c478bd9Sstevel@tonic-gate  * treated as put entrypoint for perimeter syncronization.
36297c478bd9Sstevel@tonic-gate  *
36307c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
36317c478bd9Sstevel@tonic-gate  * sync queues). If it is CIPUT sync queue regular sq_count is incremented and
36327c478bd9Sstevel@tonic-gate  * it does not matter if any regular put entrypoints have been already
36337c478bd9Sstevel@tonic-gate  * entered.
36347c478bd9Sstevel@tonic-gate  */
36357c478bd9Sstevel@tonic-gate int
36367c478bd9Sstevel@tonic-gate infonext(queue_t *qp, infod_t *idp)
36377c478bd9Sstevel@tonic-gate {
36387c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
36397c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
36407c478bd9Sstevel@tonic-gate 	uint16_t	count;
36417c478bd9Sstevel@tonic-gate 	uint16_t 	flags;
36427c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
36437c478bd9Sstevel@tonic-gate 	int		(*proc)();
36447c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
36457c478bd9Sstevel@tonic-gate 	int		rval;
36467c478bd9Sstevel@tonic-gate 
36477c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
36487c478bd9Sstevel@tonic-gate 	/*
36497c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until
36507c478bd9Sstevel@tonic-gate 	 * acquiring SQLOCK.
36517c478bd9Sstevel@tonic-gate 	 */
36527c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
36537c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
36547c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
36557c478bd9Sstevel@tonic-gate 	} else {
36567c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
36577c478bd9Sstevel@tonic-gate 	}
36587c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
36597c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
36607c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
36617c478bd9Sstevel@tonic-gate 		return (EINVAL);
36627c478bd9Sstevel@tonic-gate 	}
36637c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
36647c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
36657c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
36667c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
36677c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
36687c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
36697c478bd9Sstevel@tonic-gate 
36707c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
36717c478bd9Sstevel@tonic-gate 		/*
36727c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
36737c478bd9Sstevel@tonic-gate 		 */
36747c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
36757c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
36767c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
36777c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
36787c478bd9Sstevel@tonic-gate 	}
36797c478bd9Sstevel@tonic-gate 
36807c478bd9Sstevel@tonic-gate 	if (! (flags & SQ_CIPUT))
36817c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
36827c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
36837c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
36847c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
36857c478bd9Sstevel@tonic-gate 
36867c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, idp);
36877c478bd9Sstevel@tonic-gate 
36887c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
36897c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
36907c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
36917c478bd9Sstevel@tonic-gate 	sq->sq_count--;
36927c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
36937c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
36947c478bd9Sstevel@tonic-gate 		/*
36957c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
36967c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
36977c478bd9Sstevel@tonic-gate 		 */
36987c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
36997c478bd9Sstevel@tonic-gate 		return (rval);
37007c478bd9Sstevel@tonic-gate 	}
37017c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
37027c478bd9Sstevel@tonic-gate /*
37037c478bd9Sstevel@tonic-gate  * XXXX
37047c478bd9Sstevel@tonic-gate  * I am not certain the next comment is correct here.  I need to consider
37057c478bd9Sstevel@tonic-gate  * why the infonext is called, and if dropping SQ_EXCL unless non-CIPUT
37067c478bd9Sstevel@tonic-gate  * might cause other problems.  It just might be safer to drop it if
37077c478bd9Sstevel@tonic-gate  * !SQ_CIPUT because that is when we set it.
37087c478bd9Sstevel@tonic-gate  */
37097c478bd9Sstevel@tonic-gate 	/*
37107c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
37117c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
37127c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
37137c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
37147c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
37157c478bd9Sstevel@tonic-gate 	 *
37167c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
37177c478bd9Sstevel@tonic-gate 	 *
37187c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
37197c478bd9Sstevel@tonic-gate 	 *	sq->sq_count == 0);
37207c478bd9Sstevel@tonic-gate 	 *
37217c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
37227c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
37237c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
37247c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
37257c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
37267c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
37277c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
37287c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
37297c478bd9Sstevel@tonic-gate 	 * test invalid.
37307c478bd9Sstevel@tonic-gate 	 */
37317c478bd9Sstevel@tonic-gate 
37327c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
37337c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
37347c478bd9Sstevel@tonic-gate 	return (rval);
37357c478bd9Sstevel@tonic-gate }
37367c478bd9Sstevel@tonic-gate 
37377c478bd9Sstevel@tonic-gate /*
37387c478bd9Sstevel@tonic-gate  * Return nonzero if the queue is responsible for struio(), else return 0.
37397c478bd9Sstevel@tonic-gate  */
37407c478bd9Sstevel@tonic-gate int
37417c478bd9Sstevel@tonic-gate isuioq(queue_t *q)
37427c478bd9Sstevel@tonic-gate {
37437c478bd9Sstevel@tonic-gate 	if (q->q_flag & QREADR)
37447c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiordq == q);
37457c478bd9Sstevel@tonic-gate 	else
37467c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiowrq == q);
37477c478bd9Sstevel@tonic-gate }
37487c478bd9Sstevel@tonic-gate 
/*
 * Switch for the per-CPU putlock machinery: when non-zero, create_putlocks()
 * returns without creating anything.  Defaults to 0 (putlocks allowed) on
 * sparc and to 1 (putlocks disabled) everywhere else.
 */
#ifndef __sparc
int disable_putlocks = 1;
#else
int disable_putlocks = 0;
#endif
37547c478bd9Sstevel@tonic-gate 
37557c478bd9Sstevel@tonic-gate /*
37567c478bd9Sstevel@tonic-gate  * called by create_putlock.
37577c478bd9Sstevel@tonic-gate  */
37587c478bd9Sstevel@tonic-gate static void
37597c478bd9Sstevel@tonic-gate create_syncq_putlocks(queue_t *q)
37607c478bd9Sstevel@tonic-gate {
37617c478bd9Sstevel@tonic-gate 	syncq_t	*sq = q->q_syncq;
37627c478bd9Sstevel@tonic-gate 	ciputctrl_t *cip;
37637c478bd9Sstevel@tonic-gate 	int i;
37647c478bd9Sstevel@tonic-gate 
37657c478bd9Sstevel@tonic-gate 	ASSERT(sq != NULL);
37667c478bd9Sstevel@tonic-gate 
37677c478bd9Sstevel@tonic-gate 	ASSERT(disable_putlocks == 0);
37687c478bd9Sstevel@tonic-gate 	ASSERT(n_ciputctrl >= min_n_ciputctrl);
37697c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
37707c478bd9Sstevel@tonic-gate 
37717c478bd9Sstevel@tonic-gate 	if (!(sq->sq_type & SQ_CIPUT))
37727c478bd9Sstevel@tonic-gate 		return;
37737c478bd9Sstevel@tonic-gate 
37747c478bd9Sstevel@tonic-gate 	for (i = 0; i <= 1; i++) {
37757c478bd9Sstevel@tonic-gate 		if (sq->sq_ciputctrl == NULL) {
37767c478bd9Sstevel@tonic-gate 			cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
37777c478bd9Sstevel@tonic-gate 			SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
37787c478bd9Sstevel@tonic-gate 			mutex_enter(SQLOCK(sq));
37797c478bd9Sstevel@tonic-gate 			if (sq->sq_ciputctrl != NULL) {
37807c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
37817c478bd9Sstevel@tonic-gate 				kmem_cache_free(ciputctrl_cache, cip);
37827c478bd9Sstevel@tonic-gate 			} else {
37837c478bd9Sstevel@tonic-gate 				ASSERT(sq->sq_nciputctrl == 0);
37847c478bd9Sstevel@tonic-gate 				sq->sq_nciputctrl = n_ciputctrl - 1;
37857c478bd9Sstevel@tonic-gate 				/*
37867c478bd9Sstevel@tonic-gate 				 * putnext checks sq_ciputctrl without holding
37877c478bd9Sstevel@tonic-gate 				 * SQLOCK. if it is not NULL putnext assumes
37887c478bd9Sstevel@tonic-gate 				 * sq_nciputctrl is initialized. membar below
37897c478bd9Sstevel@tonic-gate 				 * insures that.
37907c478bd9Sstevel@tonic-gate 				 */
37917c478bd9Sstevel@tonic-gate 				membar_producer();
37927c478bd9Sstevel@tonic-gate 				sq->sq_ciputctrl = cip;
37937c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
37947c478bd9Sstevel@tonic-gate 			}
37957c478bd9Sstevel@tonic-gate 		}
37967c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
37977c478bd9Sstevel@tonic-gate 		if (i == 1)
37987c478bd9Sstevel@tonic-gate 			break;
37997c478bd9Sstevel@tonic-gate 		q = _OTHERQ(q);
38007c478bd9Sstevel@tonic-gate 		if (!(q->q_flag & QPERQ)) {
38017c478bd9Sstevel@tonic-gate 			ASSERT(sq == q->q_syncq);
38027c478bd9Sstevel@tonic-gate 			break;
38037c478bd9Sstevel@tonic-gate 		}
38047c478bd9Sstevel@tonic-gate 		ASSERT(q->q_syncq != NULL);
38057c478bd9Sstevel@tonic-gate 		ASSERT(sq != q->q_syncq);
38067c478bd9Sstevel@tonic-gate 		sq = q->q_syncq;
38077c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_type & SQ_CIPUT);
38087c478bd9Sstevel@tonic-gate 	}
38097c478bd9Sstevel@tonic-gate }
38107c478bd9Sstevel@tonic-gate 
38117c478bd9Sstevel@tonic-gate /*
38127c478bd9Sstevel@tonic-gate  * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for
38137c478bd9Sstevel@tonic-gate  * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for
38147c478bd9Sstevel@tonic-gate  * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
38157c478bd9Sstevel@tonic-gate  * starting from q and down to the driver.
38167c478bd9Sstevel@tonic-gate  *
38177c478bd9Sstevel@tonic-gate  * This should be called after the affected queues are part of stream
38187c478bd9Sstevel@tonic-gate  * geometry. It should be called from driver/module open routine after
38197c478bd9Sstevel@tonic-gate  * qprocson() call. It is also called from nfs syscall where it is known that
38207c478bd9Sstevel@tonic-gate  * stream is configured and won't change its geometry during create_putlock
38217c478bd9Sstevel@tonic-gate  * call.
38227c478bd9Sstevel@tonic-gate  *
38237c478bd9Sstevel@tonic-gate  * caller normally uses 0 value for the stream argument to speed up MT putnext
38247c478bd9Sstevel@tonic-gate  * into the perimeter of q for example because its perimeter is per module
38257c478bd9Sstevel@tonic-gate  * (e.g. IP).
38267c478bd9Sstevel@tonic-gate  *
38277c478bd9Sstevel@tonic-gate  * caller normally uses non 0 value for the stream argument to hint the system
38287c478bd9Sstevel@tonic-gate  * that the stream of q is a very contended global system stream
38297c478bd9Sstevel@tonic-gate  * (e.g. NFS/UDP) and the part of the stream from q to the driver is
38307c478bd9Sstevel@tonic-gate  * particularly MT hot.
38317c478bd9Sstevel@tonic-gate  *
38327c478bd9Sstevel@tonic-gate  * Caller insures stream plumbing won't happen while we are here and therefore
38337c478bd9Sstevel@tonic-gate  * q_next can be safely used.
38347c478bd9Sstevel@tonic-gate  */
38357c478bd9Sstevel@tonic-gate 
38367c478bd9Sstevel@tonic-gate void
38377c478bd9Sstevel@tonic-gate create_putlocks(queue_t *q, int stream)
38387c478bd9Sstevel@tonic-gate {
38397c478bd9Sstevel@tonic-gate 	ciputctrl_t	*cip;
38407c478bd9Sstevel@tonic-gate 	struct stdata	*stp = STREAM(q);
38417c478bd9Sstevel@tonic-gate 
38427c478bd9Sstevel@tonic-gate 	q = _WR(q);
38437c478bd9Sstevel@tonic-gate 	ASSERT(stp != NULL);
38447c478bd9Sstevel@tonic-gate 
38457c478bd9Sstevel@tonic-gate 	if (disable_putlocks != 0)
38467c478bd9Sstevel@tonic-gate 		return;
38477c478bd9Sstevel@tonic-gate 
38487c478bd9Sstevel@tonic-gate 	if (n_ciputctrl < min_n_ciputctrl)
38497c478bd9Sstevel@tonic-gate 		return;
38507c478bd9Sstevel@tonic-gate 
38517c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
38527c478bd9Sstevel@tonic-gate 
38537c478bd9Sstevel@tonic-gate 	if (stream != 0 && stp->sd_ciputctrl == NULL) {
38547c478bd9Sstevel@tonic-gate 		cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
38557c478bd9Sstevel@tonic-gate 		SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
38567c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
38577c478bd9Sstevel@tonic-gate 		if (stp->sd_ciputctrl != NULL) {
38587c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
38597c478bd9Sstevel@tonic-gate 			kmem_cache_free(ciputctrl_cache, cip);
38607c478bd9Sstevel@tonic-gate 		} else {
38617c478bd9Sstevel@tonic-gate 			ASSERT(stp->sd_nciputctrl == 0);
38627c478bd9Sstevel@tonic-gate 			stp->sd_nciputctrl = n_ciputctrl - 1;
38637c478bd9Sstevel@tonic-gate 			/*
38647c478bd9Sstevel@tonic-gate 			 * putnext checks sd_ciputctrl without holding
38657c478bd9Sstevel@tonic-gate 			 * sd_lock. if it is not NULL putnext assumes
38667c478bd9Sstevel@tonic-gate 			 * sd_nciputctrl is initialized. membar below
38677c478bd9Sstevel@tonic-gate 			 * insures that.
38687c478bd9Sstevel@tonic-gate 			 */
38697c478bd9Sstevel@tonic-gate 			membar_producer();
38707c478bd9Sstevel@tonic-gate 			stp->sd_ciputctrl = cip;
38717c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
38727c478bd9Sstevel@tonic-gate 		}
38737c478bd9Sstevel@tonic-gate 	}
38747c478bd9Sstevel@tonic-gate 
38757c478bd9Sstevel@tonic-gate 	ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);
38767c478bd9Sstevel@tonic-gate 
38777c478bd9Sstevel@tonic-gate 	while (_SAMESTR(q)) {
38787c478bd9Sstevel@tonic-gate 		create_syncq_putlocks(q);
38797c478bd9Sstevel@tonic-gate 		if (stream == 0)
38807c478bd9Sstevel@tonic-gate 			return;
38817c478bd9Sstevel@tonic-gate 		q = q->q_next;
38827c478bd9Sstevel@tonic-gate 	}
38837c478bd9Sstevel@tonic-gate 	ASSERT(q != NULL);
38847c478bd9Sstevel@tonic-gate 	create_syncq_putlocks(q);
38857c478bd9Sstevel@tonic-gate }
38867c478bd9Sstevel@tonic-gate 
38877c478bd9Sstevel@tonic-gate /*
38887c478bd9Sstevel@tonic-gate  * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
38897c478bd9Sstevel@tonic-gate  * through a stream.
38907c478bd9Sstevel@tonic-gate  *
 * The data currently recorded per event is an hrtime stamp, queue address,
 * event type, and a per-type datum.  Much of the STREAMS framework is
 * instrumented for automatic flow tracing (when enabled).  Events can be
 * defined and used by STREAMS modules and drivers.
38957c478bd9Sstevel@tonic-gate  *
38967c478bd9Sstevel@tonic-gate  * Global objects:
38977c478bd9Sstevel@tonic-gate  *
38987c478bd9Sstevel@tonic-gate  *	str_ftevent() - Add a flow-trace event to a dblk.
38997c478bd9Sstevel@tonic-gate  *	str_ftfree() - Free flow-trace data
39007c478bd9Sstevel@tonic-gate  *
39017c478bd9Sstevel@tonic-gate  * Local objects:
39027c478bd9Sstevel@tonic-gate  *
39037c478bd9Sstevel@tonic-gate  *	fthdr_cache - pointer to the kmem cache for trace header.
39047c478bd9Sstevel@tonic-gate  *	ftblk_cache - pointer to the kmem cache for trace data blocks.
39057c478bd9Sstevel@tonic-gate  */
39067c478bd9Sstevel@tonic-gate 
39077c478bd9Sstevel@tonic-gate int str_ftnever = 1;	/* Don't do STREAMS flow tracing */
39087c478bd9Sstevel@tonic-gate 
39097c478bd9Sstevel@tonic-gate void
39107c478bd9Sstevel@tonic-gate str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
39117c478bd9Sstevel@tonic-gate {
39127c478bd9Sstevel@tonic-gate 	ftblk_t *bp = hp->tail;
39137c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
39147c478bd9Sstevel@tonic-gate 	ftevnt_t *ep;
39157c478bd9Sstevel@tonic-gate 	int ix, nix;
39167c478bd9Sstevel@tonic-gate 
39177c478bd9Sstevel@tonic-gate 	ASSERT(hp != NULL);
39187c478bd9Sstevel@tonic-gate 
39197c478bd9Sstevel@tonic-gate 	for (;;) {
39207c478bd9Sstevel@tonic-gate 		if ((ix = bp->ix) == FTBLK_EVNTS) {
39217c478bd9Sstevel@tonic-gate 			/*
39227c478bd9Sstevel@tonic-gate 			 * Tail doesn't have room, so need a new tail.
39237c478bd9Sstevel@tonic-gate 			 *
39247c478bd9Sstevel@tonic-gate 			 * To make this MT safe, first, allocate a new
39257c478bd9Sstevel@tonic-gate 			 * ftblk, and initialize it.  To make life a
39267c478bd9Sstevel@tonic-gate 			 * little easier, reserve the first slot (mostly
39277c478bd9Sstevel@tonic-gate 			 * by making ix = 1).  When we are finished with
39287c478bd9Sstevel@tonic-gate 			 * the initialization, CAS this pointer to the
39297c478bd9Sstevel@tonic-gate 			 * tail.  If this succeeds, this is the new
39307c478bd9Sstevel@tonic-gate 			 * "next" block.  Otherwise, another thread
39317c478bd9Sstevel@tonic-gate 			 * got here first, so free the block and start
39327c478bd9Sstevel@tonic-gate 			 * again.
39337c478bd9Sstevel@tonic-gate 			 */
39347c478bd9Sstevel@tonic-gate 			if (!(nbp = kmem_cache_alloc(ftblk_cache,
39357c478bd9Sstevel@tonic-gate 			    KM_NOSLEEP))) {
39367c478bd9Sstevel@tonic-gate 				/* no mem, so punt */
39377c478bd9Sstevel@tonic-gate 				str_ftnever++;
39387c478bd9Sstevel@tonic-gate 				/* free up all flow data? */
39397c478bd9Sstevel@tonic-gate 				return;
39407c478bd9Sstevel@tonic-gate 			}
39417c478bd9Sstevel@tonic-gate 			nbp->nxt = NULL;
39427c478bd9Sstevel@tonic-gate 			nbp->ix = 1;
39437c478bd9Sstevel@tonic-gate 			/*
39447c478bd9Sstevel@tonic-gate 			 * Just in case there is another thread about
39457c478bd9Sstevel@tonic-gate 			 * to get the next index, we need to make sure
39467c478bd9Sstevel@tonic-gate 			 * the value is there for it.
39477c478bd9Sstevel@tonic-gate 			 */
39487c478bd9Sstevel@tonic-gate 			membar_producer();
39497c478bd9Sstevel@tonic-gate 			if (casptr(&hp->tail, bp, nbp) == bp) {
39507c478bd9Sstevel@tonic-gate 				/* CAS was successful */
39517c478bd9Sstevel@tonic-gate 				bp->nxt = nbp;
39527c478bd9Sstevel@tonic-gate 				membar_producer();
39537c478bd9Sstevel@tonic-gate 				bp = nbp;
39547c478bd9Sstevel@tonic-gate 				ix = 0;
39557c478bd9Sstevel@tonic-gate 				goto cas_good;
39567c478bd9Sstevel@tonic-gate 			} else {
39577c478bd9Sstevel@tonic-gate 				kmem_cache_free(ftblk_cache, nbp);
39587c478bd9Sstevel@tonic-gate 				bp = hp->tail;
39597c478bd9Sstevel@tonic-gate 				continue;
39607c478bd9Sstevel@tonic-gate 			}
39617c478bd9Sstevel@tonic-gate 		}
39627c478bd9Sstevel@tonic-gate 		nix = ix + 1;
39637c478bd9Sstevel@tonic-gate 		if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) {
39647c478bd9Sstevel@tonic-gate 		cas_good:
39657c478bd9Sstevel@tonic-gate 			if (curthread != hp->thread) {
39667c478bd9Sstevel@tonic-gate 				hp->thread = curthread;
39677c478bd9Sstevel@tonic-gate 				evnt |= FTEV_CS;
39687c478bd9Sstevel@tonic-gate 			}
39697c478bd9Sstevel@tonic-gate 			if (CPU->cpu_seqid != hp->cpu_seqid) {
39707c478bd9Sstevel@tonic-gate 				hp->cpu_seqid = CPU->cpu_seqid;
39717c478bd9Sstevel@tonic-gate 				evnt |= FTEV_PS;
39727c478bd9Sstevel@tonic-gate 			}
39737c478bd9Sstevel@tonic-gate 			ep = &bp->ev[ix];
39747c478bd9Sstevel@tonic-gate 			break;
39757c478bd9Sstevel@tonic-gate 		}
39767c478bd9Sstevel@tonic-gate 	}
39777c478bd9Sstevel@tonic-gate 
39787c478bd9Sstevel@tonic-gate 	if (evnt & FTEV_QMASK) {
39797c478bd9Sstevel@tonic-gate 		queue_t *qp = p;
39807c478bd9Sstevel@tonic-gate 
39817c478bd9Sstevel@tonic-gate 		/*
39827c478bd9Sstevel@tonic-gate 		 * It is possible that the module info is broke
39837c478bd9Sstevel@tonic-gate 		 * (as is logsubr.c at this comment writing).
39847c478bd9Sstevel@tonic-gate 		 * Instead of panicing or doing other unmentionables,
39857c478bd9Sstevel@tonic-gate 		 * we shall put a dummy name as the mid, and continue.
39867c478bd9Sstevel@tonic-gate 		 */
39877c478bd9Sstevel@tonic-gate 		if (qp->q_qinfo == NULL)
39887c478bd9Sstevel@tonic-gate 			ep->mid = "NONAME";
39897c478bd9Sstevel@tonic-gate 		else
39907c478bd9Sstevel@tonic-gate 			ep->mid = qp->q_qinfo->qi_minfo->mi_idname;
39917c478bd9Sstevel@tonic-gate 
39927c478bd9Sstevel@tonic-gate 		if (!(qp->q_flag & QREADR))
39937c478bd9Sstevel@tonic-gate 			evnt |= FTEV_ISWR;
39947c478bd9Sstevel@tonic-gate 	} else {
39957c478bd9Sstevel@tonic-gate 		ep->mid = (char *)p;
39967c478bd9Sstevel@tonic-gate 	}
39977c478bd9Sstevel@tonic-gate 
39987c478bd9Sstevel@tonic-gate 	ep->ts = gethrtime();
39997c478bd9Sstevel@tonic-gate 	ep->evnt = evnt;
40007c478bd9Sstevel@tonic-gate 	ep->data = data;
40017c478bd9Sstevel@tonic-gate 	hp->hash = (hp->hash << 9) + hp->hash;
40027c478bd9Sstevel@tonic-gate 	hp->hash += (evnt << 16) | data;
40037c478bd9Sstevel@tonic-gate 	hp->hash += (uintptr_t)ep->mid;
40047c478bd9Sstevel@tonic-gate }
40057c478bd9Sstevel@tonic-gate 
40067c478bd9Sstevel@tonic-gate /*
40077c478bd9Sstevel@tonic-gate  * Free flow-trace data.
40087c478bd9Sstevel@tonic-gate  */
40097c478bd9Sstevel@tonic-gate void
40107c478bd9Sstevel@tonic-gate str_ftfree(dblk_t *dbp)
40117c478bd9Sstevel@tonic-gate {
40127c478bd9Sstevel@tonic-gate 	fthdr_t *hp = dbp->db_fthdr;
40137c478bd9Sstevel@tonic-gate 	ftblk_t *bp = &hp->first;
40147c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
40157c478bd9Sstevel@tonic-gate 
40167c478bd9Sstevel@tonic-gate 	if (bp != hp->tail || bp->ix != 0) {
40177c478bd9Sstevel@tonic-gate 		/*
40187c478bd9Sstevel@tonic-gate 		 * Clear out the hash, have the tail point to itself, and free
40197c478bd9Sstevel@tonic-gate 		 * any continuation blocks.
40207c478bd9Sstevel@tonic-gate 		 */
40217c478bd9Sstevel@tonic-gate 		bp = hp->first.nxt;
40227c478bd9Sstevel@tonic-gate 		hp->tail = &hp->first;
40237c478bd9Sstevel@tonic-gate 		hp->hash = 0;
40247c478bd9Sstevel@tonic-gate 		hp->first.nxt = NULL;
40257c478bd9Sstevel@tonic-gate 		hp->first.ix = 0;
40267c478bd9Sstevel@tonic-gate 		while (bp != NULL) {
40277c478bd9Sstevel@tonic-gate 			nbp = bp->nxt;
40287c478bd9Sstevel@tonic-gate 			kmem_cache_free(ftblk_cache, bp);
40297c478bd9Sstevel@tonic-gate 			bp = nbp;
40307c478bd9Sstevel@tonic-gate 		}
40317c478bd9Sstevel@tonic-gate 	}
40327c478bd9Sstevel@tonic-gate 	kmem_cache_free(fthdr_cache, hp);
40337c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
40347c478bd9Sstevel@tonic-gate }
4035