xref: /titanic_53/usr/src/uts/common/io/stream.c (revision c28749e97052f09388969427adf7df641cdcdc22)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
237c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
287c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
297c478bd9Sstevel@tonic-gate  */
307c478bd9Sstevel@tonic-gate 
317c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
327c478bd9Sstevel@tonic-gate 
337c478bd9Sstevel@tonic-gate #include <sys/types.h>
347c478bd9Sstevel@tonic-gate #include <sys/param.h>
357c478bd9Sstevel@tonic-gate #include <sys/thread.h>
367c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
377c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
387c478bd9Sstevel@tonic-gate #include <sys/stream.h>
397c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
407c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
417c478bd9Sstevel@tonic-gate #include <sys/conf.h>
427c478bd9Sstevel@tonic-gate #include <sys/debug.h>
437c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
447c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
457c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
467c478bd9Sstevel@tonic-gate #include <sys/errno.h>
477c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
487c478bd9Sstevel@tonic-gate #include <sys/ftrace.h>
497c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
507c478bd9Sstevel@tonic-gate #include <sys/multidata.h>
517c478bd9Sstevel@tonic-gate #include <sys/multidata_impl.h>
527c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate #ifdef DEBUG
557c478bd9Sstevel@tonic-gate #include <sys/kmem_impl.h>
567c478bd9Sstevel@tonic-gate #endif
577c478bd9Sstevel@tonic-gate 
587c478bd9Sstevel@tonic-gate /*
597c478bd9Sstevel@tonic-gate  * This file contains all the STREAMS utility routines that may
607c478bd9Sstevel@tonic-gate  * be used by modules and drivers.
617c478bd9Sstevel@tonic-gate  */
627c478bd9Sstevel@tonic-gate 
637c478bd9Sstevel@tonic-gate /*
647c478bd9Sstevel@tonic-gate  * STREAMS message allocator: principles of operation
657c478bd9Sstevel@tonic-gate  *
667c478bd9Sstevel@tonic-gate  * The streams message allocator consists of all the routines that
677c478bd9Sstevel@tonic-gate  * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
687c478bd9Sstevel@tonic-gate  * dupb(), freeb() and freemsg().  What follows is a high-level view
697c478bd9Sstevel@tonic-gate  * of how the allocator works.
707c478bd9Sstevel@tonic-gate  *
717c478bd9Sstevel@tonic-gate  * Every streams message consists of one or more mblks, a dblk, and data.
727c478bd9Sstevel@tonic-gate  * All mblks for all types of messages come from a common mblk_cache.
737c478bd9Sstevel@tonic-gate  * The dblk and data come in several flavors, depending on how the
747c478bd9Sstevel@tonic-gate  * message is allocated:
757c478bd9Sstevel@tonic-gate  *
767c478bd9Sstevel@tonic-gate  * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
777c478bd9Sstevel@tonic-gate  *     fixed-size dblk/data caches. For message sizes that are multiples of
787c478bd9Sstevel@tonic-gate  *     PAGESIZE, dblks are allocated separately from the buffer.
797c478bd9Sstevel@tonic-gate  *     The associated buffer is allocated by the constructor using kmem_alloc().
807c478bd9Sstevel@tonic-gate  *     For all other message sizes, dblk and its associated data is allocated
817c478bd9Sstevel@tonic-gate  *     as a single contiguous chunk of memory.
827c478bd9Sstevel@tonic-gate  *     Objects in these caches consist of a dblk plus its associated data.
837c478bd9Sstevel@tonic-gate  *     allocb() determines the nearest-size cache by table lookup:
847c478bd9Sstevel@tonic-gate  *     the dblk_cache[] array provides the mapping from size to dblk cache.
857c478bd9Sstevel@tonic-gate  *
867c478bd9Sstevel@tonic-gate  * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
877c478bd9Sstevel@tonic-gate  *     kmem_alloc()'ing a buffer for the data and supplying that
887c478bd9Sstevel@tonic-gate  *     buffer to gesballoc(), described below.
897c478bd9Sstevel@tonic-gate  *
907c478bd9Sstevel@tonic-gate  * (3) The four flavors of [d]esballoc[a] are all implemented by a
917c478bd9Sstevel@tonic-gate  *     common routine, gesballoc() ("generic esballoc").  gesballoc()
927c478bd9Sstevel@tonic-gate  *     allocates a dblk from the global dblk_esb_cache and sets db_base,
937c478bd9Sstevel@tonic-gate  *     db_lim and db_frtnp to describe the caller-supplied buffer.
947c478bd9Sstevel@tonic-gate  *
957c478bd9Sstevel@tonic-gate  * While there are several routines to allocate messages, there is only
967c478bd9Sstevel@tonic-gate  * one routine to free messages: freeb().  freeb() simply invokes the
977c478bd9Sstevel@tonic-gate  * dblk's free method, dbp->db_free(), which is set at allocation time.
987c478bd9Sstevel@tonic-gate  *
997c478bd9Sstevel@tonic-gate  * dupb() creates a new reference to a message by allocating a new mblk,
1007c478bd9Sstevel@tonic-gate  * incrementing the dblk reference count and setting the dblk's free
1017c478bd9Sstevel@tonic-gate  * method to dblk_decref().  The dblk's original free method is retained
1027c478bd9Sstevel@tonic-gate  * in db_lastfree.  dblk_decref() decrements the reference count on each
1037c478bd9Sstevel@tonic-gate  * freeb().  If this is not the last reference it just frees the mblk;
1047c478bd9Sstevel@tonic-gate  * if this *is* the last reference, it restores db_free to db_lastfree,
1057c478bd9Sstevel@tonic-gate  * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
1067c478bd9Sstevel@tonic-gate  *
1077c478bd9Sstevel@tonic-gate  * The implementation makes aggressive use of kmem object caching for
1087c478bd9Sstevel@tonic-gate  * maximum performance.  This makes the code simple and compact, but
1097c478bd9Sstevel@tonic-gate  * also a bit abstruse in some places.  The invariants that constitute a
1107c478bd9Sstevel@tonic-gate  * message's constructed state, described below, are more subtle than usual.
1117c478bd9Sstevel@tonic-gate  *
1127c478bd9Sstevel@tonic-gate  * Every dblk has an "attached mblk" as part of its constructed state.
1137c478bd9Sstevel@tonic-gate  * The mblk is allocated by the dblk's constructor and remains attached
1147c478bd9Sstevel@tonic-gate  * until the message is either dup'ed or pulled up.  In the dupb() case
1157c478bd9Sstevel@tonic-gate  * the mblk association doesn't matter until the last free, at which time
1167c478bd9Sstevel@tonic-gate  * dblk_decref() attaches the last mblk to the dblk.  pullupmsg() affects
1177c478bd9Sstevel@tonic-gate  * the mblk association because it swaps the leading mblks of two messages,
1187c478bd9Sstevel@tonic-gate  * so it is responsible for swapping their db_mblk pointers accordingly.
1197c478bd9Sstevel@tonic-gate  * From a constructed-state viewpoint it doesn't matter that a dblk's
1207c478bd9Sstevel@tonic-gate  * attached mblk can change while the message is allocated; all that
1217c478bd9Sstevel@tonic-gate  * matters is that the dblk has *some* attached mblk when it's freed.
1227c478bd9Sstevel@tonic-gate  *
1237c478bd9Sstevel@tonic-gate  * The sizes of the allocb() small-message caches are not magical.
1247c478bd9Sstevel@tonic-gate  * They represent a good trade-off between internal and external
1257c478bd9Sstevel@tonic-gate  * fragmentation for current workloads.  They should be reevaluated
1267c478bd9Sstevel@tonic-gate  * periodically, especially if allocations larger than DBLK_MAX_CACHE
1277c478bd9Sstevel@tonic-gate  * become common.  We use 64-byte alignment so that dblks don't
1287c478bd9Sstevel@tonic-gate  * straddle cache lines unnecessarily.
1297c478bd9Sstevel@tonic-gate  */
/*
 * Sizing parameters for the small-message dblk caches: the largest data
 * size served from a cache, the cache object alignment, and the size
 * granularity (and its log2) used to index dblk_cache[].
 */
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64
#define	DBLK_MIN_SIZE		8
#define	DBLK_SIZE_SHIFT		3

/*
 * DBLK_RTFU_SHIFT(field) is the bit position of a dblk field inside the
 * 32-bit word that DBLK_RTFU_WORD() reads starting at db_ref.  It is
 * 8 times the distance between the field and db_struioflag (big-endian)
 * or between the field and db_ref (little-endian).
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Build the combined ref/type/flags/uioflag word.  The flags field also
 * receives (ref - 1), which dblk_decref() later reads back as the minimum
 * reference count.  Every argument is parenthesized so that expression
 * arguments (e.g. "a | b") expand with the intended precedence.
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | ((ref) - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
/* Access db_ref and the three fields that follow it as one 32-bit word. */
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
/* Access the mblk fields starting at b_band as one 32-bit word. */
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
1517c478bd9Sstevel@tonic-gate 
1527c478bd9Sstevel@tonic-gate static size_t dblk_sizes[] = {
1537c478bd9Sstevel@tonic-gate #ifdef _LP64
1547c478bd9Sstevel@tonic-gate 	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3920,
1557c478bd9Sstevel@tonic-gate 	8192, 12112, 16384, 20304, 24576, 28496, 32768, 36688,
1567c478bd9Sstevel@tonic-gate 	40960, 44880, 49152, 53072, 57344, 61264, 65536, 69456,
1577c478bd9Sstevel@tonic-gate #else
1587c478bd9Sstevel@tonic-gate 	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3968,
1597c478bd9Sstevel@tonic-gate 	8192, 12160, 16384, 20352, 24576, 28544, 32768, 36736,
1607c478bd9Sstevel@tonic-gate 	40960, 44928, 49152, 53120, 57344, 61312, 65536, 69504,
1617c478bd9Sstevel@tonic-gate #endif
1627c478bd9Sstevel@tonic-gate 	DBLK_MAX_CACHE, 0
1637c478bd9Sstevel@tonic-gate };
1647c478bd9Sstevel@tonic-gate 
1657c478bd9Sstevel@tonic-gate static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
1667c478bd9Sstevel@tonic-gate static struct kmem_cache *mblk_cache;
1677c478bd9Sstevel@tonic-gate static struct kmem_cache *dblk_esb_cache;
1687c478bd9Sstevel@tonic-gate static struct kmem_cache *fthdr_cache;
1697c478bd9Sstevel@tonic-gate static struct kmem_cache *ftblk_cache;
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1727c478bd9Sstevel@tonic-gate static mblk_t *allocb_oversize(size_t size, int flags);
1737c478bd9Sstevel@tonic-gate static int allocb_tryhard_fails;
1747c478bd9Sstevel@tonic-gate static void frnop_func(void *arg);
1757c478bd9Sstevel@tonic-gate frtn_t frnop = { frnop_func };
1767c478bd9Sstevel@tonic-gate static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate static boolean_t rwnext_enter(queue_t *qp);
1797c478bd9Sstevel@tonic-gate static void rwnext_exit(queue_t *qp);
1807c478bd9Sstevel@tonic-gate 
1817c478bd9Sstevel@tonic-gate /*
1827c478bd9Sstevel@tonic-gate  * Patchable mblk/dblk kmem_cache flags.
1837c478bd9Sstevel@tonic-gate  */
1847c478bd9Sstevel@tonic-gate int dblk_kmem_flags = 0;
1857c478bd9Sstevel@tonic-gate int mblk_kmem_flags = 0;
1867c478bd9Sstevel@tonic-gate 
1877c478bd9Sstevel@tonic-gate 
1887c478bd9Sstevel@tonic-gate static int
1897c478bd9Sstevel@tonic-gate dblk_constructor(void *buf, void *cdrarg, int kmflags)
1907c478bd9Sstevel@tonic-gate {
1917c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
1927c478bd9Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
1937c478bd9Sstevel@tonic-gate 	size_t index;
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	ASSERT(msg_size != 0);
1967c478bd9Sstevel@tonic-gate 
1977c478bd9Sstevel@tonic-gate 	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;
1987c478bd9Sstevel@tonic-gate 
199e4506d67Smeem 	ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2027c478bd9Sstevel@tonic-gate 		return (-1);
2037c478bd9Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2047c478bd9Sstevel@tonic-gate 		dbp->db_base = kmem_alloc(msg_size, kmflags);
2057c478bd9Sstevel@tonic-gate 		if (dbp->db_base == NULL) {
2067c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, dbp->db_mblk);
2077c478bd9Sstevel@tonic-gate 			return (-1);
2087c478bd9Sstevel@tonic-gate 		}
2097c478bd9Sstevel@tonic-gate 	} else {
2107c478bd9Sstevel@tonic-gate 		dbp->db_base = (unsigned char *)&dbp[1];
2117c478bd9Sstevel@tonic-gate 	}
2127c478bd9Sstevel@tonic-gate 
2137c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2147c478bd9Sstevel@tonic-gate 	dbp->db_cache = dblk_cache[index];
2157c478bd9Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + msg_size;
2167c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
2177c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2187c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2197c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2207c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2217c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2227c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2237c478bd9Sstevel@tonic-gate 	return (0);
2247c478bd9Sstevel@tonic-gate }
2257c478bd9Sstevel@tonic-gate 
2267c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2277c478bd9Sstevel@tonic-gate static int
2287c478bd9Sstevel@tonic-gate dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
2297c478bd9Sstevel@tonic-gate {
2307c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2337c478bd9Sstevel@tonic-gate 		return (-1);
2347c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2357c478bd9Sstevel@tonic-gate 	dbp->db_cache = dblk_esb_cache;
2367c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2377c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2387c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2397c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2407c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2417c478bd9Sstevel@tonic-gate 	return (0);
2427c478bd9Sstevel@tonic-gate }
2437c478bd9Sstevel@tonic-gate 
2447c478bd9Sstevel@tonic-gate static int
2457c478bd9Sstevel@tonic-gate bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
2467c478bd9Sstevel@tonic-gate {
2477c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2487c478bd9Sstevel@tonic-gate 	bcache_t *bcp = (bcache_t *)cdrarg;
2497c478bd9Sstevel@tonic-gate 
2507c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2517c478bd9Sstevel@tonic-gate 		return (-1);
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate 	if ((dbp->db_base = (unsigned char *)kmem_cache_alloc(bcp->buffer_cache,
2547c478bd9Sstevel@tonic-gate 	    kmflags)) == NULL) {
2557c478bd9Sstevel@tonic-gate 		kmem_cache_free(mblk_cache, dbp->db_mblk);
2567c478bd9Sstevel@tonic-gate 		return (-1);
2577c478bd9Sstevel@tonic-gate 	}
2587c478bd9Sstevel@tonic-gate 
2597c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2607c478bd9Sstevel@tonic-gate 	dbp->db_cache = (void *)bcp;
2617c478bd9Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + bcp->size;
2627c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
2637c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2647c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2657c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2667c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2677c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2687c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2697c478bd9Sstevel@tonic-gate 	return (0);
2707c478bd9Sstevel@tonic-gate }
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2737c478bd9Sstevel@tonic-gate static void
2747c478bd9Sstevel@tonic-gate dblk_destructor(void *buf, void *cdrarg)
2757c478bd9Sstevel@tonic-gate {
2767c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2777c478bd9Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
2807c478bd9Sstevel@tonic-gate 
2817c478bd9Sstevel@tonic-gate 	ASSERT(msg_size != 0);
2827c478bd9Sstevel@tonic-gate 
2837c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
2847c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2877c478bd9Sstevel@tonic-gate 		kmem_free(dbp->db_base, msg_size);
2887c478bd9Sstevel@tonic-gate 	}
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
2917c478bd9Sstevel@tonic-gate }
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate static void
2947c478bd9Sstevel@tonic-gate bcache_dblk_destructor(void *buf, void *cdrarg)
2957c478bd9Sstevel@tonic-gate {
2967c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2977c478bd9Sstevel@tonic-gate 	bcache_t *bcp = (bcache_t *)cdrarg;
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate 	kmem_cache_free(bcp->buffer_cache, dbp->db_base);
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
3047c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
3057c478bd9Sstevel@tonic-gate 
3067c478bd9Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
3077c478bd9Sstevel@tonic-gate }
3087c478bd9Sstevel@tonic-gate 
3097c478bd9Sstevel@tonic-gate void
3107c478bd9Sstevel@tonic-gate streams_msg_init(void)
3117c478bd9Sstevel@tonic-gate {
3127c478bd9Sstevel@tonic-gate 	char name[40];
3137c478bd9Sstevel@tonic-gate 	size_t size;
3147c478bd9Sstevel@tonic-gate 	size_t lastsize = DBLK_MIN_SIZE;
3157c478bd9Sstevel@tonic-gate 	size_t *sizep;
3167c478bd9Sstevel@tonic-gate 	struct kmem_cache *cp;
3177c478bd9Sstevel@tonic-gate 	size_t tot_size;
3187c478bd9Sstevel@tonic-gate 	int offset;
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 	mblk_cache = kmem_cache_create("streams_mblk",
3217c478bd9Sstevel@tonic-gate 		sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
3227c478bd9Sstevel@tonic-gate 		mblk_kmem_flags);
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
3257c478bd9Sstevel@tonic-gate 
3267c478bd9Sstevel@tonic-gate 		if ((offset = (size & PAGEOFFSET)) != 0) {
3277c478bd9Sstevel@tonic-gate 			/*
3287c478bd9Sstevel@tonic-gate 			 * We are in the middle of a page, dblk should
3297c478bd9Sstevel@tonic-gate 			 * be allocated on the same page
3307c478bd9Sstevel@tonic-gate 			 */
3317c478bd9Sstevel@tonic-gate 			tot_size = size + sizeof (dblk_t);
3327c478bd9Sstevel@tonic-gate 			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
3337c478bd9Sstevel@tonic-gate 								< PAGESIZE);
3347c478bd9Sstevel@tonic-gate 			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
3357c478bd9Sstevel@tonic-gate 
3367c478bd9Sstevel@tonic-gate 		} else {
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate 			/*
3397c478bd9Sstevel@tonic-gate 			 * buf size is multiple of page size, dblk and
3407c478bd9Sstevel@tonic-gate 			 * buffer are allocated separately.
3417c478bd9Sstevel@tonic-gate 			 */
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate 			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
3447c478bd9Sstevel@tonic-gate 			tot_size = sizeof (dblk_t);
3457c478bd9Sstevel@tonic-gate 		}
3467c478bd9Sstevel@tonic-gate 
3477c478bd9Sstevel@tonic-gate 		(void) sprintf(name, "streams_dblk_%ld", size);
3487c478bd9Sstevel@tonic-gate 		cp = kmem_cache_create(name, tot_size,
3497c478bd9Sstevel@tonic-gate 			DBLK_CACHE_ALIGN, dblk_constructor,
3507c478bd9Sstevel@tonic-gate 			dblk_destructor, NULL,
3517c478bd9Sstevel@tonic-gate 			(void *)(size), NULL, dblk_kmem_flags);
3527c478bd9Sstevel@tonic-gate 
3537c478bd9Sstevel@tonic-gate 		while (lastsize <= size) {
3547c478bd9Sstevel@tonic-gate 			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
3557c478bd9Sstevel@tonic-gate 			lastsize += DBLK_MIN_SIZE;
3567c478bd9Sstevel@tonic-gate 		}
3577c478bd9Sstevel@tonic-gate 	}
3587c478bd9Sstevel@tonic-gate 
3597c478bd9Sstevel@tonic-gate 	dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
3607c478bd9Sstevel@tonic-gate 			sizeof (dblk_t), DBLK_CACHE_ALIGN,
3617c478bd9Sstevel@tonic-gate 			dblk_esb_constructor, dblk_destructor, NULL,
3627c478bd9Sstevel@tonic-gate 			(void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
3637c478bd9Sstevel@tonic-gate 	fthdr_cache = kmem_cache_create("streams_fthdr",
3647c478bd9Sstevel@tonic-gate 		sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
3657c478bd9Sstevel@tonic-gate 	ftblk_cache = kmem_cache_create("streams_ftblk",
3667c478bd9Sstevel@tonic-gate 		sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
3677c478bd9Sstevel@tonic-gate 
3687c478bd9Sstevel@tonic-gate 	/* Initialize Multidata caches */
3697c478bd9Sstevel@tonic-gate 	mmd_init();
3707c478bd9Sstevel@tonic-gate }
3717c478bd9Sstevel@tonic-gate 
3727c478bd9Sstevel@tonic-gate /*ARGSUSED*/
3737c478bd9Sstevel@tonic-gate mblk_t *
3747c478bd9Sstevel@tonic-gate allocb(size_t size, uint_t pri)
3757c478bd9Sstevel@tonic-gate {
3767c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
3777c478bd9Sstevel@tonic-gate 	mblk_t *mp;
3787c478bd9Sstevel@tonic-gate 	size_t index;
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 	index =  (size - 1)  >> DBLK_SIZE_SHIFT;
3817c478bd9Sstevel@tonic-gate 
3827c478bd9Sstevel@tonic-gate 	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
3837c478bd9Sstevel@tonic-gate 		if (size != 0) {
3847c478bd9Sstevel@tonic-gate 			mp = allocb_oversize(size, KM_NOSLEEP);
3857c478bd9Sstevel@tonic-gate 			goto out;
3867c478bd9Sstevel@tonic-gate 		}
3877c478bd9Sstevel@tonic-gate 		index = 0;
3887c478bd9Sstevel@tonic-gate 	}
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
3917c478bd9Sstevel@tonic-gate 		mp = NULL;
3927c478bd9Sstevel@tonic-gate 		goto out;
3937c478bd9Sstevel@tonic-gate 	}
3947c478bd9Sstevel@tonic-gate 
3957c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
3967c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
3977c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
3987c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
3997c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
4007c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
4017c478bd9Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
4027c478bd9Sstevel@tonic-gate out:
4037c478bd9Sstevel@tonic-gate 	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);
4047c478bd9Sstevel@tonic-gate 
4057c478bd9Sstevel@tonic-gate 	return (mp);
4067c478bd9Sstevel@tonic-gate }
4077c478bd9Sstevel@tonic-gate 
4087c478bd9Sstevel@tonic-gate mblk_t *
4097c478bd9Sstevel@tonic-gate allocb_tmpl(size_t size, const mblk_t *tmpl)
4107c478bd9Sstevel@tonic-gate {
4117c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4127c478bd9Sstevel@tonic-gate 
4137c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
4147c478bd9Sstevel@tonic-gate 		cred_t *cr = DB_CRED(tmpl);
4157c478bd9Sstevel@tonic-gate 		if (cr != NULL)
4167c478bd9Sstevel@tonic-gate 			crhold(mp->b_datap->db_credp = cr);
4177c478bd9Sstevel@tonic-gate 		DB_CPID(mp) = DB_CPID(tmpl);
4187c478bd9Sstevel@tonic-gate 		DB_TYPE(mp) = DB_TYPE(tmpl);
4197c478bd9Sstevel@tonic-gate 	}
4207c478bd9Sstevel@tonic-gate 	return (mp);
4217c478bd9Sstevel@tonic-gate }
4227c478bd9Sstevel@tonic-gate 
4237c478bd9Sstevel@tonic-gate mblk_t *
4247c478bd9Sstevel@tonic-gate allocb_cred(size_t size, cred_t *cr)
4257c478bd9Sstevel@tonic-gate {
4267c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
4297c478bd9Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
4307c478bd9Sstevel@tonic-gate 
4317c478bd9Sstevel@tonic-gate 	return (mp);
4327c478bd9Sstevel@tonic-gate }
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate mblk_t *
4357c478bd9Sstevel@tonic-gate allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr)
4367c478bd9Sstevel@tonic-gate {
4377c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb_wait(size, 0, flags, error);
4387c478bd9Sstevel@tonic-gate 
4397c478bd9Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
4407c478bd9Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 	return (mp);
4437c478bd9Sstevel@tonic-gate }
4447c478bd9Sstevel@tonic-gate 
4457c478bd9Sstevel@tonic-gate void
4467c478bd9Sstevel@tonic-gate freeb(mblk_t *mp)
4477c478bd9Sstevel@tonic-gate {
4487c478bd9Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
4497c478bd9Sstevel@tonic-gate 
4507c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_ref > 0);
4517c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
4527c478bd9Sstevel@tonic-gate 	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);
4537c478bd9Sstevel@tonic-gate 
4547c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate 	dbp->db_free(mp, dbp);
4577c478bd9Sstevel@tonic-gate }
4587c478bd9Sstevel@tonic-gate 
4597c478bd9Sstevel@tonic-gate void
4607c478bd9Sstevel@tonic-gate freemsg(mblk_t *mp)
4617c478bd9Sstevel@tonic-gate {
4627c478bd9Sstevel@tonic-gate 	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
4637c478bd9Sstevel@tonic-gate 	while (mp) {
4647c478bd9Sstevel@tonic-gate 		dblk_t *dbp = mp->b_datap;
4657c478bd9Sstevel@tonic-gate 		mblk_t *mp_cont = mp->b_cont;
4667c478bd9Sstevel@tonic-gate 
4677c478bd9Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
4687c478bd9Sstevel@tonic-gate 		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
4697c478bd9Sstevel@tonic-gate 
4707c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
4717c478bd9Sstevel@tonic-gate 
4727c478bd9Sstevel@tonic-gate 		dbp->db_free(mp, dbp);
4737c478bd9Sstevel@tonic-gate 		mp = mp_cont;
4747c478bd9Sstevel@tonic-gate 	}
4757c478bd9Sstevel@tonic-gate }
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate /*
4787c478bd9Sstevel@tonic-gate  * Reallocate a block for another use.  Try hard to use the old block.
4797c478bd9Sstevel@tonic-gate  * If the old data is wanted (copy), leave b_wptr at the end of the data,
4807c478bd9Sstevel@tonic-gate  * otherwise return b_wptr = b_rptr.
4817c478bd9Sstevel@tonic-gate  *
4827c478bd9Sstevel@tonic-gate  * This routine is private and unstable.
4837c478bd9Sstevel@tonic-gate  */
4847c478bd9Sstevel@tonic-gate mblk_t	*
4857c478bd9Sstevel@tonic-gate reallocb(mblk_t *mp, size_t size, uint_t copy)
4867c478bd9Sstevel@tonic-gate {
4877c478bd9Sstevel@tonic-gate 	mblk_t		*mp1;
4887c478bd9Sstevel@tonic-gate 	unsigned char	*old_rptr;
4897c478bd9Sstevel@tonic-gate 	ptrdiff_t	cur_size;
4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate 	if (mp == NULL)
4927c478bd9Sstevel@tonic-gate 		return (allocb(size, BPRI_HI));
4937c478bd9Sstevel@tonic-gate 
4947c478bd9Sstevel@tonic-gate 	cur_size = mp->b_wptr - mp->b_rptr;
4957c478bd9Sstevel@tonic-gate 	old_rptr = mp->b_rptr;
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref != 0);
4987c478bd9Sstevel@tonic-gate 
4997c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
5007c478bd9Sstevel@tonic-gate 		/*
5017c478bd9Sstevel@tonic-gate 		 * If the data is wanted and it will fit where it is, no
5027c478bd9Sstevel@tonic-gate 		 * work is required.
5037c478bd9Sstevel@tonic-gate 		 */
5047c478bd9Sstevel@tonic-gate 		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
5057c478bd9Sstevel@tonic-gate 			return (mp);
5067c478bd9Sstevel@tonic-gate 
5077c478bd9Sstevel@tonic-gate 		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
5087c478bd9Sstevel@tonic-gate 		mp1 = mp;
5097c478bd9Sstevel@tonic-gate 	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
5107c478bd9Sstevel@tonic-gate 		/* XXX other mp state could be copied too, db_flags ... ? */
5117c478bd9Sstevel@tonic-gate 		mp1->b_cont = mp->b_cont;
5127c478bd9Sstevel@tonic-gate 	} else {
5137c478bd9Sstevel@tonic-gate 		return (NULL);
5147c478bd9Sstevel@tonic-gate 	}
5157c478bd9Sstevel@tonic-gate 
5167c478bd9Sstevel@tonic-gate 	if (copy) {
5177c478bd9Sstevel@tonic-gate 		bcopy(old_rptr, mp1->b_rptr, cur_size);
5187c478bd9Sstevel@tonic-gate 		mp1->b_wptr = mp1->b_rptr + cur_size;
5197c478bd9Sstevel@tonic-gate 	}
5207c478bd9Sstevel@tonic-gate 
5217c478bd9Sstevel@tonic-gate 	if (mp != mp1)
5227c478bd9Sstevel@tonic-gate 		freeb(mp);
5237c478bd9Sstevel@tonic-gate 
5247c478bd9Sstevel@tonic-gate 	return (mp1);
5257c478bd9Sstevel@tonic-gate }
5267c478bd9Sstevel@tonic-gate 
5277c478bd9Sstevel@tonic-gate static void
5287c478bd9Sstevel@tonic-gate dblk_lastfree(mblk_t *mp, dblk_t *dbp)
5297c478bd9Sstevel@tonic-gate {
5307c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
5317c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
5327c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
5337c478bd9Sstevel@tonic-gate 
5347c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
5357c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
5367c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
5377c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
5387c478bd9Sstevel@tonic-gate 	}
5397c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
5407c478bd9Sstevel@tonic-gate 
5417c478bd9Sstevel@tonic-gate 	/* Reset the struioflag and the checksum flag fields */
5427c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
5437c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
5447c478bd9Sstevel@tonic-gate 
545*c28749e9Skais 	/* and the COOKED flag */
546*c28749e9Skais 	dbp->db_flags &= ~DBLK_COOKED;
547*c28749e9Skais 
5487c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
5497c478bd9Sstevel@tonic-gate }
5507c478bd9Sstevel@tonic-gate 
5517c478bd9Sstevel@tonic-gate static void
5527c478bd9Sstevel@tonic-gate dblk_decref(mblk_t *mp, dblk_t *dbp)
5537c478bd9Sstevel@tonic-gate {
5547c478bd9Sstevel@tonic-gate 	if (dbp->db_ref != 1) {
5557c478bd9Sstevel@tonic-gate 		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
5567c478bd9Sstevel@tonic-gate 		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
5577c478bd9Sstevel@tonic-gate 		/*
5587c478bd9Sstevel@tonic-gate 		 * atomic_add_32_nv() just decremented db_ref, so we no longer
5597c478bd9Sstevel@tonic-gate 		 * have a reference to the dblk, which means another thread
5607c478bd9Sstevel@tonic-gate 		 * could free it.  Therefore we cannot examine the dblk to
5617c478bd9Sstevel@tonic-gate 		 * determine whether ours was the last reference.  Instead,
5627c478bd9Sstevel@tonic-gate 		 * we extract the new and minimum reference counts from rtfu.
5637c478bd9Sstevel@tonic-gate 		 * Note that all we're really saying is "if (ref != refmin)".
5647c478bd9Sstevel@tonic-gate 		 */
5657c478bd9Sstevel@tonic-gate 		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
5667c478bd9Sstevel@tonic-gate 		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
5677c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, mp);
5687c478bd9Sstevel@tonic-gate 			return;
5697c478bd9Sstevel@tonic-gate 		}
5707c478bd9Sstevel@tonic-gate 	}
5717c478bd9Sstevel@tonic-gate 	dbp->db_mblk = mp;
5727c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree;
5737c478bd9Sstevel@tonic-gate 	dbp->db_lastfree(mp, dbp);
5747c478bd9Sstevel@tonic-gate }
5757c478bd9Sstevel@tonic-gate 
5767c478bd9Sstevel@tonic-gate mblk_t *
5777c478bd9Sstevel@tonic-gate dupb(mblk_t *mp)
5787c478bd9Sstevel@tonic-gate {
5797c478bd9Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
5807c478bd9Sstevel@tonic-gate 	mblk_t *new_mp;
5817c478bd9Sstevel@tonic-gate 	uint32_t oldrtfu, newrtfu;
5827c478bd9Sstevel@tonic-gate 
5837c478bd9Sstevel@tonic-gate 	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
5847c478bd9Sstevel@tonic-gate 		goto out;
5857c478bd9Sstevel@tonic-gate 
5867c478bd9Sstevel@tonic-gate 	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
5877c478bd9Sstevel@tonic-gate 	new_mp->b_rptr = mp->b_rptr;
5887c478bd9Sstevel@tonic-gate 	new_mp->b_wptr = mp->b_wptr;
5897c478bd9Sstevel@tonic-gate 	new_mp->b_datap = dbp;
5907c478bd9Sstevel@tonic-gate 	new_mp->b_queue = NULL;
5917c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);
5947c478bd9Sstevel@tonic-gate 
5957c478bd9Sstevel@tonic-gate 	/*
5967c478bd9Sstevel@tonic-gate 	 * First-dup optimization.  The enabling assumption is that there
5977c478bd9Sstevel@tonic-gate 	 * can can never be a race (in correct code) to dup the first copy
5987c478bd9Sstevel@tonic-gate 	 * of a message.  Therefore we don't need to do it atomically.
5997c478bd9Sstevel@tonic-gate 	 */
6007c478bd9Sstevel@tonic-gate 	if (dbp->db_free != dblk_decref) {
6017c478bd9Sstevel@tonic-gate 		dbp->db_free = dblk_decref;
6027c478bd9Sstevel@tonic-gate 		dbp->db_ref++;
6037c478bd9Sstevel@tonic-gate 		goto out;
6047c478bd9Sstevel@tonic-gate 	}
6057c478bd9Sstevel@tonic-gate 
6067c478bd9Sstevel@tonic-gate 	do {
6077c478bd9Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
6087c478bd9Sstevel@tonic-gate 		oldrtfu = DBLK_RTFU_WORD(dbp);
6097c478bd9Sstevel@tonic-gate 		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
6107c478bd9Sstevel@tonic-gate 		/*
6117c478bd9Sstevel@tonic-gate 		 * If db_ref is maxed out we can't dup this message anymore.
6127c478bd9Sstevel@tonic-gate 		 */
6137c478bd9Sstevel@tonic-gate 		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
6147c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, new_mp);
6157c478bd9Sstevel@tonic-gate 			new_mp = NULL;
6167c478bd9Sstevel@tonic-gate 			goto out;
6177c478bd9Sstevel@tonic-gate 		}
6187c478bd9Sstevel@tonic-gate 	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);
6197c478bd9Sstevel@tonic-gate 
6207c478bd9Sstevel@tonic-gate out:
6217c478bd9Sstevel@tonic-gate 	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
6227c478bd9Sstevel@tonic-gate 	return (new_mp);
6237c478bd9Sstevel@tonic-gate }
6247c478bd9Sstevel@tonic-gate 
6257c478bd9Sstevel@tonic-gate static void
6267c478bd9Sstevel@tonic-gate dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
6277c478bd9Sstevel@tonic-gate {
6287c478bd9Sstevel@tonic-gate 	frtn_t *frp = dbp->db_frtnp;
6297c478bd9Sstevel@tonic-gate 
6307c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
6317c478bd9Sstevel@tonic-gate 	frp->free_func(frp->free_arg);
6327c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
6337c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
6347c478bd9Sstevel@tonic-gate 
6357c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
6367c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
6377c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
6387c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
6397c478bd9Sstevel@tonic-gate 	}
6407c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
6417c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
6427c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
6437c478bd9Sstevel@tonic-gate 
6447c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
6457c478bd9Sstevel@tonic-gate }
6467c478bd9Sstevel@tonic-gate 
/*
 * No-op free function: accepts an argument and does nothing with it.
 */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
	/* intentionally empty */
}
6527c478bd9Sstevel@tonic-gate 
6537c478bd9Sstevel@tonic-gate /*
6547c478bd9Sstevel@tonic-gate  * Generic esballoc used to implement the four flavors: [d]esballoc[a].
6557c478bd9Sstevel@tonic-gate  */
6567c478bd9Sstevel@tonic-gate static mblk_t *
6577c478bd9Sstevel@tonic-gate gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
6587c478bd9Sstevel@tonic-gate 	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
6597c478bd9Sstevel@tonic-gate {
6607c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
6617c478bd9Sstevel@tonic-gate 	mblk_t *mp;
6627c478bd9Sstevel@tonic-gate 
6637c478bd9Sstevel@tonic-gate 	ASSERT(base != NULL && frp != NULL);
6647c478bd9Sstevel@tonic-gate 
6657c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
6667c478bd9Sstevel@tonic-gate 		mp = NULL;
6677c478bd9Sstevel@tonic-gate 		goto out;
6687c478bd9Sstevel@tonic-gate 	}
6697c478bd9Sstevel@tonic-gate 
6707c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
6717c478bd9Sstevel@tonic-gate 	dbp->db_base = base;
6727c478bd9Sstevel@tonic-gate 	dbp->db_lim = base + size;
6737c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = lastfree;
6747c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = frp;
6757c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = db_rtfu;
6767c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
6777c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = base;
6787c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
6797c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
6807c478bd9Sstevel@tonic-gate 
6817c478bd9Sstevel@tonic-gate out:
6827c478bd9Sstevel@tonic-gate 	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
6837c478bd9Sstevel@tonic-gate 	return (mp);
6847c478bd9Sstevel@tonic-gate }
6857c478bd9Sstevel@tonic-gate 
6867c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6877c478bd9Sstevel@tonic-gate mblk_t *
6887c478bd9Sstevel@tonic-gate esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
6897c478bd9Sstevel@tonic-gate {
6907c478bd9Sstevel@tonic-gate 	mblk_t *mp;
6917c478bd9Sstevel@tonic-gate 
6927c478bd9Sstevel@tonic-gate 	/*
6937c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
6947c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
6957c478bd9Sstevel@tonic-gate 	 * call optimization.
6967c478bd9Sstevel@tonic-gate 	 */
6977c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
6987c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
6997c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
7007c478bd9Sstevel@tonic-gate 
7017c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7027c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
7037c478bd9Sstevel@tonic-gate 		return (mp);
7047c478bd9Sstevel@tonic-gate 	}
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7077c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
7087c478bd9Sstevel@tonic-gate }
7097c478bd9Sstevel@tonic-gate 
7107c478bd9Sstevel@tonic-gate /*
7117c478bd9Sstevel@tonic-gate  * Same as esballoc() but sleeps waiting for memory.
7127c478bd9Sstevel@tonic-gate  */
7137c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7147c478bd9Sstevel@tonic-gate mblk_t *
7157c478bd9Sstevel@tonic-gate esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7167c478bd9Sstevel@tonic-gate {
7177c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7187c478bd9Sstevel@tonic-gate 
7197c478bd9Sstevel@tonic-gate 	/*
7207c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7217c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7227c478bd9Sstevel@tonic-gate 	 * call optimization.
7237c478bd9Sstevel@tonic-gate 	 */
7247c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7257c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7267c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_SLEEP);
7277c478bd9Sstevel@tonic-gate 
7287c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
7297c478bd9Sstevel@tonic-gate 		return (mp);
7307c478bd9Sstevel@tonic-gate 	}
7317c478bd9Sstevel@tonic-gate 
7327c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7337c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_SLEEP));
7347c478bd9Sstevel@tonic-gate }
7357c478bd9Sstevel@tonic-gate 
7367c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7377c478bd9Sstevel@tonic-gate mblk_t *
7387c478bd9Sstevel@tonic-gate desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7397c478bd9Sstevel@tonic-gate {
7407c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7417c478bd9Sstevel@tonic-gate 
7427c478bd9Sstevel@tonic-gate 	/*
7437c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7447c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7457c478bd9Sstevel@tonic-gate 	 * call optimization.
7467c478bd9Sstevel@tonic-gate 	 */
7477c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7487c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7497c478bd9Sstevel@tonic-gate 			frp, dblk_lastfree_desb, KM_NOSLEEP);
7507c478bd9Sstevel@tonic-gate 
7517c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7527c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
7537c478bd9Sstevel@tonic-gate 		return (mp);
7547c478bd9Sstevel@tonic-gate 	}
7557c478bd9Sstevel@tonic-gate 
7567c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7577c478bd9Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
7587c478bd9Sstevel@tonic-gate }
7597c478bd9Sstevel@tonic-gate 
7607c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7617c478bd9Sstevel@tonic-gate mblk_t *
7627c478bd9Sstevel@tonic-gate esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7637c478bd9Sstevel@tonic-gate {
7647c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7657c478bd9Sstevel@tonic-gate 
7667c478bd9Sstevel@tonic-gate 	/*
7677c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7687c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7697c478bd9Sstevel@tonic-gate 	 * call optimization.
7707c478bd9Sstevel@tonic-gate 	 */
7717c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7727c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7737c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
7747c478bd9Sstevel@tonic-gate 
7757c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7767c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
7777c478bd9Sstevel@tonic-gate 		return (mp);
7787c478bd9Sstevel@tonic-gate 	}
7797c478bd9Sstevel@tonic-gate 
7807c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7817c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
7827c478bd9Sstevel@tonic-gate }
7837c478bd9Sstevel@tonic-gate 
7847c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7857c478bd9Sstevel@tonic-gate mblk_t *
7867c478bd9Sstevel@tonic-gate desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7877c478bd9Sstevel@tonic-gate {
7887c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7897c478bd9Sstevel@tonic-gate 
7907c478bd9Sstevel@tonic-gate 	/*
7917c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7927c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7937c478bd9Sstevel@tonic-gate 	 * call optimization.
7947c478bd9Sstevel@tonic-gate 	 */
7957c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7967c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7977c478bd9Sstevel@tonic-gate 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
7987c478bd9Sstevel@tonic-gate 
7997c478bd9Sstevel@tonic-gate 		if (mp != NULL)
8007c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
8017c478bd9Sstevel@tonic-gate 		return (mp);
8027c478bd9Sstevel@tonic-gate 	}
8037c478bd9Sstevel@tonic-gate 
8047c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
8057c478bd9Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
8067c478bd9Sstevel@tonic-gate }
8077c478bd9Sstevel@tonic-gate 
8087c478bd9Sstevel@tonic-gate static void
8097c478bd9Sstevel@tonic-gate bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
8107c478bd9Sstevel@tonic-gate {
8117c478bd9Sstevel@tonic-gate 	bcache_t *bcp = dbp->db_cache;
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
8147c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
8157c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
8167c478bd9Sstevel@tonic-gate 
8177c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
8187c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
8197c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
8207c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
8217c478bd9Sstevel@tonic-gate 	}
8227c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
8237c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
8247c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
8257c478bd9Sstevel@tonic-gate 
8267c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8277c478bd9Sstevel@tonic-gate 	kmem_cache_free(bcp->dblk_cache, dbp);
8287c478bd9Sstevel@tonic-gate 	bcp->alloc--;
8297c478bd9Sstevel@tonic-gate 
8307c478bd9Sstevel@tonic-gate 	if (bcp->alloc == 0 && bcp->destroy != 0) {
8317c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
8327c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
8337c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8347c478bd9Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
8357c478bd9Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
8367c478bd9Sstevel@tonic-gate 	} else {
8377c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8387c478bd9Sstevel@tonic-gate 	}
8397c478bd9Sstevel@tonic-gate }
8407c478bd9Sstevel@tonic-gate 
8417c478bd9Sstevel@tonic-gate bcache_t *
8427c478bd9Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align)
8437c478bd9Sstevel@tonic-gate {
8447c478bd9Sstevel@tonic-gate 	bcache_t *bcp;
8457c478bd9Sstevel@tonic-gate 	char buffer[255];
8467c478bd9Sstevel@tonic-gate 
8477c478bd9Sstevel@tonic-gate 	ASSERT((align & (align - 1)) == 0);
8487c478bd9Sstevel@tonic-gate 
8497c478bd9Sstevel@tonic-gate 	if ((bcp = (bcache_t *)kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) ==
8507c478bd9Sstevel@tonic-gate 	    NULL) {
8517c478bd9Sstevel@tonic-gate 		return (NULL);
8527c478bd9Sstevel@tonic-gate 	}
8537c478bd9Sstevel@tonic-gate 
8547c478bd9Sstevel@tonic-gate 	bcp->size = size;
8557c478bd9Sstevel@tonic-gate 	bcp->align = align;
8567c478bd9Sstevel@tonic-gate 	bcp->alloc = 0;
8577c478bd9Sstevel@tonic-gate 	bcp->destroy = 0;
8587c478bd9Sstevel@tonic-gate 
8597c478bd9Sstevel@tonic-gate 	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
8607c478bd9Sstevel@tonic-gate 
8617c478bd9Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_buffer_cache", name);
8627c478bd9Sstevel@tonic-gate 	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
8637c478bd9Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
8647c478bd9Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_dblk_cache", name);
8657c478bd9Sstevel@tonic-gate 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
8667c478bd9Sstevel@tonic-gate 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
8677c478bd9Sstevel@tonic-gate 						NULL, (void *)bcp, NULL, 0);
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate 	return (bcp);
8707c478bd9Sstevel@tonic-gate }
8717c478bd9Sstevel@tonic-gate 
8727c478bd9Sstevel@tonic-gate void
8737c478bd9Sstevel@tonic-gate bcache_destroy(bcache_t *bcp)
8747c478bd9Sstevel@tonic-gate {
8757c478bd9Sstevel@tonic-gate 	ASSERT(bcp != NULL);
8767c478bd9Sstevel@tonic-gate 
8777c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8787c478bd9Sstevel@tonic-gate 	if (bcp->alloc == 0) {
8797c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
8807c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
8817c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8827c478bd9Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
8837c478bd9Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
8847c478bd9Sstevel@tonic-gate 	} else {
8857c478bd9Sstevel@tonic-gate 		bcp->destroy++;
8867c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8877c478bd9Sstevel@tonic-gate 	}
8887c478bd9Sstevel@tonic-gate }
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8917c478bd9Sstevel@tonic-gate mblk_t *
8927c478bd9Sstevel@tonic-gate bcache_allocb(bcache_t *bcp, uint_t pri)
8937c478bd9Sstevel@tonic-gate {
8947c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
8957c478bd9Sstevel@tonic-gate 	mblk_t *mp = NULL;
8967c478bd9Sstevel@tonic-gate 
8977c478bd9Sstevel@tonic-gate 	ASSERT(bcp != NULL);
8987c478bd9Sstevel@tonic-gate 
8997c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
9007c478bd9Sstevel@tonic-gate 	if (bcp->destroy != 0) {
9017c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
9027c478bd9Sstevel@tonic-gate 		goto out;
9037c478bd9Sstevel@tonic-gate 	}
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
9067c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
9077c478bd9Sstevel@tonic-gate 		goto out;
9087c478bd9Sstevel@tonic-gate 	}
9097c478bd9Sstevel@tonic-gate 	bcp->alloc++;
9107c478bd9Sstevel@tonic-gate 	mutex_exit(&bcp->mutex);
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate 	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
9137c478bd9Sstevel@tonic-gate 
9147c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
9157c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
9167c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
9177c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
9187c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
9197c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
9207c478bd9Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
9217c478bd9Sstevel@tonic-gate out:
9227c478bd9Sstevel@tonic-gate 	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
9237c478bd9Sstevel@tonic-gate 
9247c478bd9Sstevel@tonic-gate 	return (mp);
9257c478bd9Sstevel@tonic-gate }
9267c478bd9Sstevel@tonic-gate 
9277c478bd9Sstevel@tonic-gate static void
9287c478bd9Sstevel@tonic-gate dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
9297c478bd9Sstevel@tonic-gate {
9307c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
9317c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
9327c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
9337c478bd9Sstevel@tonic-gate 
9347c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
9357c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
9367c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
9377c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
9387c478bd9Sstevel@tonic-gate 	}
9397c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
9407c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
9417c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
9427c478bd9Sstevel@tonic-gate 
9437c478bd9Sstevel@tonic-gate 	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
9447c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
9457c478bd9Sstevel@tonic-gate }
9467c478bd9Sstevel@tonic-gate 
9477c478bd9Sstevel@tonic-gate static mblk_t *
9487c478bd9Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags)
9497c478bd9Sstevel@tonic-gate {
9507c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9517c478bd9Sstevel@tonic-gate 	void *buf;
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
9547c478bd9Sstevel@tonic-gate 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
9557c478bd9Sstevel@tonic-gate 		return (NULL);
9567c478bd9Sstevel@tonic-gate 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
9577c478bd9Sstevel@tonic-gate 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
9587c478bd9Sstevel@tonic-gate 		kmem_free(buf, size);
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 	if (mp != NULL)
9617c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
9627c478bd9Sstevel@tonic-gate 
9637c478bd9Sstevel@tonic-gate 	return (mp);
9647c478bd9Sstevel@tonic-gate }
9657c478bd9Sstevel@tonic-gate 
9667c478bd9Sstevel@tonic-gate mblk_t *
9677c478bd9Sstevel@tonic-gate allocb_tryhard(size_t target_size)
9687c478bd9Sstevel@tonic-gate {
9697c478bd9Sstevel@tonic-gate 	size_t size;
9707c478bd9Sstevel@tonic-gate 	mblk_t *bp;
9717c478bd9Sstevel@tonic-gate 
9727c478bd9Sstevel@tonic-gate 	for (size = target_size; size < target_size + 512;
9737c478bd9Sstevel@tonic-gate 	    size += DBLK_CACHE_ALIGN)
9747c478bd9Sstevel@tonic-gate 		if ((bp = allocb(size, BPRI_HI)) != NULL)
9757c478bd9Sstevel@tonic-gate 			return (bp);
9767c478bd9Sstevel@tonic-gate 	allocb_tryhard_fails++;
9777c478bd9Sstevel@tonic-gate 	return (NULL);
9787c478bd9Sstevel@tonic-gate }
9797c478bd9Sstevel@tonic-gate 
9807c478bd9Sstevel@tonic-gate /*
9817c478bd9Sstevel@tonic-gate  * This routine is consolidation private for STREAMS internal use
9827c478bd9Sstevel@tonic-gate  * This routine may only be called from sync routines (i.e., not
9837c478bd9Sstevel@tonic-gate  * from put or service procedures).  It is located here (rather
9847c478bd9Sstevel@tonic-gate  * than strsubr.c) so that we don't have to expose all of the
9857c478bd9Sstevel@tonic-gate  * allocb() implementation details in header files.
9867c478bd9Sstevel@tonic-gate  */
9877c478bd9Sstevel@tonic-gate mblk_t *
9887c478bd9Sstevel@tonic-gate allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
9897c478bd9Sstevel@tonic-gate {
9907c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
9917c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9927c478bd9Sstevel@tonic-gate 	size_t index;
9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate 	index = (size -1) >> DBLK_SIZE_SHIFT;
9957c478bd9Sstevel@tonic-gate 
9967c478bd9Sstevel@tonic-gate 	if (flags & STR_NOSIG) {
9977c478bd9Sstevel@tonic-gate 		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
9987c478bd9Sstevel@tonic-gate 			if (size != 0) {
9997c478bd9Sstevel@tonic-gate 				mp = allocb_oversize(size, KM_SLEEP);
10007c478bd9Sstevel@tonic-gate 				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
10017c478bd9Sstevel@tonic-gate 				    (uintptr_t)mp);
10027c478bd9Sstevel@tonic-gate 				return (mp);
10037c478bd9Sstevel@tonic-gate 			}
10047c478bd9Sstevel@tonic-gate 			index = 0;
10057c478bd9Sstevel@tonic-gate 		}
10067c478bd9Sstevel@tonic-gate 
10077c478bd9Sstevel@tonic-gate 		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
10087c478bd9Sstevel@tonic-gate 		mp = dbp->db_mblk;
10097c478bd9Sstevel@tonic-gate 		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
10107c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = mp->b_cont = NULL;
10117c478bd9Sstevel@tonic-gate 		mp->b_rptr = mp->b_wptr = dbp->db_base;
10127c478bd9Sstevel@tonic-gate 		mp->b_queue = NULL;
10137c478bd9Sstevel@tonic-gate 		MBLK_BAND_FLAG_WORD(mp) = 0;
10147c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
10157c478bd9Sstevel@tonic-gate 
10167c478bd9Sstevel@tonic-gate 		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
10177c478bd9Sstevel@tonic-gate 
10187c478bd9Sstevel@tonic-gate 	} else {
10197c478bd9Sstevel@tonic-gate 		while ((mp = allocb(size, pri)) == NULL) {
10207c478bd9Sstevel@tonic-gate 			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
10217c478bd9Sstevel@tonic-gate 				return (NULL);
10227c478bd9Sstevel@tonic-gate 		}
10237c478bd9Sstevel@tonic-gate 	}
10247c478bd9Sstevel@tonic-gate 
10257c478bd9Sstevel@tonic-gate 	return (mp);
10267c478bd9Sstevel@tonic-gate }
10277c478bd9Sstevel@tonic-gate 
10287c478bd9Sstevel@tonic-gate /*
10297c478bd9Sstevel@tonic-gate  * Call function 'func' with 'arg' when a class zero block can
10307c478bd9Sstevel@tonic-gate  * be allocated with priority 'pri'.
10317c478bd9Sstevel@tonic-gate  */
10327c478bd9Sstevel@tonic-gate bufcall_id_t
10337c478bd9Sstevel@tonic-gate esbbcall(uint_t pri, void (*func)(void *), void *arg)
10347c478bd9Sstevel@tonic-gate {
10357c478bd9Sstevel@tonic-gate 	return (bufcall(1, pri, func, arg));
10367c478bd9Sstevel@tonic-gate }
10377c478bd9Sstevel@tonic-gate 
10387c478bd9Sstevel@tonic-gate /*
10397c478bd9Sstevel@tonic-gate  * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
10407c478bd9Sstevel@tonic-gate  * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
10417c478bd9Sstevel@tonic-gate  * This provides consistency for all internal allocators of ioctl.
10427c478bd9Sstevel@tonic-gate  */
10437c478bd9Sstevel@tonic-gate mblk_t *
10447c478bd9Sstevel@tonic-gate mkiocb(uint_t cmd)
10457c478bd9Sstevel@tonic-gate {
10467c478bd9Sstevel@tonic-gate 	struct iocblk	*ioc;
10477c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
10487c478bd9Sstevel@tonic-gate 
10497c478bd9Sstevel@tonic-gate 	/*
10507c478bd9Sstevel@tonic-gate 	 * Allocate enough space for any of the ioctl related messages.
10517c478bd9Sstevel@tonic-gate 	 */
10527c478bd9Sstevel@tonic-gate 	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
10537c478bd9Sstevel@tonic-gate 		return (NULL);
10547c478bd9Sstevel@tonic-gate 
10557c478bd9Sstevel@tonic-gate 	bzero(mp->b_rptr, sizeof (union ioctypes));
10567c478bd9Sstevel@tonic-gate 
10577c478bd9Sstevel@tonic-gate 	/*
10587c478bd9Sstevel@tonic-gate 	 * Set the mblk_t information and ptrs correctly.
10597c478bd9Sstevel@tonic-gate 	 */
10607c478bd9Sstevel@tonic-gate 	mp->b_wptr += sizeof (struct iocblk);
10617c478bd9Sstevel@tonic-gate 	mp->b_datap->db_type = M_IOCTL;
10627c478bd9Sstevel@tonic-gate 
10637c478bd9Sstevel@tonic-gate 	/*
10647c478bd9Sstevel@tonic-gate 	 * Fill in the fields.
10657c478bd9Sstevel@tonic-gate 	 */
10667c478bd9Sstevel@tonic-gate 	ioc		= (struct iocblk *)mp->b_rptr;
10677c478bd9Sstevel@tonic-gate 	ioc->ioc_cmd	= cmd;
10687c478bd9Sstevel@tonic-gate 	ioc->ioc_cr	= kcred;
10697c478bd9Sstevel@tonic-gate 	ioc->ioc_id	= getiocseqno();
10707c478bd9Sstevel@tonic-gate 	ioc->ioc_flag	= IOC_NATIVE;
10717c478bd9Sstevel@tonic-gate 	return (mp);
10727c478bd9Sstevel@tonic-gate }
10737c478bd9Sstevel@tonic-gate 
10747c478bd9Sstevel@tonic-gate /*
10757c478bd9Sstevel@tonic-gate  * test if block of given size can be allocated with a request of
10767c478bd9Sstevel@tonic-gate  * the given priority.
10777c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
10787c478bd9Sstevel@tonic-gate  */
10797c478bd9Sstevel@tonic-gate /* ARGSUSED */
10807c478bd9Sstevel@tonic-gate int
10817c478bd9Sstevel@tonic-gate testb(size_t size, uint_t pri)
10827c478bd9Sstevel@tonic-gate {
10837c478bd9Sstevel@tonic-gate 	return ((size + sizeof (dblk_t)) <= kmem_avail());
10847c478bd9Sstevel@tonic-gate }
10857c478bd9Sstevel@tonic-gate 
10867c478bd9Sstevel@tonic-gate /*
10877c478bd9Sstevel@tonic-gate  * Call function 'func' with argument 'arg' when there is a reasonably
10887c478bd9Sstevel@tonic-gate  * good chance that a block of size 'size' can be allocated.
10897c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
10907c478bd9Sstevel@tonic-gate  */
10917c478bd9Sstevel@tonic-gate /* ARGSUSED */
10927c478bd9Sstevel@tonic-gate bufcall_id_t
10937c478bd9Sstevel@tonic-gate bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
10947c478bd9Sstevel@tonic-gate {
10957c478bd9Sstevel@tonic-gate 	static long bid = 1;	/* always odd to save checking for zero */
10967c478bd9Sstevel@tonic-gate 	bufcall_id_t bc_id;
10977c478bd9Sstevel@tonic-gate 	struct strbufcall *bcp;
10987c478bd9Sstevel@tonic-gate 
10997c478bd9Sstevel@tonic-gate 	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
11007c478bd9Sstevel@tonic-gate 		return (0);
11017c478bd9Sstevel@tonic-gate 
11027c478bd9Sstevel@tonic-gate 	bcp->bc_func = func;
11037c478bd9Sstevel@tonic-gate 	bcp->bc_arg = arg;
11047c478bd9Sstevel@tonic-gate 	bcp->bc_size = size;
11057c478bd9Sstevel@tonic-gate 	bcp->bc_next = NULL;
11067c478bd9Sstevel@tonic-gate 	bcp->bc_executor = NULL;
11077c478bd9Sstevel@tonic-gate 
11087c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
11097c478bd9Sstevel@tonic-gate 	/*
11107c478bd9Sstevel@tonic-gate 	 * After bcp is linked into strbcalls and strbcall_lock is dropped there
11117c478bd9Sstevel@tonic-gate 	 * should be no references to bcp since it may be freed by
11127c478bd9Sstevel@tonic-gate 	 * runbufcalls(). Since bcp_id field is returned, we save its value in
11137c478bd9Sstevel@tonic-gate 	 * the local var.
11147c478bd9Sstevel@tonic-gate 	 */
11157c478bd9Sstevel@tonic-gate 	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */
11167c478bd9Sstevel@tonic-gate 
11177c478bd9Sstevel@tonic-gate 	/*
11187c478bd9Sstevel@tonic-gate 	 * add newly allocated stream event to existing
11197c478bd9Sstevel@tonic-gate 	 * linked list of events.
11207c478bd9Sstevel@tonic-gate 	 */
11217c478bd9Sstevel@tonic-gate 	if (strbcalls.bc_head == NULL) {
11227c478bd9Sstevel@tonic-gate 		strbcalls.bc_head = strbcalls.bc_tail = bcp;
11237c478bd9Sstevel@tonic-gate 	} else {
11247c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail->bc_next = bcp;
11257c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail = bcp;
11267c478bd9Sstevel@tonic-gate 	}
11277c478bd9Sstevel@tonic-gate 
11287c478bd9Sstevel@tonic-gate 	cv_signal(&strbcall_cv);
11297c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
11307c478bd9Sstevel@tonic-gate 	return (bc_id);
11317c478bd9Sstevel@tonic-gate }
11327c478bd9Sstevel@tonic-gate 
11337c478bd9Sstevel@tonic-gate /*
11347c478bd9Sstevel@tonic-gate  * Cancel a bufcall request.
11357c478bd9Sstevel@tonic-gate  */
11367c478bd9Sstevel@tonic-gate void
11377c478bd9Sstevel@tonic-gate unbufcall(bufcall_id_t id)
11387c478bd9Sstevel@tonic-gate {
11397c478bd9Sstevel@tonic-gate 	strbufcall_t *bcp, *pbcp;
11407c478bd9Sstevel@tonic-gate 
11417c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
11427c478bd9Sstevel@tonic-gate again:
11437c478bd9Sstevel@tonic-gate 	pbcp = NULL;
11447c478bd9Sstevel@tonic-gate 	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
11457c478bd9Sstevel@tonic-gate 		if (id == bcp->bc_id)
11467c478bd9Sstevel@tonic-gate 			break;
11477c478bd9Sstevel@tonic-gate 		pbcp = bcp;
11487c478bd9Sstevel@tonic-gate 	}
11497c478bd9Sstevel@tonic-gate 	if (bcp) {
11507c478bd9Sstevel@tonic-gate 		if (bcp->bc_executor != NULL) {
11517c478bd9Sstevel@tonic-gate 			if (bcp->bc_executor != curthread) {
11527c478bd9Sstevel@tonic-gate 				cv_wait(&bcall_cv, &strbcall_lock);
11537c478bd9Sstevel@tonic-gate 				goto again;
11547c478bd9Sstevel@tonic-gate 			}
11557c478bd9Sstevel@tonic-gate 		} else {
11567c478bd9Sstevel@tonic-gate 			if (pbcp)
11577c478bd9Sstevel@tonic-gate 				pbcp->bc_next = bcp->bc_next;
11587c478bd9Sstevel@tonic-gate 			else
11597c478bd9Sstevel@tonic-gate 				strbcalls.bc_head = bcp->bc_next;
11607c478bd9Sstevel@tonic-gate 			if (bcp == strbcalls.bc_tail)
11617c478bd9Sstevel@tonic-gate 				strbcalls.bc_tail = pbcp;
11627c478bd9Sstevel@tonic-gate 			kmem_free(bcp, sizeof (strbufcall_t));
11637c478bd9Sstevel@tonic-gate 		}
11647c478bd9Sstevel@tonic-gate 	}
11657c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
11667c478bd9Sstevel@tonic-gate }
11677c478bd9Sstevel@tonic-gate 
11687c478bd9Sstevel@tonic-gate /*
11697c478bd9Sstevel@tonic-gate  * Duplicate a message block by block (uses dupb), returning
11707c478bd9Sstevel@tonic-gate  * a pointer to the duplicate message.
11717c478bd9Sstevel@tonic-gate  * Returns a non-NULL value only if the entire message
11727c478bd9Sstevel@tonic-gate  * was dup'd.
11737c478bd9Sstevel@tonic-gate  */
11747c478bd9Sstevel@tonic-gate mblk_t *
11757c478bd9Sstevel@tonic-gate dupmsg(mblk_t *bp)
11767c478bd9Sstevel@tonic-gate {
11777c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
11787c478bd9Sstevel@tonic-gate 
11797c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = dupb(bp)))
11807c478bd9Sstevel@tonic-gate 		return (NULL);
11817c478bd9Sstevel@tonic-gate 
11827c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
11837c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
11847c478bd9Sstevel@tonic-gate 			freemsg(head);
11857c478bd9Sstevel@tonic-gate 			return (NULL);
11867c478bd9Sstevel@tonic-gate 		}
11877c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
11887c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
11897c478bd9Sstevel@tonic-gate 	}
11907c478bd9Sstevel@tonic-gate 	return (head);
11917c478bd9Sstevel@tonic-gate }
11927c478bd9Sstevel@tonic-gate 
/*
 * Duplicate a single mblk: use copyb() when the block's dblk has
 * STRUIO_ZC set in db_struioflag, and dupb() otherwise.
 * Note that bp is evaluated more than once.
 */
#define	DUPB_NOLOAN(bp) \
	(((bp)->b_datap->db_struioflag & STRUIO_ZC) ? \
	copyb((bp)) : dupb((bp)))
11967c478bd9Sstevel@tonic-gate 
11977c478bd9Sstevel@tonic-gate mblk_t *
11987c478bd9Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp)
11997c478bd9Sstevel@tonic-gate {
12007c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
12017c478bd9Sstevel@tonic-gate 
12027c478bd9Sstevel@tonic-gate 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
12037c478bd9Sstevel@tonic-gate 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
12047c478bd9Sstevel@tonic-gate 		return (NULL);
12057c478bd9Sstevel@tonic-gate 
12067c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
12077c478bd9Sstevel@tonic-gate 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
12087c478bd9Sstevel@tonic-gate 			freemsg(head);
12097c478bd9Sstevel@tonic-gate 			return (NULL);
12107c478bd9Sstevel@tonic-gate 		}
12117c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
12127c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
12137c478bd9Sstevel@tonic-gate 	}
12147c478bd9Sstevel@tonic-gate 	return (head);
12157c478bd9Sstevel@tonic-gate }
12167c478bd9Sstevel@tonic-gate 
12177c478bd9Sstevel@tonic-gate /*
12187c478bd9Sstevel@tonic-gate  * Copy data from message and data block to newly allocated message and
12197c478bd9Sstevel@tonic-gate  * data block. Returns new message block pointer, or NULL if error.
12207c478bd9Sstevel@tonic-gate  * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
12217c478bd9Sstevel@tonic-gate  * as in the original even when db_base is not word aligned. (bug 1052877)
12227c478bd9Sstevel@tonic-gate  */
12237c478bd9Sstevel@tonic-gate mblk_t *
12247c478bd9Sstevel@tonic-gate copyb(mblk_t *bp)
12257c478bd9Sstevel@tonic-gate {
12267c478bd9Sstevel@tonic-gate 	mblk_t	*nbp;
12277c478bd9Sstevel@tonic-gate 	dblk_t	*dp, *ndp;
12287c478bd9Sstevel@tonic-gate 	uchar_t *base;
12297c478bd9Sstevel@tonic-gate 	size_t	size;
12307c478bd9Sstevel@tonic-gate 	size_t	unaligned;
12317c478bd9Sstevel@tonic-gate 
12327c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_wptr >= bp->b_rptr);
12337c478bd9Sstevel@tonic-gate 
12347c478bd9Sstevel@tonic-gate 	dp = bp->b_datap;
12357c478bd9Sstevel@tonic-gate 	if (dp->db_fthdr != NULL)
12367c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
12377c478bd9Sstevel@tonic-gate 
12387c478bd9Sstevel@tonic-gate 	/*
12397c478bd9Sstevel@tonic-gate 	 * Special handling for Multidata message; this should be
12407c478bd9Sstevel@tonic-gate 	 * removed once a copy-callback routine is made available.
12417c478bd9Sstevel@tonic-gate 	 */
12427c478bd9Sstevel@tonic-gate 	if (dp->db_type == M_MULTIDATA) {
12437c478bd9Sstevel@tonic-gate 		cred_t *cr;
12447c478bd9Sstevel@tonic-gate 
12457c478bd9Sstevel@tonic-gate 		if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
12467c478bd9Sstevel@tonic-gate 			return (NULL);
12477c478bd9Sstevel@tonic-gate 
12487c478bd9Sstevel@tonic-gate 		nbp->b_flag = bp->b_flag;
12497c478bd9Sstevel@tonic-gate 		nbp->b_band = bp->b_band;
12507c478bd9Sstevel@tonic-gate 		ndp = nbp->b_datap;
12517c478bd9Sstevel@tonic-gate 
12527c478bd9Sstevel@tonic-gate 		/* See comments below on potential issues. */
12537c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
12547c478bd9Sstevel@tonic-gate 
12557c478bd9Sstevel@tonic-gate 		ASSERT(ndp->db_type == dp->db_type);
12567c478bd9Sstevel@tonic-gate 		cr = dp->db_credp;
12577c478bd9Sstevel@tonic-gate 		if (cr != NULL)
12587c478bd9Sstevel@tonic-gate 			crhold(ndp->db_credp = cr);
12597c478bd9Sstevel@tonic-gate 		ndp->db_cpid = dp->db_cpid;
12607c478bd9Sstevel@tonic-gate 		return (nbp);
12617c478bd9Sstevel@tonic-gate 	}
12627c478bd9Sstevel@tonic-gate 
12637c478bd9Sstevel@tonic-gate 	size = dp->db_lim - dp->db_base;
12647c478bd9Sstevel@tonic-gate 	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
12657c478bd9Sstevel@tonic-gate 	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
12667c478bd9Sstevel@tonic-gate 		return (NULL);
12677c478bd9Sstevel@tonic-gate 	nbp->b_flag = bp->b_flag;
12687c478bd9Sstevel@tonic-gate 	nbp->b_band = bp->b_band;
12697c478bd9Sstevel@tonic-gate 	ndp = nbp->b_datap;
12707c478bd9Sstevel@tonic-gate 
12717c478bd9Sstevel@tonic-gate 	/*
12727c478bd9Sstevel@tonic-gate 	 * Well, here is a potential issue.  If we are trying to
12737c478bd9Sstevel@tonic-gate 	 * trace a flow, and we copy the message, we might lose
12747c478bd9Sstevel@tonic-gate 	 * information about where this message might have been.
12757c478bd9Sstevel@tonic-gate 	 * So we should inherit the FT data.  On the other hand,
12767c478bd9Sstevel@tonic-gate 	 * a user might be interested only in alloc to free data.
12777c478bd9Sstevel@tonic-gate 	 * So I guess the real answer is to provide a tunable.
12787c478bd9Sstevel@tonic-gate 	 */
12797c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
12807c478bd9Sstevel@tonic-gate 
12817c478bd9Sstevel@tonic-gate 	base = ndp->db_base + unaligned;
12827c478bd9Sstevel@tonic-gate 	bcopy(dp->db_base, ndp->db_base + unaligned, size);
12837c478bd9Sstevel@tonic-gate 
12847c478bd9Sstevel@tonic-gate 	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
12857c478bd9Sstevel@tonic-gate 	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
12867c478bd9Sstevel@tonic-gate 
12877c478bd9Sstevel@tonic-gate 	return (nbp);
12887c478bd9Sstevel@tonic-gate }
12897c478bd9Sstevel@tonic-gate 
12907c478bd9Sstevel@tonic-gate /*
12917c478bd9Sstevel@tonic-gate  * Copy data from message to newly allocated message using new
12927c478bd9Sstevel@tonic-gate  * data blocks.  Returns a pointer to the new message, or NULL if error.
12937c478bd9Sstevel@tonic-gate  */
12947c478bd9Sstevel@tonic-gate mblk_t *
12957c478bd9Sstevel@tonic-gate copymsg(mblk_t *bp)
12967c478bd9Sstevel@tonic-gate {
12977c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
12987c478bd9Sstevel@tonic-gate 
12997c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = copyb(bp)))
13007c478bd9Sstevel@tonic-gate 		return (NULL);
13017c478bd9Sstevel@tonic-gate 
13027c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
13037c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
13047c478bd9Sstevel@tonic-gate 			freemsg(head);
13057c478bd9Sstevel@tonic-gate 			return (NULL);
13067c478bd9Sstevel@tonic-gate 		}
13077c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
13087c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
13097c478bd9Sstevel@tonic-gate 	}
13107c478bd9Sstevel@tonic-gate 	return (head);
13117c478bd9Sstevel@tonic-gate }
13127c478bd9Sstevel@tonic-gate 
13137c478bd9Sstevel@tonic-gate /*
13147c478bd9Sstevel@tonic-gate  * link a message block to tail of message
13157c478bd9Sstevel@tonic-gate  */
13167c478bd9Sstevel@tonic-gate void
13177c478bd9Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp)
13187c478bd9Sstevel@tonic-gate {
13197c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
13207c478bd9Sstevel@tonic-gate 
13217c478bd9Sstevel@tonic-gate 	for (; mp->b_cont; mp = mp->b_cont)
13227c478bd9Sstevel@tonic-gate 		;
13237c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;
13247c478bd9Sstevel@tonic-gate }
13257c478bd9Sstevel@tonic-gate 
13267c478bd9Sstevel@tonic-gate /*
13277c478bd9Sstevel@tonic-gate  * unlink a message block from head of message
13287c478bd9Sstevel@tonic-gate  * return pointer to new message.
13297c478bd9Sstevel@tonic-gate  * NULL if message becomes empty.
13307c478bd9Sstevel@tonic-gate  */
13317c478bd9Sstevel@tonic-gate mblk_t *
13327c478bd9Sstevel@tonic-gate unlinkb(mblk_t *bp)
13337c478bd9Sstevel@tonic-gate {
13347c478bd9Sstevel@tonic-gate 	mblk_t *bp1;
13357c478bd9Sstevel@tonic-gate 
13367c478bd9Sstevel@tonic-gate 	bp1 = bp->b_cont;
13377c478bd9Sstevel@tonic-gate 	bp->b_cont = NULL;
13387c478bd9Sstevel@tonic-gate 	return (bp1);
13397c478bd9Sstevel@tonic-gate }
13407c478bd9Sstevel@tonic-gate 
13417c478bd9Sstevel@tonic-gate /*
13427c478bd9Sstevel@tonic-gate  * remove a message block "bp" from message "mp"
13437c478bd9Sstevel@tonic-gate  *
13447c478bd9Sstevel@tonic-gate  * Return pointer to new message or NULL if no message remains.
13457c478bd9Sstevel@tonic-gate  * Return -1 if bp is not found in message.
13467c478bd9Sstevel@tonic-gate  */
13477c478bd9Sstevel@tonic-gate mblk_t *
13487c478bd9Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp)
13497c478bd9Sstevel@tonic-gate {
13507c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
13517c478bd9Sstevel@tonic-gate 	mblk_t *lastp = NULL;
13527c478bd9Sstevel@tonic-gate 
13537c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
13547c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
13557c478bd9Sstevel@tonic-gate 		if (tmp == bp) {
13567c478bd9Sstevel@tonic-gate 			if (lastp)
13577c478bd9Sstevel@tonic-gate 				lastp->b_cont = tmp->b_cont;
13587c478bd9Sstevel@tonic-gate 			else
13597c478bd9Sstevel@tonic-gate 				mp = tmp->b_cont;
13607c478bd9Sstevel@tonic-gate 			tmp->b_cont = NULL;
13617c478bd9Sstevel@tonic-gate 			return (mp);
13627c478bd9Sstevel@tonic-gate 		}
13637c478bd9Sstevel@tonic-gate 		lastp = tmp;
13647c478bd9Sstevel@tonic-gate 	}
13657c478bd9Sstevel@tonic-gate 	return ((mblk_t *)-1);
13667c478bd9Sstevel@tonic-gate }
13677c478bd9Sstevel@tonic-gate 
13687c478bd9Sstevel@tonic-gate /*
13697c478bd9Sstevel@tonic-gate  * Concatenate and align first len bytes of common
13707c478bd9Sstevel@tonic-gate  * message type.  Len == -1, means concat everything.
13717c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure
13727c478bd9Sstevel@tonic-gate  * After the pullup, mp points to the pulled up data.
13737c478bd9Sstevel@tonic-gate  */
13747c478bd9Sstevel@tonic-gate int
13757c478bd9Sstevel@tonic-gate pullupmsg(mblk_t *mp, ssize_t len)
13767c478bd9Sstevel@tonic-gate {
13777c478bd9Sstevel@tonic-gate 	mblk_t *bp, *b_cont;
13787c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
13797c478bd9Sstevel@tonic-gate 	ssize_t n;
13807c478bd9Sstevel@tonic-gate 
13817c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref > 0);
13827c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
13837c478bd9Sstevel@tonic-gate 
13847c478bd9Sstevel@tonic-gate 	/*
13857c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
13867c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
13877c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
13887c478bd9Sstevel@tonic-gate 	 */
13897c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
13907c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
13917c478bd9Sstevel@tonic-gate 		return (0);
13927c478bd9Sstevel@tonic-gate 
13937c478bd9Sstevel@tonic-gate 	if (len == -1) {
13947c478bd9Sstevel@tonic-gate 		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
13957c478bd9Sstevel@tonic-gate 			return (1);
13967c478bd9Sstevel@tonic-gate 		len = xmsgsize(mp);
13977c478bd9Sstevel@tonic-gate 	} else {
13987c478bd9Sstevel@tonic-gate 		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
13997c478bd9Sstevel@tonic-gate 		ASSERT(first_mblk_len >= 0);
14007c478bd9Sstevel@tonic-gate 		/*
14017c478bd9Sstevel@tonic-gate 		 * If the length is less than that of the first mblk,
14027c478bd9Sstevel@tonic-gate 		 * we want to pull up the message into an aligned mblk.
14037c478bd9Sstevel@tonic-gate 		 * Though not part of the spec, some callers assume it.
14047c478bd9Sstevel@tonic-gate 		 */
14057c478bd9Sstevel@tonic-gate 		if (len <= first_mblk_len) {
14067c478bd9Sstevel@tonic-gate 			if (str_aligned(mp->b_rptr))
14077c478bd9Sstevel@tonic-gate 				return (1);
14087c478bd9Sstevel@tonic-gate 			len = first_mblk_len;
14097c478bd9Sstevel@tonic-gate 		} else if (xmsgsize(mp) < len)
14107c478bd9Sstevel@tonic-gate 			return (0);
14117c478bd9Sstevel@tonic-gate 	}
14127c478bd9Sstevel@tonic-gate 
14137c478bd9Sstevel@tonic-gate 	if ((bp = allocb_tmpl(len, mp)) == NULL)
14147c478bd9Sstevel@tonic-gate 		return (0);
14157c478bd9Sstevel@tonic-gate 
14167c478bd9Sstevel@tonic-gate 	dbp = bp->b_datap;
14177c478bd9Sstevel@tonic-gate 	*bp = *mp;		/* swap mblks so bp heads the old msg... */
14187c478bd9Sstevel@tonic-gate 	mp->b_datap = dbp;	/* ... and mp heads the new message */
14197c478bd9Sstevel@tonic-gate 	mp->b_datap->db_mblk = mp;
14207c478bd9Sstevel@tonic-gate 	bp->b_datap->db_mblk = bp;
14217c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
14227c478bd9Sstevel@tonic-gate 
14237c478bd9Sstevel@tonic-gate 	do {
14247c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_datap->db_ref > 0);
14257c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_wptr >= bp->b_rptr);
14267c478bd9Sstevel@tonic-gate 		n = MIN(bp->b_wptr - bp->b_rptr, len);
14277c478bd9Sstevel@tonic-gate 		bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
14287c478bd9Sstevel@tonic-gate 		mp->b_wptr += n;
14297c478bd9Sstevel@tonic-gate 		bp->b_rptr += n;
14307c478bd9Sstevel@tonic-gate 		len -= n;
14317c478bd9Sstevel@tonic-gate 		if (bp->b_rptr != bp->b_wptr)
14327c478bd9Sstevel@tonic-gate 			break;
14337c478bd9Sstevel@tonic-gate 		b_cont = bp->b_cont;
14347c478bd9Sstevel@tonic-gate 		freeb(bp);
14357c478bd9Sstevel@tonic-gate 		bp = b_cont;
14367c478bd9Sstevel@tonic-gate 	} while (len && bp);
14377c478bd9Sstevel@tonic-gate 
14387c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */
14397c478bd9Sstevel@tonic-gate 
14407c478bd9Sstevel@tonic-gate 	return (1);
14417c478bd9Sstevel@tonic-gate }
14427c478bd9Sstevel@tonic-gate 
14437c478bd9Sstevel@tonic-gate /*
14447c478bd9Sstevel@tonic-gate  * Concatenate and align at least the first len bytes of common message
14457c478bd9Sstevel@tonic-gate  * type.  Len == -1 means concatenate everything.  The original message is
14467c478bd9Sstevel@tonic-gate  * unaltered.  Returns a pointer to a new message on success, otherwise
14477c478bd9Sstevel@tonic-gate  * returns NULL.
14487c478bd9Sstevel@tonic-gate  */
14497c478bd9Sstevel@tonic-gate mblk_t *
14507c478bd9Sstevel@tonic-gate msgpullup(mblk_t *mp, ssize_t len)
14517c478bd9Sstevel@tonic-gate {
14527c478bd9Sstevel@tonic-gate 	mblk_t	*newmp;
14537c478bd9Sstevel@tonic-gate 	ssize_t	totlen;
14547c478bd9Sstevel@tonic-gate 	ssize_t	n;
14557c478bd9Sstevel@tonic-gate 
14567c478bd9Sstevel@tonic-gate 	/*
14577c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
14587c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
14597c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
14607c478bd9Sstevel@tonic-gate 	 */
14617c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
14627c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
14637c478bd9Sstevel@tonic-gate 		return (NULL);
14647c478bd9Sstevel@tonic-gate 
14657c478bd9Sstevel@tonic-gate 	totlen = xmsgsize(mp);
14667c478bd9Sstevel@tonic-gate 
14677c478bd9Sstevel@tonic-gate 	if ((len > 0) && (len > totlen))
14687c478bd9Sstevel@tonic-gate 		return (NULL);
14697c478bd9Sstevel@tonic-gate 
14707c478bd9Sstevel@tonic-gate 	/*
14717c478bd9Sstevel@tonic-gate 	 * Copy all of the first msg type into one new mblk, then dupmsg
14727c478bd9Sstevel@tonic-gate 	 * and link the rest onto this.
14737c478bd9Sstevel@tonic-gate 	 */
14747c478bd9Sstevel@tonic-gate 
14757c478bd9Sstevel@tonic-gate 	len = totlen;
14767c478bd9Sstevel@tonic-gate 
14777c478bd9Sstevel@tonic-gate 	if ((newmp = allocb_tmpl(len, mp)) == NULL)
14787c478bd9Sstevel@tonic-gate 		return (NULL);
14797c478bd9Sstevel@tonic-gate 
14807c478bd9Sstevel@tonic-gate 	newmp->b_flag = mp->b_flag;
14817c478bd9Sstevel@tonic-gate 	newmp->b_band = mp->b_band;
14827c478bd9Sstevel@tonic-gate 
14837c478bd9Sstevel@tonic-gate 	while (len > 0) {
14847c478bd9Sstevel@tonic-gate 		n = mp->b_wptr - mp->b_rptr;
14857c478bd9Sstevel@tonic-gate 		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
14867c478bd9Sstevel@tonic-gate 		if (n > 0)
14877c478bd9Sstevel@tonic-gate 			bcopy(mp->b_rptr, newmp->b_wptr, n);
14887c478bd9Sstevel@tonic-gate 		newmp->b_wptr += n;
14897c478bd9Sstevel@tonic-gate 		len -= n;
14907c478bd9Sstevel@tonic-gate 		mp = mp->b_cont;
14917c478bd9Sstevel@tonic-gate 	}
14927c478bd9Sstevel@tonic-gate 
14937c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
14947c478bd9Sstevel@tonic-gate 		newmp->b_cont = dupmsg(mp);
14957c478bd9Sstevel@tonic-gate 		if (newmp->b_cont == NULL) {
14967c478bd9Sstevel@tonic-gate 			freemsg(newmp);
14977c478bd9Sstevel@tonic-gate 			return (NULL);
14987c478bd9Sstevel@tonic-gate 		}
14997c478bd9Sstevel@tonic-gate 	}
15007c478bd9Sstevel@tonic-gate 
15017c478bd9Sstevel@tonic-gate 	return (newmp);
15027c478bd9Sstevel@tonic-gate }
15037c478bd9Sstevel@tonic-gate 
15047c478bd9Sstevel@tonic-gate /*
15057c478bd9Sstevel@tonic-gate  * Trim bytes from message
15067c478bd9Sstevel@tonic-gate  *  len > 0, trim from head
15077c478bd9Sstevel@tonic-gate  *  len < 0, trim from tail
15087c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
15097c478bd9Sstevel@tonic-gate  */
15107c478bd9Sstevel@tonic-gate int
15117c478bd9Sstevel@tonic-gate adjmsg(mblk_t *mp, ssize_t len)
15127c478bd9Sstevel@tonic-gate {
15137c478bd9Sstevel@tonic-gate 	mblk_t *bp;
15147c478bd9Sstevel@tonic-gate 	mblk_t *save_bp = NULL;
15157c478bd9Sstevel@tonic-gate 	mblk_t *prev_bp;
15167c478bd9Sstevel@tonic-gate 	mblk_t *bcont;
15177c478bd9Sstevel@tonic-gate 	unsigned char type;
15187c478bd9Sstevel@tonic-gate 	ssize_t n;
15197c478bd9Sstevel@tonic-gate 	int fromhead;
15207c478bd9Sstevel@tonic-gate 	int first;
15217c478bd9Sstevel@tonic-gate 
15227c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
15237c478bd9Sstevel@tonic-gate 	/*
15247c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
15257c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
15267c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
15277c478bd9Sstevel@tonic-gate 	 */
15287c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
15297c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
15307c478bd9Sstevel@tonic-gate 		return (0);
15317c478bd9Sstevel@tonic-gate 
15327c478bd9Sstevel@tonic-gate 	if (len < 0) {
15337c478bd9Sstevel@tonic-gate 		fromhead = 0;
15347c478bd9Sstevel@tonic-gate 		len = -len;
15357c478bd9Sstevel@tonic-gate 	} else {
15367c478bd9Sstevel@tonic-gate 		fromhead = 1;
15377c478bd9Sstevel@tonic-gate 	}
15387c478bd9Sstevel@tonic-gate 
15397c478bd9Sstevel@tonic-gate 	if (xmsgsize(mp) < len)
15407c478bd9Sstevel@tonic-gate 		return (0);
15417c478bd9Sstevel@tonic-gate 
15427c478bd9Sstevel@tonic-gate 
15437c478bd9Sstevel@tonic-gate 	if (fromhead) {
15447c478bd9Sstevel@tonic-gate 		first = 1;
15457c478bd9Sstevel@tonic-gate 		while (len) {
15467c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_wptr >= mp->b_rptr);
15477c478bd9Sstevel@tonic-gate 			n = MIN(mp->b_wptr - mp->b_rptr, len);
15487c478bd9Sstevel@tonic-gate 			mp->b_rptr += n;
15497c478bd9Sstevel@tonic-gate 			len -= n;
15507c478bd9Sstevel@tonic-gate 
15517c478bd9Sstevel@tonic-gate 			/*
15527c478bd9Sstevel@tonic-gate 			 * If this is not the first zero length
15537c478bd9Sstevel@tonic-gate 			 * message remove it
15547c478bd9Sstevel@tonic-gate 			 */
15557c478bd9Sstevel@tonic-gate 			if (!first && (mp->b_wptr == mp->b_rptr)) {
15567c478bd9Sstevel@tonic-gate 				bcont = mp->b_cont;
15577c478bd9Sstevel@tonic-gate 				freeb(mp);
15587c478bd9Sstevel@tonic-gate 				mp = save_bp->b_cont = bcont;
15597c478bd9Sstevel@tonic-gate 			} else {
15607c478bd9Sstevel@tonic-gate 				save_bp = mp;
15617c478bd9Sstevel@tonic-gate 				mp = mp->b_cont;
15627c478bd9Sstevel@tonic-gate 			}
15637c478bd9Sstevel@tonic-gate 			first = 0;
15647c478bd9Sstevel@tonic-gate 		}
15657c478bd9Sstevel@tonic-gate 	} else {
15667c478bd9Sstevel@tonic-gate 		type = mp->b_datap->db_type;
15677c478bd9Sstevel@tonic-gate 		while (len) {
15687c478bd9Sstevel@tonic-gate 			bp = mp;
15697c478bd9Sstevel@tonic-gate 			save_bp = NULL;
15707c478bd9Sstevel@tonic-gate 
15717c478bd9Sstevel@tonic-gate 			/*
15727c478bd9Sstevel@tonic-gate 			 * Find the last message of same type
15737c478bd9Sstevel@tonic-gate 			 */
15747c478bd9Sstevel@tonic-gate 
15757c478bd9Sstevel@tonic-gate 			while (bp && bp->b_datap->db_type == type) {
15767c478bd9Sstevel@tonic-gate 				ASSERT(bp->b_wptr >= bp->b_rptr);
15777c478bd9Sstevel@tonic-gate 				prev_bp = save_bp;
15787c478bd9Sstevel@tonic-gate 				save_bp = bp;
15797c478bd9Sstevel@tonic-gate 				bp = bp->b_cont;
15807c478bd9Sstevel@tonic-gate 			}
15817c478bd9Sstevel@tonic-gate 			if (save_bp == NULL)
15827c478bd9Sstevel@tonic-gate 				break;
15837c478bd9Sstevel@tonic-gate 			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
15847c478bd9Sstevel@tonic-gate 			save_bp->b_wptr -= n;
15857c478bd9Sstevel@tonic-gate 			len -= n;
15867c478bd9Sstevel@tonic-gate 
15877c478bd9Sstevel@tonic-gate 			/*
15887c478bd9Sstevel@tonic-gate 			 * If this is not the first message
15897c478bd9Sstevel@tonic-gate 			 * and we have taken away everything
15907c478bd9Sstevel@tonic-gate 			 * from this message, remove it
15917c478bd9Sstevel@tonic-gate 			 */
15927c478bd9Sstevel@tonic-gate 
15937c478bd9Sstevel@tonic-gate 			if ((save_bp != mp) &&
15947c478bd9Sstevel@tonic-gate 				(save_bp->b_wptr == save_bp->b_rptr)) {
15957c478bd9Sstevel@tonic-gate 				bcont = save_bp->b_cont;
15967c478bd9Sstevel@tonic-gate 				freeb(save_bp);
15977c478bd9Sstevel@tonic-gate 				prev_bp->b_cont = bcont;
15987c478bd9Sstevel@tonic-gate 			}
15997c478bd9Sstevel@tonic-gate 		}
16007c478bd9Sstevel@tonic-gate 	}
16017c478bd9Sstevel@tonic-gate 	return (1);
16027c478bd9Sstevel@tonic-gate }
16037c478bd9Sstevel@tonic-gate 
16047c478bd9Sstevel@tonic-gate /*
16057c478bd9Sstevel@tonic-gate  * get number of data bytes in message
16067c478bd9Sstevel@tonic-gate  */
16077c478bd9Sstevel@tonic-gate size_t
16087c478bd9Sstevel@tonic-gate msgdsize(mblk_t *bp)
16097c478bd9Sstevel@tonic-gate {
16107c478bd9Sstevel@tonic-gate 	size_t count = 0;
16117c478bd9Sstevel@tonic-gate 
16127c478bd9Sstevel@tonic-gate 	for (; bp; bp = bp->b_cont)
16137c478bd9Sstevel@tonic-gate 		if (bp->b_datap->db_type == M_DATA) {
16147c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_wptr >= bp->b_rptr);
16157c478bd9Sstevel@tonic-gate 			count += bp->b_wptr - bp->b_rptr;
16167c478bd9Sstevel@tonic-gate 		}
16177c478bd9Sstevel@tonic-gate 	return (count);
16187c478bd9Sstevel@tonic-gate }
16197c478bd9Sstevel@tonic-gate 
16207c478bd9Sstevel@tonic-gate /*
16217c478bd9Sstevel@tonic-gate  * Get a message off head of queue
16227c478bd9Sstevel@tonic-gate  *
16237c478bd9Sstevel@tonic-gate  * If queue has no buffers then mark queue
16247c478bd9Sstevel@tonic-gate  * with QWANTR. (queue wants to be read by
16257c478bd9Sstevel@tonic-gate  * someone when data becomes available)
16267c478bd9Sstevel@tonic-gate  *
16277c478bd9Sstevel@tonic-gate  * If there is something to take off then do so.
16287c478bd9Sstevel@tonic-gate  * If queue falls below hi water mark turn off QFULL
16297c478bd9Sstevel@tonic-gate  * flag.  Decrement weighted count of queue.
16307c478bd9Sstevel@tonic-gate  * Also turn off QWANTR because queue is being read.
16317c478bd9Sstevel@tonic-gate  *
16327c478bd9Sstevel@tonic-gate  * The queue count is maintained on a per-band basis.
16337c478bd9Sstevel@tonic-gate  * Priority band 0 (normal messages) uses q_count,
16347c478bd9Sstevel@tonic-gate  * q_lowat, etc.  Non-zero priority bands use the
16357c478bd9Sstevel@tonic-gate  * fields in their respective qband structures
16367c478bd9Sstevel@tonic-gate  * (qb_count, qb_lowat, etc.)  All messages appear
16377c478bd9Sstevel@tonic-gate  * on the same list, linked via their b_next pointers.
16387c478bd9Sstevel@tonic-gate  * q_first is the head of the list.  q_count does
16397c478bd9Sstevel@tonic-gate  * not reflect the size of all the messages on the
16407c478bd9Sstevel@tonic-gate  * queue.  It only reflects those messages in the
16417c478bd9Sstevel@tonic-gate  * normal band of flow.  The one exception to this
16427c478bd9Sstevel@tonic-gate  * deals with high priority messages.  They are in
16437c478bd9Sstevel@tonic-gate  * their own conceptual "band", but are accounted
16447c478bd9Sstevel@tonic-gate  * against q_count.
16457c478bd9Sstevel@tonic-gate  *
16467c478bd9Sstevel@tonic-gate  * If queue count is below the lo water mark and QWANTW
16477c478bd9Sstevel@tonic-gate  * is set, enable the closest backq which has a service
16487c478bd9Sstevel@tonic-gate  * procedure and turn off the QWANTW flag.
16497c478bd9Sstevel@tonic-gate  *
16507c478bd9Sstevel@tonic-gate  * getq could be built on top of rmvq, but isn't because
16517c478bd9Sstevel@tonic-gate  * of performance considerations.
16527c478bd9Sstevel@tonic-gate  *
16537c478bd9Sstevel@tonic-gate  * A note on the use of q_count and q_mblkcnt:
16547c478bd9Sstevel@tonic-gate  *   q_count is the traditional byte count for messages that
16557c478bd9Sstevel@tonic-gate  *   have been put on a queue.  Documentation tells us that
16567c478bd9Sstevel@tonic-gate  *   we shouldn't rely on that count, but some drivers/modules
16577c478bd9Sstevel@tonic-gate  *   do.  What was needed, however, is a mechanism to prevent
16587c478bd9Sstevel@tonic-gate  *   runaway streams from consuming all of the resources,
16597c478bd9Sstevel@tonic-gate  *   and particularly be able to flow control zero-length
16607c478bd9Sstevel@tonic-gate  *   messages.  q_mblkcnt is used for this purpose.  It
16617c478bd9Sstevel@tonic-gate  *   counts the number of mblk's that are being put on
16627c478bd9Sstevel@tonic-gate  *   the queue.  The intention here, is that each mblk should
16637c478bd9Sstevel@tonic-gate  *   contain one byte of data and, for the purpose of
16647c478bd9Sstevel@tonic-gate  *   flow-control, logically does.  A queue will become
16657c478bd9Sstevel@tonic-gate  *   full when EITHER of these values (q_count and q_mblkcnt)
16667c478bd9Sstevel@tonic-gate  *   reach the highwater mark.  It will clear when BOTH
16677c478bd9Sstevel@tonic-gate  *   of them drop below the highwater mark.  And it will
16687c478bd9Sstevel@tonic-gate  *   backenable when BOTH of them drop below the lowwater
16697c478bd9Sstevel@tonic-gate  *   mark.
16707c478bd9Sstevel@tonic-gate  *   With this algorithm, a driver/module might be able
16717c478bd9Sstevel@tonic-gate  *   to find a reasonably accurate q_count, and the
16727c478bd9Sstevel@tonic-gate  *   framework can still try and limit resource usage.
16737c478bd9Sstevel@tonic-gate  */
16747c478bd9Sstevel@tonic-gate mblk_t *
16757c478bd9Sstevel@tonic-gate getq(queue_t *q)
16767c478bd9Sstevel@tonic-gate {
16777c478bd9Sstevel@tonic-gate 	mblk_t *bp;
1678116094b2Smicheng 	uchar_t band = 0;
16797c478bd9Sstevel@tonic-gate 
16807c478bd9Sstevel@tonic-gate 	bp = getq_noenab(q);
16817c478bd9Sstevel@tonic-gate 	if (bp != NULL)
16827c478bd9Sstevel@tonic-gate 		band = bp->b_band;
16837c478bd9Sstevel@tonic-gate 
16847c478bd9Sstevel@tonic-gate 	/*
16857c478bd9Sstevel@tonic-gate 	 * Inlined from qbackenable().
16867c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
16877c478bd9Sstevel@tonic-gate 	 */
16887c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
16897c478bd9Sstevel@tonic-gate 		return (bp);
16907c478bd9Sstevel@tonic-gate 
16917c478bd9Sstevel@tonic-gate 	qbackenable(q, band);
16927c478bd9Sstevel@tonic-gate 	return (bp);
16937c478bd9Sstevel@tonic-gate }
16947c478bd9Sstevel@tonic-gate 
/*
 * Add the number of data bytes in a single message block to "size",
 * taking multidata messages into account: for an M_MULTIDATA block the
 * "pinuse" value reported by mmd_getsize() is added, for any other block
 * MBLKL(mp) is added.
 *
 * "size" must be a modifiable lvalue; "mp" is evaluated more than once.
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the unbraced body of an
 * if/else.  Invocations must be terminated with a semicolon.
 */

#define	ADD_MBLK_SIZE(mp, size)						\
	do {								\
		if (DB_TYPE(mp) != M_MULTIDATA) {			\
			(size) += MBLKL(mp);				\
		} else {						\
			uint_t	pinuse;					\
									\
			mmd_getsize(mmd_getmultidata(mp), NULL,		\
			    &pinuse);					\
			(size) += pinuse;				\
		}							\
	} while (0)

1709ff550d0eSmasputra 
1710ff550d0eSmasputra /*
17117c478bd9Sstevel@tonic-gate  * Like getq() but does not backenable.  This is used by the stream
17127c478bd9Sstevel@tonic-gate  * head when a putback() is likely.  The caller must call qbackenable()
17137c478bd9Sstevel@tonic-gate  * after it is done with accessing the queue.
17147c478bd9Sstevel@tonic-gate  */
17157c478bd9Sstevel@tonic-gate mblk_t *
17167c478bd9Sstevel@tonic-gate getq_noenab(queue_t *q)
17177c478bd9Sstevel@tonic-gate {
17187c478bd9Sstevel@tonic-gate 	mblk_t *bp;
17197c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
17207c478bd9Sstevel@tonic-gate 	qband_t *qbp;
17217c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
17227c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
17237c478bd9Sstevel@tonic-gate 
17247c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
17257c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
17267c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
17277c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
17287c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
17297c478bd9Sstevel@tonic-gate 	} else
17307c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
17317c478bd9Sstevel@tonic-gate 
17327c478bd9Sstevel@tonic-gate 	if ((bp = q->q_first) == 0) {
17337c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTR;
17347c478bd9Sstevel@tonic-gate 	} else {
17357c478bd9Sstevel@tonic-gate 		if ((q->q_first = bp->b_next) == NULL)
17367c478bd9Sstevel@tonic-gate 			q->q_last = NULL;
17377c478bd9Sstevel@tonic-gate 		else
17387c478bd9Sstevel@tonic-gate 			q->q_first->b_prev = NULL;
17397c478bd9Sstevel@tonic-gate 
17407c478bd9Sstevel@tonic-gate 		/* Get message byte count for q_count accounting */
17417c478bd9Sstevel@tonic-gate 		for (tmp = bp; tmp; tmp = tmp->b_cont) {
1742ff550d0eSmasputra 			ADD_MBLK_SIZE(tmp, bytecnt);
17437c478bd9Sstevel@tonic-gate 			mblkcnt++;
17447c478bd9Sstevel@tonic-gate 		}
17457c478bd9Sstevel@tonic-gate 
17467c478bd9Sstevel@tonic-gate 		if (bp->b_band == 0) {
17477c478bd9Sstevel@tonic-gate 			q->q_count -= bytecnt;
17487c478bd9Sstevel@tonic-gate 			q->q_mblkcnt -= mblkcnt;
17497c478bd9Sstevel@tonic-gate 			if ((q->q_count < q->q_hiwat) &&
17507c478bd9Sstevel@tonic-gate 			    (q->q_mblkcnt < q->q_hiwat)) {
17517c478bd9Sstevel@tonic-gate 				q->q_flag &= ~QFULL;
17527c478bd9Sstevel@tonic-gate 			}
17537c478bd9Sstevel@tonic-gate 		} else {
17547c478bd9Sstevel@tonic-gate 			int i;
17557c478bd9Sstevel@tonic-gate 
17567c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_band <= q->q_nband);
17577c478bd9Sstevel@tonic-gate 			ASSERT(q->q_bandp != NULL);
17587c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(QLOCK(q)));
17597c478bd9Sstevel@tonic-gate 			qbp = q->q_bandp;
17607c478bd9Sstevel@tonic-gate 			i = bp->b_band;
17617c478bd9Sstevel@tonic-gate 			while (--i > 0)
17627c478bd9Sstevel@tonic-gate 				qbp = qbp->qb_next;
17637c478bd9Sstevel@tonic-gate 			if (qbp->qb_first == qbp->qb_last) {
17647c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
17657c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
17667c478bd9Sstevel@tonic-gate 			} else {
17677c478bd9Sstevel@tonic-gate 				qbp->qb_first = bp->b_next;
17687c478bd9Sstevel@tonic-gate 			}
17697c478bd9Sstevel@tonic-gate 			qbp->qb_count -= bytecnt;
17707c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt -= mblkcnt;
17717c478bd9Sstevel@tonic-gate 			if ((qbp->qb_count < qbp->qb_hiwat) &&
17727c478bd9Sstevel@tonic-gate 			    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
17737c478bd9Sstevel@tonic-gate 				qbp->qb_flag &= ~QB_FULL;
17747c478bd9Sstevel@tonic-gate 			}
17757c478bd9Sstevel@tonic-gate 		}
17767c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTR;
17777c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
17787c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
17797c478bd9Sstevel@tonic-gate 	}
17807c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
17817c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
17827c478bd9Sstevel@tonic-gate 
17837c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);
17847c478bd9Sstevel@tonic-gate 
17857c478bd9Sstevel@tonic-gate 	return (bp);
17867c478bd9Sstevel@tonic-gate }
17877c478bd9Sstevel@tonic-gate 
17887c478bd9Sstevel@tonic-gate /*
17897c478bd9Sstevel@tonic-gate  * Determine if a backenable is needed after removing a message in the
17907c478bd9Sstevel@tonic-gate  * specified band.
17917c478bd9Sstevel@tonic-gate  * NOTE: This routine assumes that something like getq_noenab() has been
17927c478bd9Sstevel@tonic-gate  * already called.
17937c478bd9Sstevel@tonic-gate  *
17947c478bd9Sstevel@tonic-gate  * For the read side it is ok to hold sd_lock across calling this (and the
17957c478bd9Sstevel@tonic-gate  * stream head often does).
17967c478bd9Sstevel@tonic-gate  * But for the write side strwakeq might be invoked and it acquires sd_lock.
17977c478bd9Sstevel@tonic-gate  */
17987c478bd9Sstevel@tonic-gate void
1799116094b2Smicheng qbackenable(queue_t *q, uchar_t band)
18007c478bd9Sstevel@tonic-gate {
18017c478bd9Sstevel@tonic-gate 	int backenab = 0;
18027c478bd9Sstevel@tonic-gate 	qband_t *qbp;
18037c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
18047c478bd9Sstevel@tonic-gate 
18057c478bd9Sstevel@tonic-gate 	ASSERT(q);
18067c478bd9Sstevel@tonic-gate 	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));
18077c478bd9Sstevel@tonic-gate 
18087c478bd9Sstevel@tonic-gate 	/*
18097c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
18107c478bd9Sstevel@tonic-gate 	 * OK since after getq() has lowered the q_count these flags
18117c478bd9Sstevel@tonic-gate 	 * would not change unless either the qbackenable() is done by
18127c478bd9Sstevel@tonic-gate 	 * another thread (which is ok) or the queue has gotten QFULL
18137c478bd9Sstevel@tonic-gate 	 * in which case another backenable will take place when the queue
18147c478bd9Sstevel@tonic-gate 	 * drops below q_lowat.
18157c478bd9Sstevel@tonic-gate 	 */
18167c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
18177c478bd9Sstevel@tonic-gate 		return;
18187c478bd9Sstevel@tonic-gate 
18197c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
18207c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
18217c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
18227c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
18237c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
18247c478bd9Sstevel@tonic-gate 	} else
18257c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
18267c478bd9Sstevel@tonic-gate 
18277c478bd9Sstevel@tonic-gate 	if (band == 0) {
18287c478bd9Sstevel@tonic-gate 		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
18297c478bd9Sstevel@tonic-gate 		    q->q_mblkcnt < q->q_lowat)) {
18307c478bd9Sstevel@tonic-gate 			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
18317c478bd9Sstevel@tonic-gate 		}
18327c478bd9Sstevel@tonic-gate 	} else {
18337c478bd9Sstevel@tonic-gate 		int i;
18347c478bd9Sstevel@tonic-gate 
18357c478bd9Sstevel@tonic-gate 		ASSERT((unsigned)band <= q->q_nband);
18367c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
18377c478bd9Sstevel@tonic-gate 
18387c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
18397c478bd9Sstevel@tonic-gate 		i = band;
18407c478bd9Sstevel@tonic-gate 		while (--i > 0)
18417c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
18427c478bd9Sstevel@tonic-gate 
18437c478bd9Sstevel@tonic-gate 		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
18447c478bd9Sstevel@tonic-gate 		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
18457c478bd9Sstevel@tonic-gate 			backenab = qbp->qb_flag & QB_WANTW;
18467c478bd9Sstevel@tonic-gate 		}
18477c478bd9Sstevel@tonic-gate 	}
18487c478bd9Sstevel@tonic-gate 
18497c478bd9Sstevel@tonic-gate 	if (backenab == 0) {
18507c478bd9Sstevel@tonic-gate 		if (freezer != curthread)
18517c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
18527c478bd9Sstevel@tonic-gate 		return;
18537c478bd9Sstevel@tonic-gate 	}
18547c478bd9Sstevel@tonic-gate 
18557c478bd9Sstevel@tonic-gate 	/* Have to drop the lock across strwakeq and backenable */
18567c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
18577c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTWSYNC;
18587c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW)) {
18597c478bd9Sstevel@tonic-gate 		if (band != 0)
18607c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
18617c478bd9Sstevel@tonic-gate 		else {
18627c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
18637c478bd9Sstevel@tonic-gate 		}
18647c478bd9Sstevel@tonic-gate 	}
18657c478bd9Sstevel@tonic-gate 
18667c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
18677c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
18687c478bd9Sstevel@tonic-gate 
18697c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
18707c478bd9Sstevel@tonic-gate 		strwakeq(q, QWANTWSYNC);
18717c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW))
18727c478bd9Sstevel@tonic-gate 		backenable(q, band);
18737c478bd9Sstevel@tonic-gate }
18747c478bd9Sstevel@tonic-gate 
18757c478bd9Sstevel@tonic-gate /*
18767c478bd9Sstevel@tonic-gate  * Remove a message from a queue.  The queue count and other
18777c478bd9Sstevel@tonic-gate  * flow control parameters are adjusted and the back queue
18787c478bd9Sstevel@tonic-gate  * enabled if necessary.
18797c478bd9Sstevel@tonic-gate  *
18807c478bd9Sstevel@tonic-gate  * rmvq can be called with the stream frozen, but other utility functions
18817c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
18827c478bd9Sstevel@tonic-gate  */
18837c478bd9Sstevel@tonic-gate void
18847c478bd9Sstevel@tonic-gate rmvq(queue_t *q, mblk_t *mp)
18857c478bd9Sstevel@tonic-gate {
18867c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
18877c478bd9Sstevel@tonic-gate 
18887c478bd9Sstevel@tonic-gate 	rmvq_noenab(q, mp);
18897c478bd9Sstevel@tonic-gate 	if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
18907c478bd9Sstevel@tonic-gate 		/*
18917c478bd9Sstevel@tonic-gate 		 * qbackenable can handle a frozen stream but not a "random"
18927c478bd9Sstevel@tonic-gate 		 * qlock being held. Drop lock across qbackenable.
18937c478bd9Sstevel@tonic-gate 		 */
18947c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
18957c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
18967c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
18977c478bd9Sstevel@tonic-gate 	} else {
18987c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
18997c478bd9Sstevel@tonic-gate 	}
19007c478bd9Sstevel@tonic-gate }
19017c478bd9Sstevel@tonic-gate 
19027c478bd9Sstevel@tonic-gate /*
19037c478bd9Sstevel@tonic-gate  * Like rmvq() but without any backenabling.
19047c478bd9Sstevel@tonic-gate  * This exists to handle SR_CONSOL_DATA in strrput().
19057c478bd9Sstevel@tonic-gate  */
19067c478bd9Sstevel@tonic-gate void
19077c478bd9Sstevel@tonic-gate rmvq_noenab(queue_t *q, mblk_t *mp)
19087c478bd9Sstevel@tonic-gate {
19097c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
19107c478bd9Sstevel@tonic-gate 	int i;
19117c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
19127c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
19137c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
19147c478bd9Sstevel@tonic-gate 
19157c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
19167c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
19177c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
19187c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
19197c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
19207c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
19217c478bd9Sstevel@tonic-gate 		freezer = curthread;
19227c478bd9Sstevel@tonic-gate 	} else
19237c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
19247c478bd9Sstevel@tonic-gate 
19257c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_band <= q->q_nband);
19267c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {		/* Adjust band pointers */
19277c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
19287c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
19297c478bd9Sstevel@tonic-gate 		i = mp->b_band;
19307c478bd9Sstevel@tonic-gate 		while (--i > 0)
19317c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
19327c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_first) {
19337c478bd9Sstevel@tonic-gate 			if (mp->b_next && mp->b_band == mp->b_next->b_band)
19347c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp->b_next;
19357c478bd9Sstevel@tonic-gate 			else
19367c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
19377c478bd9Sstevel@tonic-gate 		}
19387c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_last) {
19397c478bd9Sstevel@tonic-gate 			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
19407c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp->b_prev;
19417c478bd9Sstevel@tonic-gate 			else
19427c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
19437c478bd9Sstevel@tonic-gate 		}
19447c478bd9Sstevel@tonic-gate 	}
19457c478bd9Sstevel@tonic-gate 
19467c478bd9Sstevel@tonic-gate 	/*
19477c478bd9Sstevel@tonic-gate 	 * Remove the message from the list.
19487c478bd9Sstevel@tonic-gate 	 */
19497c478bd9Sstevel@tonic-gate 	if (mp->b_prev)
19507c478bd9Sstevel@tonic-gate 		mp->b_prev->b_next = mp->b_next;
19517c478bd9Sstevel@tonic-gate 	else
19527c478bd9Sstevel@tonic-gate 		q->q_first = mp->b_next;
19537c478bd9Sstevel@tonic-gate 	if (mp->b_next)
19547c478bd9Sstevel@tonic-gate 		mp->b_next->b_prev = mp->b_prev;
19557c478bd9Sstevel@tonic-gate 	else
19567c478bd9Sstevel@tonic-gate 		q->q_last = mp->b_prev;
19577c478bd9Sstevel@tonic-gate 	mp->b_next = NULL;
19587c478bd9Sstevel@tonic-gate 	mp->b_prev = NULL;
19597c478bd9Sstevel@tonic-gate 
19607c478bd9Sstevel@tonic-gate 	/* Get the size of the message for q_count accounting */
19617c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
1962ff550d0eSmasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
19637c478bd9Sstevel@tonic-gate 		mblkcnt++;
19647c478bd9Sstevel@tonic-gate 	}
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate 	if (mp->b_band == 0) {		/* Perform q_count accounting */
19677c478bd9Sstevel@tonic-gate 		q->q_count -= bytecnt;
19687c478bd9Sstevel@tonic-gate 		q->q_mblkcnt -= mblkcnt;
19697c478bd9Sstevel@tonic-gate 		if ((q->q_count < q->q_hiwat) &&
19707c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_hiwat)) {
19717c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QFULL;
19727c478bd9Sstevel@tonic-gate 		}
19737c478bd9Sstevel@tonic-gate 	} else {			/* Perform qb_count accounting */
19747c478bd9Sstevel@tonic-gate 		qbp->qb_count -= bytecnt;
19757c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt -= mblkcnt;
19767c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count < qbp->qb_hiwat) &&
19777c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
19787c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
19797c478bd9Sstevel@tonic-gate 		}
19807c478bd9Sstevel@tonic-gate 	}
19817c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
19827c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
19837c478bd9Sstevel@tonic-gate 
19847c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
19857c478bd9Sstevel@tonic-gate }
19867c478bd9Sstevel@tonic-gate 
19877c478bd9Sstevel@tonic-gate /*
19887c478bd9Sstevel@tonic-gate  * Empty a queue.
19897c478bd9Sstevel@tonic-gate  * If flag is set, remove all messages.  Otherwise, remove
19907c478bd9Sstevel@tonic-gate  * only non-control messages.  If queue falls below its low
19917c478bd9Sstevel@tonic-gate  * water mark, and QWANTW is set, enable the nearest upstream
19927c478bd9Sstevel@tonic-gate  * service procedure.
19937c478bd9Sstevel@tonic-gate  *
19947c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
19957c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushq did not have a check
19967c478bd9Sstevel@tonic-gate  * for q_lowat == 0 in the backenabling test.
19977c478bd9Sstevel@tonic-gate  *
19987c478bd9Sstevel@tonic-gate  * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
19997c478bd9Sstevel@tonic-gate  * if one exists on the queue.
20007c478bd9Sstevel@tonic-gate  */
20017c478bd9Sstevel@tonic-gate void
20027c478bd9Sstevel@tonic-gate flushq_common(queue_t *q, int flag, int pcproto_flag)
20037c478bd9Sstevel@tonic-gate {
20047c478bd9Sstevel@tonic-gate 	mblk_t *mp, *nmp;
20057c478bd9Sstevel@tonic-gate 	qband_t *qbp;
20067c478bd9Sstevel@tonic-gate 	int backenab = 0;
20077c478bd9Sstevel@tonic-gate 	unsigned char bpri;
20087c478bd9Sstevel@tonic-gate 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
20097c478bd9Sstevel@tonic-gate 
20107c478bd9Sstevel@tonic-gate 	if (q->q_first == NULL)
20117c478bd9Sstevel@tonic-gate 		return;
20127c478bd9Sstevel@tonic-gate 
20137c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
20147c478bd9Sstevel@tonic-gate 	mp = q->q_first;
20157c478bd9Sstevel@tonic-gate 	q->q_first = NULL;
20167c478bd9Sstevel@tonic-gate 	q->q_last = NULL;
20177c478bd9Sstevel@tonic-gate 	q->q_count = 0;
20187c478bd9Sstevel@tonic-gate 	q->q_mblkcnt = 0;
20197c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
20207c478bd9Sstevel@tonic-gate 		qbp->qb_first = NULL;
20217c478bd9Sstevel@tonic-gate 		qbp->qb_last = NULL;
20227c478bd9Sstevel@tonic-gate 		qbp->qb_count = 0;
20237c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt = 0;
20247c478bd9Sstevel@tonic-gate 		qbp->qb_flag &= ~QB_FULL;
20257c478bd9Sstevel@tonic-gate 	}
20267c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QFULL;
20277c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
20287c478bd9Sstevel@tonic-gate 	while (mp) {
20297c478bd9Sstevel@tonic-gate 		nmp = mp->b_next;
20307c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = NULL;
20317c478bd9Sstevel@tonic-gate 
20327c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);
20337c478bd9Sstevel@tonic-gate 
20347c478bd9Sstevel@tonic-gate 		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
20357c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
20367c478bd9Sstevel@tonic-gate 		else if (flag || datamsg(mp->b_datap->db_type))
20377c478bd9Sstevel@tonic-gate 			freemsg(mp);
20387c478bd9Sstevel@tonic-gate 		else
20397c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
20407c478bd9Sstevel@tonic-gate 		mp = nmp;
20417c478bd9Sstevel@tonic-gate 	}
20427c478bd9Sstevel@tonic-gate 	bpri = 1;
20437c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
20447c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
20457c478bd9Sstevel@tonic-gate 		if ((qbp->qb_flag & QB_WANTW) &&
20467c478bd9Sstevel@tonic-gate 		    (((qbp->qb_count < qbp->qb_lowat) &&
20477c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
20487c478bd9Sstevel@tonic-gate 		    qbp->qb_lowat == 0)) {
20497c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
20507c478bd9Sstevel@tonic-gate 			backenab = 1;
20517c478bd9Sstevel@tonic-gate 			qbf[bpri] = 1;
20527c478bd9Sstevel@tonic-gate 		} else
20537c478bd9Sstevel@tonic-gate 			qbf[bpri] = 0;
20547c478bd9Sstevel@tonic-gate 		bpri++;
20557c478bd9Sstevel@tonic-gate 	}
20567c478bd9Sstevel@tonic-gate 	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
20577c478bd9Sstevel@tonic-gate 	if ((q->q_flag & QWANTW) &&
20587c478bd9Sstevel@tonic-gate 	    (((q->q_count < q->q_lowat) &&
20597c478bd9Sstevel@tonic-gate 	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
20607c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTW;
20617c478bd9Sstevel@tonic-gate 		backenab = 1;
20627c478bd9Sstevel@tonic-gate 		qbf[0] = 1;
20637c478bd9Sstevel@tonic-gate 	} else
20647c478bd9Sstevel@tonic-gate 		qbf[0] = 0;
20657c478bd9Sstevel@tonic-gate 
20667c478bd9Sstevel@tonic-gate 	/*
20677c478bd9Sstevel@tonic-gate 	 * If any band can now be written to, and there is a writer
20687c478bd9Sstevel@tonic-gate 	 * for that band, then backenable the closest service procedure.
20697c478bd9Sstevel@tonic-gate 	 */
20707c478bd9Sstevel@tonic-gate 	if (backenab) {
20717c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
20727c478bd9Sstevel@tonic-gate 		for (bpri = q->q_nband; bpri != 0; bpri--)
20737c478bd9Sstevel@tonic-gate 			if (qbf[bpri])
2074116094b2Smicheng 				backenable(q, bpri);
20757c478bd9Sstevel@tonic-gate 		if (qbf[0])
20767c478bd9Sstevel@tonic-gate 			backenable(q, 0);
20777c478bd9Sstevel@tonic-gate 	} else
20787c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
20797c478bd9Sstevel@tonic-gate }
20807c478bd9Sstevel@tonic-gate 
20817c478bd9Sstevel@tonic-gate /*
20827c478bd9Sstevel@tonic-gate  * The real flushing takes place in flushq_common. This is done so that
20837c478bd9Sstevel@tonic-gate  * a flag which specifies whether or not M_PCPROTO messages should be flushed
20847c478bd9Sstevel@tonic-gate  * or not. Currently the only place that uses this flag is the stream head.
20857c478bd9Sstevel@tonic-gate  */
20867c478bd9Sstevel@tonic-gate void
20877c478bd9Sstevel@tonic-gate flushq(queue_t *q, int flag)
20887c478bd9Sstevel@tonic-gate {
20897c478bd9Sstevel@tonic-gate 	flushq_common(q, flag, 0);
20907c478bd9Sstevel@tonic-gate }
20917c478bd9Sstevel@tonic-gate 
20927c478bd9Sstevel@tonic-gate /*
20937c478bd9Sstevel@tonic-gate  * Flush the queue of messages of the given priority band.
20947c478bd9Sstevel@tonic-gate  * There is some duplication of code between flushq and flushband.
20957c478bd9Sstevel@tonic-gate  * This is because we want to optimize the code as much as possible.
20967c478bd9Sstevel@tonic-gate  * The assumption is that there will be more messages in the normal
20977c478bd9Sstevel@tonic-gate  * (priority 0) band than in any other.
20987c478bd9Sstevel@tonic-gate  *
20997c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
21007c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushband had an extra check for
21017c478bd9Sstevel@tonic-gate  * did not have a check for (mp->b_datap->db_type < QPCTL) in the band 0
21027c478bd9Sstevel@tonic-gate  * case. That check does not match the man page for flushband and was not
21037c478bd9Sstevel@tonic-gate  * in the strrput flush code hence it was removed.
21047c478bd9Sstevel@tonic-gate  */
21057c478bd9Sstevel@tonic-gate void
21067c478bd9Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag)
21077c478bd9Sstevel@tonic-gate {
21087c478bd9Sstevel@tonic-gate 	mblk_t *mp;
21097c478bd9Sstevel@tonic-gate 	mblk_t *nmp;
21107c478bd9Sstevel@tonic-gate 	mblk_t *last;
21117c478bd9Sstevel@tonic-gate 	qband_t *qbp;
21127c478bd9Sstevel@tonic-gate 	int band;
21137c478bd9Sstevel@tonic-gate 
21147c478bd9Sstevel@tonic-gate 	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
21157c478bd9Sstevel@tonic-gate 	if (pri > q->q_nband) {
21167c478bd9Sstevel@tonic-gate 		return;
21177c478bd9Sstevel@tonic-gate 	}
21187c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
21197c478bd9Sstevel@tonic-gate 	if (pri == 0) {
21207c478bd9Sstevel@tonic-gate 		mp = q->q_first;
21217c478bd9Sstevel@tonic-gate 		q->q_first = NULL;
21227c478bd9Sstevel@tonic-gate 		q->q_last = NULL;
21237c478bd9Sstevel@tonic-gate 		q->q_count = 0;
21247c478bd9Sstevel@tonic-gate 		q->q_mblkcnt = 0;
21257c478bd9Sstevel@tonic-gate 		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
21267c478bd9Sstevel@tonic-gate 			qbp->qb_first = NULL;
21277c478bd9Sstevel@tonic-gate 			qbp->qb_last = NULL;
21287c478bd9Sstevel@tonic-gate 			qbp->qb_count = 0;
21297c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt = 0;
21307c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
21317c478bd9Sstevel@tonic-gate 		}
21327c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QFULL;
21337c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
21347c478bd9Sstevel@tonic-gate 		while (mp) {
21357c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
21367c478bd9Sstevel@tonic-gate 			mp->b_next = mp->b_prev = NULL;
21377c478bd9Sstevel@tonic-gate 			if ((mp->b_band == 0) &&
21387c478bd9Sstevel@tonic-gate 				((flag == FLUSHALL) ||
21397c478bd9Sstevel@tonic-gate 				datamsg(mp->b_datap->db_type)))
21407c478bd9Sstevel@tonic-gate 				freemsg(mp);
21417c478bd9Sstevel@tonic-gate 			else
21427c478bd9Sstevel@tonic-gate 				(void) putq(q, mp);
21437c478bd9Sstevel@tonic-gate 			mp = nmp;
21447c478bd9Sstevel@tonic-gate 		}
21457c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
21467c478bd9Sstevel@tonic-gate 		if ((q->q_flag & QWANTW) &&
21477c478bd9Sstevel@tonic-gate 		    (((q->q_count < q->q_lowat) &&
21487c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
21497c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
21507c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21517c478bd9Sstevel@tonic-gate 
2152116094b2Smicheng 			backenable(q, pri);
21537c478bd9Sstevel@tonic-gate 		} else
21547c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21557c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
21567c478bd9Sstevel@tonic-gate 		boolean_t flushed = B_FALSE;
21577c478bd9Sstevel@tonic-gate 		band = pri;
21587c478bd9Sstevel@tonic-gate 
21597c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
21607c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
21617c478bd9Sstevel@tonic-gate 		while (--band > 0)
21627c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
21637c478bd9Sstevel@tonic-gate 		mp = qbp->qb_first;
21647c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
21657c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21667c478bd9Sstevel@tonic-gate 			return;
21677c478bd9Sstevel@tonic-gate 		}
21687c478bd9Sstevel@tonic-gate 		last = qbp->qb_last->b_next;
21697c478bd9Sstevel@tonic-gate 		/*
21707c478bd9Sstevel@tonic-gate 		 * rmvq_noenab() and freemsg() are called for each mblk that
21717c478bd9Sstevel@tonic-gate 		 * meets the criteria.  The loop is executed until the last
21727c478bd9Sstevel@tonic-gate 		 * mblk has been processed.
21737c478bd9Sstevel@tonic-gate 		 */
21747c478bd9Sstevel@tonic-gate 		while (mp != last) {
21757c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_band == pri);
21767c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
21777c478bd9Sstevel@tonic-gate 			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
21787c478bd9Sstevel@tonic-gate 				rmvq_noenab(q, mp);
21797c478bd9Sstevel@tonic-gate 				freemsg(mp);
21807c478bd9Sstevel@tonic-gate 				flushed = B_TRUE;
21817c478bd9Sstevel@tonic-gate 			}
21827c478bd9Sstevel@tonic-gate 			mp = nmp;
21837c478bd9Sstevel@tonic-gate 		}
21847c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
21857c478bd9Sstevel@tonic-gate 
21867c478bd9Sstevel@tonic-gate 		/*
21877c478bd9Sstevel@tonic-gate 		 * If any mblk(s) has been freed, we know that qbackenable()
21887c478bd9Sstevel@tonic-gate 		 * will need to be called.
21897c478bd9Sstevel@tonic-gate 		 */
21907c478bd9Sstevel@tonic-gate 		if (flushed)
2191116094b2Smicheng 			qbackenable(q, pri);
21927c478bd9Sstevel@tonic-gate 	}
21937c478bd9Sstevel@tonic-gate }
21947c478bd9Sstevel@tonic-gate 
21957c478bd9Sstevel@tonic-gate /*
21967c478bd9Sstevel@tonic-gate  * Return 1 if the queue is not full.  If the queue is full, return
21977c478bd9Sstevel@tonic-gate  * 0 (may not put message) and set QWANTW flag (caller wants to write
21987c478bd9Sstevel@tonic-gate  * to the queue).
21997c478bd9Sstevel@tonic-gate  */
22007c478bd9Sstevel@tonic-gate int
22017c478bd9Sstevel@tonic-gate canput(queue_t *q)
22027c478bd9Sstevel@tonic-gate {
22037c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
22047c478bd9Sstevel@tonic-gate 
22057c478bd9Sstevel@tonic-gate 	/* this is for loopback transports, they should not do a canput */
22067c478bd9Sstevel@tonic-gate 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
22077c478bd9Sstevel@tonic-gate 
22087c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
22097c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
22107c478bd9Sstevel@tonic-gate 
22117c478bd9Sstevel@tonic-gate 	if (!(q->q_flag & QFULL)) {
22127c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
22137c478bd9Sstevel@tonic-gate 		return (1);
22147c478bd9Sstevel@tonic-gate 	}
22157c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
22167c478bd9Sstevel@tonic-gate 	if (q->q_flag & QFULL) {
22177c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTW;
22187c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
22197c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
22207c478bd9Sstevel@tonic-gate 		return (0);
22217c478bd9Sstevel@tonic-gate 	}
22227c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
22237c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
22247c478bd9Sstevel@tonic-gate 	return (1);
22257c478bd9Sstevel@tonic-gate }
22267c478bd9Sstevel@tonic-gate 
22277c478bd9Sstevel@tonic-gate /*
22287c478bd9Sstevel@tonic-gate  * This is the new canput for use with priority bands.  Return 1 if the
22297c478bd9Sstevel@tonic-gate  * band is not full.  If the band is full, return 0 (may not put message)
22307c478bd9Sstevel@tonic-gate  * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to
22317c478bd9Sstevel@tonic-gate  * write to the queue).
22327c478bd9Sstevel@tonic-gate  */
22337c478bd9Sstevel@tonic-gate int
22347c478bd9Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri)
22357c478bd9Sstevel@tonic-gate {
22367c478bd9Sstevel@tonic-gate 	qband_t *qbp;
22377c478bd9Sstevel@tonic-gate 
22387c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
22397c478bd9Sstevel@tonic-gate 	if (!q)
22407c478bd9Sstevel@tonic-gate 		return (0);
22417c478bd9Sstevel@tonic-gate 
22427c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
22437c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
22447c478bd9Sstevel@tonic-gate 
22457c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
22467c478bd9Sstevel@tonic-gate 	if (pri == 0) {
22477c478bd9Sstevel@tonic-gate 		if (q->q_flag & QFULL) {
22487c478bd9Sstevel@tonic-gate 			q->q_flag |= QWANTW;
22497c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22507c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22517c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
22527c478bd9Sstevel@tonic-gate 			return (0);
22537c478bd9Sstevel@tonic-gate 		}
22547c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
22557c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
22567c478bd9Sstevel@tonic-gate 			/*
22577c478bd9Sstevel@tonic-gate 			 * No band exists yet, so return success.
22587c478bd9Sstevel@tonic-gate 			 */
22597c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22607c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22617c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 1);
22627c478bd9Sstevel@tonic-gate 			return (1);
22637c478bd9Sstevel@tonic-gate 		}
22647c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
22657c478bd9Sstevel@tonic-gate 		while (--pri)
22667c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
22677c478bd9Sstevel@tonic-gate 		if (qbp->qb_flag & QB_FULL) {
22687c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_WANTW;
22697c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22707c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22717c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
22727c478bd9Sstevel@tonic-gate 			return (0);
22737c478bd9Sstevel@tonic-gate 		}
22747c478bd9Sstevel@tonic-gate 	}
22757c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
22767c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22777c478bd9Sstevel@tonic-gate 		"bcanput:%p %X %d", q, pri, 1);
22787c478bd9Sstevel@tonic-gate 	return (1);
22797c478bd9Sstevel@tonic-gate }
22807c478bd9Sstevel@tonic-gate 
22817c478bd9Sstevel@tonic-gate /*
22827c478bd9Sstevel@tonic-gate  * Put a message on a queue.
22837c478bd9Sstevel@tonic-gate  *
22847c478bd9Sstevel@tonic-gate  * Messages are enqueued on a priority basis.  The priority classes
22857c478bd9Sstevel@tonic-gate  * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
22867c478bd9Sstevel@tonic-gate  * and B_NORMAL (type < QPCTL && band == 0).
22877c478bd9Sstevel@tonic-gate  *
22887c478bd9Sstevel@tonic-gate  * Add appropriate weighted data block sizes to queue count.
22897c478bd9Sstevel@tonic-gate  * If queue hits high water mark then set QFULL flag.
22907c478bd9Sstevel@tonic-gate  *
22917c478bd9Sstevel@tonic-gate  * If QNOENAB is not set (putq is allowed to enable the queue),
22927c478bd9Sstevel@tonic-gate  * enable the queue only if the message is PRIORITY,
22937c478bd9Sstevel@tonic-gate  * or the QWANTR flag is set (indicating that the service procedure
22947c478bd9Sstevel@tonic-gate  * is ready to read the queue.  This implies that a service
22957c478bd9Sstevel@tonic-gate  * procedure must NEVER put a high priority message back on its own
22967c478bd9Sstevel@tonic-gate  * queue, as this would result in an infinite loop (!).
22977c478bd9Sstevel@tonic-gate  */
22987c478bd9Sstevel@tonic-gate int
22997c478bd9Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp)
23007c478bd9Sstevel@tonic-gate {
23017c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
23027c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
23037c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
23047c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
23057c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
23067c478bd9Sstevel@tonic-gate 
23077c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
23087c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
23097c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
23107c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
23117c478bd9Sstevel@tonic-gate 	} else
23127c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
23137c478bd9Sstevel@tonic-gate 
23147c478bd9Sstevel@tonic-gate 	/*
23157c478bd9Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
23167c478bd9Sstevel@tonic-gate 	 * allocated, do so.
23177c478bd9Sstevel@tonic-gate 	 */
23187c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
23197c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
23207c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
23217c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
23227c478bd9Sstevel@tonic-gate 		int i;
23237c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
23247c478bd9Sstevel@tonic-gate 
23257c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
23267c478bd9Sstevel@tonic-gate 
23277c478bd9Sstevel@tonic-gate 			/*
23287c478bd9Sstevel@tonic-gate 			 * The qband structure for this priority band is
23297c478bd9Sstevel@tonic-gate 			 * not on the queue yet, so we have to allocate
23307c478bd9Sstevel@tonic-gate 			 * one on the fly.  It would be wasteful to
23317c478bd9Sstevel@tonic-gate 			 * associate the qband structures with every
23327c478bd9Sstevel@tonic-gate 			 * queue when the queues are allocated.  This is
23337c478bd9Sstevel@tonic-gate 			 * because most queues will only need the normal
23347c478bd9Sstevel@tonic-gate 			 * band of flow which can be described entirely
23357c478bd9Sstevel@tonic-gate 			 * by the queue itself.
23367c478bd9Sstevel@tonic-gate 			 */
23377c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
23387c478bd9Sstevel@tonic-gate 			while (*qbpp)
23397c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
23407c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
23417c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
23427c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
23437c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
23447c478bd9Sstevel@tonic-gate 					return (0);
23457c478bd9Sstevel@tonic-gate 				}
23467c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
23477c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
23487c478bd9Sstevel@tonic-gate 				q->q_nband++;
23497c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
23507c478bd9Sstevel@tonic-gate 			}
23517c478bd9Sstevel@tonic-gate 		}
23527c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
23537c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
23547c478bd9Sstevel@tonic-gate 		i = bp->b_band;
23557c478bd9Sstevel@tonic-gate 		while (--i)
23567c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
23577c478bd9Sstevel@tonic-gate 	}
23587c478bd9Sstevel@tonic-gate 
23597c478bd9Sstevel@tonic-gate 	/*
23607c478bd9Sstevel@tonic-gate 	 * If queue is empty, add the message and initialize the pointers.
23617c478bd9Sstevel@tonic-gate 	 * Otherwise, adjust message pointers and queue pointers based on
23627c478bd9Sstevel@tonic-gate 	 * the type of the message and where it belongs on the queue.  Some
23637c478bd9Sstevel@tonic-gate 	 * code is duplicated to minimize the number of conditionals and
23647c478bd9Sstevel@tonic-gate 	 * hopefully minimize the amount of time this routine takes.
23657c478bd9Sstevel@tonic-gate 	 */
23667c478bd9Sstevel@tonic-gate 	if (!q->q_first) {
23677c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
23687c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
23697c478bd9Sstevel@tonic-gate 		q->q_first = bp;
23707c478bd9Sstevel@tonic-gate 		q->q_last = bp;
23717c478bd9Sstevel@tonic-gate 		if (qbp) {
23727c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
23737c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
23747c478bd9Sstevel@tonic-gate 		}
23757c478bd9Sstevel@tonic-gate 	} else if (!qbp) {	/* bp->b_band == 0 */
23767c478bd9Sstevel@tonic-gate 
23777c478bd9Sstevel@tonic-gate 		/*
23787c478bd9Sstevel@tonic-gate 		 * If queue class of message is less than or equal to
23797c478bd9Sstevel@tonic-gate 		 * that of the last one on the queue, tack on to the end.
23807c478bd9Sstevel@tonic-gate 		 */
23817c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
23827c478bd9Sstevel@tonic-gate 		if (mcls <= (int)queclass(tmp)) {
23837c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
23847c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
23857c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
23867c478bd9Sstevel@tonic-gate 			q->q_last = bp;
23877c478bd9Sstevel@tonic-gate 		} else {
23887c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
23897c478bd9Sstevel@tonic-gate 			while ((int)queclass(tmp) >= mcls)
23907c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
23917c478bd9Sstevel@tonic-gate 
23927c478bd9Sstevel@tonic-gate 			/*
23937c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
23947c478bd9Sstevel@tonic-gate 			 */
23957c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
23967c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
23977c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
23987c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
23997c478bd9Sstevel@tonic-gate 			else
24007c478bd9Sstevel@tonic-gate 				q->q_first = bp;
24017c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
24027c478bd9Sstevel@tonic-gate 		}
24037c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band != 0 */
24047c478bd9Sstevel@tonic-gate 		if (qbp->qb_first) {
24057c478bd9Sstevel@tonic-gate 			tmp = qbp->qb_last;
24067c478bd9Sstevel@tonic-gate 
24077c478bd9Sstevel@tonic-gate 			/*
24087c478bd9Sstevel@tonic-gate 			 * Insert bp after the last message in this band.
24097c478bd9Sstevel@tonic-gate 			 */
24107c478bd9Sstevel@tonic-gate 			bp->b_next = tmp->b_next;
24117c478bd9Sstevel@tonic-gate 			if (tmp->b_next)
24127c478bd9Sstevel@tonic-gate 				tmp->b_next->b_prev = bp;
24137c478bd9Sstevel@tonic-gate 			else
24147c478bd9Sstevel@tonic-gate 				q->q_last = bp;
24157c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
24167c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
24177c478bd9Sstevel@tonic-gate 		} else {
24187c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
24197c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
24207c478bd9Sstevel@tonic-gate 			    (bp->b_band <= tmp->b_band)) {
24217c478bd9Sstevel@tonic-gate 
24227c478bd9Sstevel@tonic-gate 				/*
24237c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
24247c478bd9Sstevel@tonic-gate 				 */
24257c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
24267c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
24277c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
24287c478bd9Sstevel@tonic-gate 				q->q_last = bp;
24297c478bd9Sstevel@tonic-gate 			} else {
24307c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
24317c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
24327c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
24337c478bd9Sstevel@tonic-gate 				while (tmp->b_band >= bp->b_band)
24347c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
24357c478bd9Sstevel@tonic-gate 
24367c478bd9Sstevel@tonic-gate 				/*
24377c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
24387c478bd9Sstevel@tonic-gate 				 */
24397c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
24407c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
24417c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
24427c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
24437c478bd9Sstevel@tonic-gate 				else
24447c478bd9Sstevel@tonic-gate 					q->q_first = bp;
24457c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
24467c478bd9Sstevel@tonic-gate 			}
24477c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
24487c478bd9Sstevel@tonic-gate 		}
24497c478bd9Sstevel@tonic-gate 		qbp->qb_last = bp;
24507c478bd9Sstevel@tonic-gate 	}
24517c478bd9Sstevel@tonic-gate 
24527c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
24537c478bd9Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
2454ff550d0eSmasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
24557c478bd9Sstevel@tonic-gate 		mblkcnt++;
24567c478bd9Sstevel@tonic-gate 	}
2457ff550d0eSmasputra 
24587c478bd9Sstevel@tonic-gate 	if (qbp) {
24597c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
24607c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
24617c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
24627c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
24637c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
24647c478bd9Sstevel@tonic-gate 		}
24657c478bd9Sstevel@tonic-gate 	} else {
24667c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
24677c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
24687c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
24697c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
24707c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
24717c478bd9Sstevel@tonic-gate 		}
24727c478bd9Sstevel@tonic-gate 	}
24737c478bd9Sstevel@tonic-gate 
24747c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);
24757c478bd9Sstevel@tonic-gate 
24767c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) ||
24777c478bd9Sstevel@tonic-gate 	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
24787c478bd9Sstevel@tonic-gate 		qenable_locked(q);
24797c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
24807c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
24817c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
24827c478bd9Sstevel@tonic-gate 
24837c478bd9Sstevel@tonic-gate 	return (1);
24847c478bd9Sstevel@tonic-gate }
24857c478bd9Sstevel@tonic-gate 
24867c478bd9Sstevel@tonic-gate /*
24877c478bd9Sstevel@tonic-gate  * Put stuff back at beginning of Q according to priority order.
24887c478bd9Sstevel@tonic-gate  * See comment on putq above for details.
24897c478bd9Sstevel@tonic-gate  */
24907c478bd9Sstevel@tonic-gate int
24917c478bd9Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp)
24927c478bd9Sstevel@tonic-gate {
24937c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
24947c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
24957c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
24967c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
24977c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
24987c478bd9Sstevel@tonic-gate 
24997c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
25007c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_next == NULL);
25017c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
25027c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
25037c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
25047c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
25057c478bd9Sstevel@tonic-gate 	} else
25067c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
25077c478bd9Sstevel@tonic-gate 
25087c478bd9Sstevel@tonic-gate 	/*
25097c478bd9Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
25107c478bd9Sstevel@tonic-gate 	 * allocated, do so.
25117c478bd9Sstevel@tonic-gate 	 */
25127c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
25137c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
25147c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
25157c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
25167c478bd9Sstevel@tonic-gate 		int i;
25177c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
25187c478bd9Sstevel@tonic-gate 
25197c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
25207c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
25217c478bd9Sstevel@tonic-gate 			while (*qbpp)
25227c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
25237c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
25247c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
25257c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
25267c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
25277c478bd9Sstevel@tonic-gate 					return (0);
25287c478bd9Sstevel@tonic-gate 				}
25297c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
25307c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
25317c478bd9Sstevel@tonic-gate 				q->q_nband++;
25327c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
25337c478bd9Sstevel@tonic-gate 			}
25347c478bd9Sstevel@tonic-gate 		}
25357c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
25367c478bd9Sstevel@tonic-gate 		i = bp->b_band;
25377c478bd9Sstevel@tonic-gate 		while (--i)
25387c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
25397c478bd9Sstevel@tonic-gate 	}
25407c478bd9Sstevel@tonic-gate 
25417c478bd9Sstevel@tonic-gate 	/*
25427c478bd9Sstevel@tonic-gate 	 * If queue is empty or if message is high priority,
25437c478bd9Sstevel@tonic-gate 	 * place on the front of the queue.
25447c478bd9Sstevel@tonic-gate 	 */
25457c478bd9Sstevel@tonic-gate 	tmp = q->q_first;
25467c478bd9Sstevel@tonic-gate 	if ((!tmp) || (mcls == QPCTL)) {
25477c478bd9Sstevel@tonic-gate 		bp->b_next = tmp;
25487c478bd9Sstevel@tonic-gate 		if (tmp)
25497c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
25507c478bd9Sstevel@tonic-gate 		else
25517c478bd9Sstevel@tonic-gate 			q->q_last = bp;
25527c478bd9Sstevel@tonic-gate 		q->q_first = bp;
25537c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
25547c478bd9Sstevel@tonic-gate 		if (qbp) {
25557c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
25567c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
25577c478bd9Sstevel@tonic-gate 		}
25587c478bd9Sstevel@tonic-gate 	} else if (qbp) {	/* bp->b_band != 0 */
25597c478bd9Sstevel@tonic-gate 		tmp = qbp->qb_first;
25607c478bd9Sstevel@tonic-gate 		if (tmp) {
25617c478bd9Sstevel@tonic-gate 
25627c478bd9Sstevel@tonic-gate 			/*
25637c478bd9Sstevel@tonic-gate 			 * Insert bp before the first message in this band.
25647c478bd9Sstevel@tonic-gate 			 */
25657c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
25667c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
25677c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
25687c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
25697c478bd9Sstevel@tonic-gate 			else
25707c478bd9Sstevel@tonic-gate 				q->q_first = bp;
25717c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
25727c478bd9Sstevel@tonic-gate 		} else {
25737c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
25747c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
25757c478bd9Sstevel@tonic-gate 			    (bp->b_band < tmp->b_band)) {
25767c478bd9Sstevel@tonic-gate 
25777c478bd9Sstevel@tonic-gate 				/*
25787c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
25797c478bd9Sstevel@tonic-gate 				 */
25807c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
25817c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
25827c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
25837c478bd9Sstevel@tonic-gate 				q->q_last = bp;
25847c478bd9Sstevel@tonic-gate 			} else {
25857c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
25867c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
25877c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
25887c478bd9Sstevel@tonic-gate 				while (tmp->b_band > bp->b_band)
25897c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
25907c478bd9Sstevel@tonic-gate 
25917c478bd9Sstevel@tonic-gate 				/*
25927c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
25937c478bd9Sstevel@tonic-gate 				 */
25947c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
25957c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
25967c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
25977c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
25987c478bd9Sstevel@tonic-gate 				else
25997c478bd9Sstevel@tonic-gate 					q->q_first = bp;
26007c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
26017c478bd9Sstevel@tonic-gate 			}
26027c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
26037c478bd9Sstevel@tonic-gate 		}
26047c478bd9Sstevel@tonic-gate 		qbp->qb_first = bp;
26057c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band == 0 && !QPCTL */
26067c478bd9Sstevel@tonic-gate 
26077c478bd9Sstevel@tonic-gate 		/*
26087c478bd9Sstevel@tonic-gate 		 * If the queue class or band is less than that of the last
26097c478bd9Sstevel@tonic-gate 		 * message on the queue, tack bp on the end of the queue.
26107c478bd9Sstevel@tonic-gate 		 */
26117c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
26127c478bd9Sstevel@tonic-gate 		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
26137c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
26147c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
26157c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
26167c478bd9Sstevel@tonic-gate 			q->q_last = bp;
26177c478bd9Sstevel@tonic-gate 		} else {
26187c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
26197c478bd9Sstevel@tonic-gate 			while (tmp->b_datap->db_type >= QPCTL)
26207c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
26217c478bd9Sstevel@tonic-gate 			while (tmp->b_band > bp->b_band)
26227c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
26237c478bd9Sstevel@tonic-gate 
26247c478bd9Sstevel@tonic-gate 			/*
26257c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
26267c478bd9Sstevel@tonic-gate 			 */
26277c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
26287c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
26297c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
26307c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
26317c478bd9Sstevel@tonic-gate 			else
26327c478bd9Sstevel@tonic-gate 				q->q_first = bp;
26337c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
26347c478bd9Sstevel@tonic-gate 		}
26357c478bd9Sstevel@tonic-gate 	}
26367c478bd9Sstevel@tonic-gate 
26377c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
26387c478bd9Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
2639ff550d0eSmasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
26407c478bd9Sstevel@tonic-gate 		mblkcnt++;
26417c478bd9Sstevel@tonic-gate 	}
26427c478bd9Sstevel@tonic-gate 	if (qbp) {
26437c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
26447c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
26457c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
26467c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
26477c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
26487c478bd9Sstevel@tonic-gate 		}
26497c478bd9Sstevel@tonic-gate 	} else {
26507c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
26517c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
26527c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
26537c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
26547c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
26557c478bd9Sstevel@tonic-gate 		}
26567c478bd9Sstevel@tonic-gate 	}
26577c478bd9Sstevel@tonic-gate 
26587c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);
26597c478bd9Sstevel@tonic-gate 
26607c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
26617c478bd9Sstevel@tonic-gate 		qenable_locked(q);
26627c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
26637c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
26647c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
26657c478bd9Sstevel@tonic-gate 
26667c478bd9Sstevel@tonic-gate 	return (1);
26677c478bd9Sstevel@tonic-gate }
26687c478bd9Sstevel@tonic-gate 
26697c478bd9Sstevel@tonic-gate /*
26707c478bd9Sstevel@tonic-gate  * Insert a message before an existing message on the queue.  If the
26717c478bd9Sstevel@tonic-gate  * existing message is NULL, the new messages is placed on the end of
26727c478bd9Sstevel@tonic-gate  * the queue.  The queue class of the new message is ignored.  However,
26737c478bd9Sstevel@tonic-gate  * the priority band of the new message must adhere to the following
26747c478bd9Sstevel@tonic-gate  * ordering:
26757c478bd9Sstevel@tonic-gate  *
26767c478bd9Sstevel@tonic-gate  *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
26777c478bd9Sstevel@tonic-gate  *
26787c478bd9Sstevel@tonic-gate  * All flow control parameters are updated.
26797c478bd9Sstevel@tonic-gate  *
26807c478bd9Sstevel@tonic-gate  * insq can be called with the stream frozen, but other utility functions
26817c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
26827c478bd9Sstevel@tonic-gate  */
26837c478bd9Sstevel@tonic-gate int
26847c478bd9Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp)
26857c478bd9Sstevel@tonic-gate {
26867c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
26877c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
26887c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(mp);
26897c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
26907c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
26917c478bd9Sstevel@tonic-gate 
26927c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
26937c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
26947c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
26957c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
26967c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
26977c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
26987c478bd9Sstevel@tonic-gate 		freezer = curthread;
26997c478bd9Sstevel@tonic-gate 	} else
27007c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
27017c478bd9Sstevel@tonic-gate 
27027c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
27037c478bd9Sstevel@tonic-gate 		if (mp->b_band != 0)
27047c478bd9Sstevel@tonic-gate 			mp->b_band = 0;		/* force to be correct */
27057c478bd9Sstevel@tonic-gate 		if (emp && emp->b_prev &&
27067c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_datap->db_type < QPCTL))
27077c478bd9Sstevel@tonic-gate 			goto badord;
27087c478bd9Sstevel@tonic-gate 	}
27097c478bd9Sstevel@tonic-gate 	if (emp) {
27107c478bd9Sstevel@tonic-gate 		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
27117c478bd9Sstevel@tonic-gate 		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
27127c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_band < mp->b_band))) {
27137c478bd9Sstevel@tonic-gate 			goto badord;
27147c478bd9Sstevel@tonic-gate 		}
27157c478bd9Sstevel@tonic-gate 	} else {
27167c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
27177c478bd9Sstevel@tonic-gate 		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
27187c478bd9Sstevel@tonic-gate badord:
27197c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
27207c478bd9Sstevel@tonic-gate 			    "insq: attempt to insert message out of order "
27217c478bd9Sstevel@tonic-gate 			    "on q %p", (void *)q);
27227c478bd9Sstevel@tonic-gate 			if (freezer != curthread)
27237c478bd9Sstevel@tonic-gate 				mutex_exit(QLOCK(q));
27247c478bd9Sstevel@tonic-gate 			return (0);
27257c478bd9Sstevel@tonic-gate 		}
27267c478bd9Sstevel@tonic-gate 	}
27277c478bd9Sstevel@tonic-gate 
27287c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {
27297c478bd9Sstevel@tonic-gate 		int i;
27307c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
27317c478bd9Sstevel@tonic-gate 
27327c478bd9Sstevel@tonic-gate 		if (mp->b_band > q->q_nband) {
27337c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
27347c478bd9Sstevel@tonic-gate 			while (*qbpp)
27357c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
27367c478bd9Sstevel@tonic-gate 			while (mp->b_band > q->q_nband) {
27377c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
27387c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
27397c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
27407c478bd9Sstevel@tonic-gate 					return (0);
27417c478bd9Sstevel@tonic-gate 				}
27427c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
27437c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
27447c478bd9Sstevel@tonic-gate 				q->q_nband++;
27457c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
27467c478bd9Sstevel@tonic-gate 			}
27477c478bd9Sstevel@tonic-gate 		}
27487c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
27497c478bd9Sstevel@tonic-gate 		i = mp->b_band;
27507c478bd9Sstevel@tonic-gate 		while (--i)
27517c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
27527c478bd9Sstevel@tonic-gate 	}
27537c478bd9Sstevel@tonic-gate 
27547c478bd9Sstevel@tonic-gate 	if ((mp->b_next = emp) != NULL) {
27557c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = emp->b_prev) != NULL)
27567c478bd9Sstevel@tonic-gate 			emp->b_prev->b_next = mp;
27577c478bd9Sstevel@tonic-gate 		else
27587c478bd9Sstevel@tonic-gate 			q->q_first = mp;
27597c478bd9Sstevel@tonic-gate 		emp->b_prev = mp;
27607c478bd9Sstevel@tonic-gate 	} else {
27617c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = q->q_last) != NULL)
27627c478bd9Sstevel@tonic-gate 			q->q_last->b_next = mp;
27637c478bd9Sstevel@tonic-gate 		else
27647c478bd9Sstevel@tonic-gate 			q->q_first = mp;
27657c478bd9Sstevel@tonic-gate 		q->q_last = mp;
27667c478bd9Sstevel@tonic-gate 	}
27677c478bd9Sstevel@tonic-gate 
27687c478bd9Sstevel@tonic-gate 	/* Get mblk and byte count for q_count accounting */
27697c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
2770ff550d0eSmasputra 		ADD_MBLK_SIZE(tmp, bytecnt);
27717c478bd9Sstevel@tonic-gate 		mblkcnt++;
27727c478bd9Sstevel@tonic-gate 	}
27737c478bd9Sstevel@tonic-gate 
27747c478bd9Sstevel@tonic-gate 	if (qbp) {	/* adjust qband pointers and count */
27757c478bd9Sstevel@tonic-gate 		if (!qbp->qb_first) {
27767c478bd9Sstevel@tonic-gate 			qbp->qb_first = mp;
27777c478bd9Sstevel@tonic-gate 			qbp->qb_last = mp;
27787c478bd9Sstevel@tonic-gate 		} else {
27797c478bd9Sstevel@tonic-gate 			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
27807c478bd9Sstevel@tonic-gate 			    (mp->b_prev->b_band != mp->b_band)))
27817c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp;
27827c478bd9Sstevel@tonic-gate 			else if (mp->b_next == NULL || (mp->b_next != NULL &&
27837c478bd9Sstevel@tonic-gate 			    (mp->b_next->b_band != mp->b_band)))
27847c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp;
27857c478bd9Sstevel@tonic-gate 		}
27867c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
27877c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
27887c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
27897c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
27907c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
27917c478bd9Sstevel@tonic-gate 		}
27927c478bd9Sstevel@tonic-gate 	} else {
27937c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
27947c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
27957c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
27967c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
27977c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
27987c478bd9Sstevel@tonic-gate 		}
27997c478bd9Sstevel@tonic-gate 	}
28007c478bd9Sstevel@tonic-gate 
28017c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL);
28027c478bd9Sstevel@tonic-gate 
28037c478bd9Sstevel@tonic-gate 	if (canenable(q) && (q->q_flag & QWANTR))
28047c478bd9Sstevel@tonic-gate 		qenable_locked(q);
28057c478bd9Sstevel@tonic-gate 
28067c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
28077c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
28087c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
28097c478bd9Sstevel@tonic-gate 
28107c478bd9Sstevel@tonic-gate 	return (1);
28117c478bd9Sstevel@tonic-gate }
28127c478bd9Sstevel@tonic-gate 
28137c478bd9Sstevel@tonic-gate /*
28147c478bd9Sstevel@tonic-gate  * Create and put a control message on queue.
28157c478bd9Sstevel@tonic-gate  */
28167c478bd9Sstevel@tonic-gate int
28177c478bd9Sstevel@tonic-gate putctl(queue_t *q, int type)
28187c478bd9Sstevel@tonic-gate {
28197c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28207c478bd9Sstevel@tonic-gate 
28217c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28227c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(0)) == NULL)
28237c478bd9Sstevel@tonic-gate 		return (0);
28247c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char) type;
28257c478bd9Sstevel@tonic-gate 
28267c478bd9Sstevel@tonic-gate 	put(q, bp);
28277c478bd9Sstevel@tonic-gate 
28287c478bd9Sstevel@tonic-gate 	return (1);
28297c478bd9Sstevel@tonic-gate }
28307c478bd9Sstevel@tonic-gate 
28317c478bd9Sstevel@tonic-gate /*
28327c478bd9Sstevel@tonic-gate  * Control message with a single-byte parameter
28337c478bd9Sstevel@tonic-gate  */
28347c478bd9Sstevel@tonic-gate int
28357c478bd9Sstevel@tonic-gate putctl1(queue_t *q, int type, int param)
28367c478bd9Sstevel@tonic-gate {
28377c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28387c478bd9Sstevel@tonic-gate 
28397c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28407c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(1)) == NULL)
28417c478bd9Sstevel@tonic-gate 		return (0);
28427c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28437c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate 	put(q, bp);
28467c478bd9Sstevel@tonic-gate 
28477c478bd9Sstevel@tonic-gate 	return (1);
28487c478bd9Sstevel@tonic-gate }
28497c478bd9Sstevel@tonic-gate 
28507c478bd9Sstevel@tonic-gate int
28517c478bd9Sstevel@tonic-gate putnextctl1(queue_t *q, int type, int param)
28527c478bd9Sstevel@tonic-gate {
28537c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28547c478bd9Sstevel@tonic-gate 
28557c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28567c478bd9Sstevel@tonic-gate 		((bp = allocb_tryhard(1)) == NULL))
28577c478bd9Sstevel@tonic-gate 		return (0);
28587c478bd9Sstevel@tonic-gate 
28597c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28607c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
28617c478bd9Sstevel@tonic-gate 
28627c478bd9Sstevel@tonic-gate 	putnext(q, bp);
28637c478bd9Sstevel@tonic-gate 
28647c478bd9Sstevel@tonic-gate 	return (1);
28657c478bd9Sstevel@tonic-gate }
28667c478bd9Sstevel@tonic-gate 
28677c478bd9Sstevel@tonic-gate int
28687c478bd9Sstevel@tonic-gate putnextctl(queue_t *q, int type)
28697c478bd9Sstevel@tonic-gate {
28707c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28717c478bd9Sstevel@tonic-gate 
28727c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28737c478bd9Sstevel@tonic-gate 		((bp = allocb_tryhard(0)) == NULL))
28747c478bd9Sstevel@tonic-gate 		return (0);
28757c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28767c478bd9Sstevel@tonic-gate 
28777c478bd9Sstevel@tonic-gate 	putnext(q, bp);
28787c478bd9Sstevel@tonic-gate 
28797c478bd9Sstevel@tonic-gate 	return (1);
28807c478bd9Sstevel@tonic-gate }
28817c478bd9Sstevel@tonic-gate 
28827c478bd9Sstevel@tonic-gate /*
28837c478bd9Sstevel@tonic-gate  * Return the queue upstream from this one
28847c478bd9Sstevel@tonic-gate  */
28857c478bd9Sstevel@tonic-gate queue_t *
28867c478bd9Sstevel@tonic-gate backq(queue_t *q)
28877c478bd9Sstevel@tonic-gate {
28887c478bd9Sstevel@tonic-gate 	q = _OTHERQ(q);
28897c478bd9Sstevel@tonic-gate 	if (q->q_next) {
28907c478bd9Sstevel@tonic-gate 		q = q->q_next;
28917c478bd9Sstevel@tonic-gate 		return (_OTHERQ(q));
28927c478bd9Sstevel@tonic-gate 	}
28937c478bd9Sstevel@tonic-gate 	return (NULL);
28947c478bd9Sstevel@tonic-gate }
28957c478bd9Sstevel@tonic-gate 
28967c478bd9Sstevel@tonic-gate /*
28977c478bd9Sstevel@tonic-gate  * Send a block back up the queue in reverse from this
28987c478bd9Sstevel@tonic-gate  * one (e.g. to respond to ioctls)
28997c478bd9Sstevel@tonic-gate  */
29007c478bd9Sstevel@tonic-gate void
29017c478bd9Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp)
29027c478bd9Sstevel@tonic-gate {
29037c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
29047c478bd9Sstevel@tonic-gate 
29057c478bd9Sstevel@tonic-gate 	putnext(_OTHERQ(q), bp);
29067c478bd9Sstevel@tonic-gate }
29077c478bd9Sstevel@tonic-gate 
29087c478bd9Sstevel@tonic-gate /*
29097c478bd9Sstevel@tonic-gate  * Streams Queue Scheduling
29107c478bd9Sstevel@tonic-gate  *
29117c478bd9Sstevel@tonic-gate  * Queues are enabled through qenable() when they have messages to
29127c478bd9Sstevel@tonic-gate  * process.  They are serviced by queuerun(), which runs each enabled
29137c478bd9Sstevel@tonic-gate  * queue's service procedure.  The call to queuerun() is processor
29147c478bd9Sstevel@tonic-gate  * dependent - the general principle is that it be run whenever a queue
29157c478bd9Sstevel@tonic-gate  * is enabled but before returning to user level.  For system calls,
29167c478bd9Sstevel@tonic-gate  * the function runqueues() is called if their action causes a queue
29177c478bd9Sstevel@tonic-gate  * to be enabled.  For device interrupts, queuerun() should be
29187c478bd9Sstevel@tonic-gate  * called before returning from the last level of interrupt.  Beyond
29197c478bd9Sstevel@tonic-gate  * this, no timing assumptions should be made about queue scheduling.
29207c478bd9Sstevel@tonic-gate  */
29217c478bd9Sstevel@tonic-gate 
29227c478bd9Sstevel@tonic-gate /*
29237c478bd9Sstevel@tonic-gate  * Enable a queue: put it on list of those whose service procedures are
29247c478bd9Sstevel@tonic-gate  * ready to run and set up the scheduling mechanism.
29257c478bd9Sstevel@tonic-gate  * The broadcast is done outside the mutex -> to avoid the woken thread
29267c478bd9Sstevel@tonic-gate  * from contending with the mutex. This is OK 'cos the queue has been
29277c478bd9Sstevel@tonic-gate  * enqueued on the runlist and flagged safely at this point.
29287c478bd9Sstevel@tonic-gate  */
29297c478bd9Sstevel@tonic-gate void
29307c478bd9Sstevel@tonic-gate qenable(queue_t *q)
29317c478bd9Sstevel@tonic-gate {
29327c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29337c478bd9Sstevel@tonic-gate 	qenable_locked(q);
29347c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29357c478bd9Sstevel@tonic-gate }
29367c478bd9Sstevel@tonic-gate /*
29377c478bd9Sstevel@tonic-gate  * Return number of messages on queue
29387c478bd9Sstevel@tonic-gate  */
29397c478bd9Sstevel@tonic-gate int
29407c478bd9Sstevel@tonic-gate qsize(queue_t *qp)
29417c478bd9Sstevel@tonic-gate {
29427c478bd9Sstevel@tonic-gate 	int count = 0;
29437c478bd9Sstevel@tonic-gate 	mblk_t *mp;
29447c478bd9Sstevel@tonic-gate 
29457c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
29467c478bd9Sstevel@tonic-gate 	for (mp = qp->q_first; mp; mp = mp->b_next)
29477c478bd9Sstevel@tonic-gate 		count++;
29487c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
29497c478bd9Sstevel@tonic-gate 	return (count);
29507c478bd9Sstevel@tonic-gate }
29517c478bd9Sstevel@tonic-gate 
29527c478bd9Sstevel@tonic-gate /*
29537c478bd9Sstevel@tonic-gate  * noenable - set queue so that putq() will not enable it.
29547c478bd9Sstevel@tonic-gate  * enableok - set queue so that putq() can enable it.
29557c478bd9Sstevel@tonic-gate  */
29567c478bd9Sstevel@tonic-gate void
29577c478bd9Sstevel@tonic-gate noenable(queue_t *q)
29587c478bd9Sstevel@tonic-gate {
29597c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29607c478bd9Sstevel@tonic-gate 	q->q_flag |= QNOENB;
29617c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29627c478bd9Sstevel@tonic-gate }
29637c478bd9Sstevel@tonic-gate 
29647c478bd9Sstevel@tonic-gate void
29657c478bd9Sstevel@tonic-gate enableok(queue_t *q)
29667c478bd9Sstevel@tonic-gate {
29677c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29687c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QNOENB;
29697c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29707c478bd9Sstevel@tonic-gate }
29717c478bd9Sstevel@tonic-gate 
29727c478bd9Sstevel@tonic-gate /*
29737c478bd9Sstevel@tonic-gate  * Set queue fields.
29747c478bd9Sstevel@tonic-gate  */
29757c478bd9Sstevel@tonic-gate int
29767c478bd9Sstevel@tonic-gate strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
29777c478bd9Sstevel@tonic-gate {
29787c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
29797c478bd9Sstevel@tonic-gate 	queue_t	*wrq;
29807c478bd9Sstevel@tonic-gate 	int error = 0;
29817c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
29827c478bd9Sstevel@tonic-gate 
29837c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
29847c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
29857c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
29867c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
29877c478bd9Sstevel@tonic-gate 	} else
29887c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
29897c478bd9Sstevel@tonic-gate 
29907c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
29917c478bd9Sstevel@tonic-gate 		error = EINVAL;
29927c478bd9Sstevel@tonic-gate 		goto done;
29937c478bd9Sstevel@tonic-gate 	}
29947c478bd9Sstevel@tonic-gate 	if (pri != 0) {
29957c478bd9Sstevel@tonic-gate 		int i;
29967c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
29977c478bd9Sstevel@tonic-gate 
29987c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
29997c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
30007c478bd9Sstevel@tonic-gate 			while (*qbpp)
30017c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
30027c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
30037c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
30047c478bd9Sstevel@tonic-gate 					error = EAGAIN;
30057c478bd9Sstevel@tonic-gate 					goto done;
30067c478bd9Sstevel@tonic-gate 				}
30077c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
30087c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
30097c478bd9Sstevel@tonic-gate 				q->q_nband++;
30107c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
30117c478bd9Sstevel@tonic-gate 			}
30127c478bd9Sstevel@tonic-gate 		}
30137c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
30147c478bd9Sstevel@tonic-gate 		i = pri;
30157c478bd9Sstevel@tonic-gate 		while (--i)
30167c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
30177c478bd9Sstevel@tonic-gate 	}
30187c478bd9Sstevel@tonic-gate 	switch (what) {
30197c478bd9Sstevel@tonic-gate 
30207c478bd9Sstevel@tonic-gate 	case QHIWAT:
30217c478bd9Sstevel@tonic-gate 		if (qbp)
30227c478bd9Sstevel@tonic-gate 			qbp->qb_hiwat = (size_t)val;
30237c478bd9Sstevel@tonic-gate 		else
30247c478bd9Sstevel@tonic-gate 			q->q_hiwat = (size_t)val;
30257c478bd9Sstevel@tonic-gate 		break;
30267c478bd9Sstevel@tonic-gate 
30277c478bd9Sstevel@tonic-gate 	case QLOWAT:
30287c478bd9Sstevel@tonic-gate 		if (qbp)
30297c478bd9Sstevel@tonic-gate 			qbp->qb_lowat = (size_t)val;
30307c478bd9Sstevel@tonic-gate 		else
30317c478bd9Sstevel@tonic-gate 			q->q_lowat = (size_t)val;
30327c478bd9Sstevel@tonic-gate 		break;
30337c478bd9Sstevel@tonic-gate 
30347c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
30357c478bd9Sstevel@tonic-gate 		if (qbp)
30367c478bd9Sstevel@tonic-gate 			error = EINVAL;
30377c478bd9Sstevel@tonic-gate 		else
30387c478bd9Sstevel@tonic-gate 			q->q_maxpsz = (ssize_t)val;
30397c478bd9Sstevel@tonic-gate 
30407c478bd9Sstevel@tonic-gate 		/*
30417c478bd9Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
30427c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
30437c478bd9Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
30447c478bd9Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
30457c478bd9Sstevel@tonic-gate 		 */
30467c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
30477c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
30487c478bd9Sstevel@tonic-gate 			break;
30497c478bd9Sstevel@tonic-gate 
30507c478bd9Sstevel@tonic-gate 		/*
30517c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
30527c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
30537c478bd9Sstevel@tonic-gate 		 */
30547c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30557c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
30567c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
30577c478bd9Sstevel@tonic-gate 		}
30587c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(wrq)));
30597c478bd9Sstevel@tonic-gate 
30607c478bd9Sstevel@tonic-gate 		if (strmsgsz != 0) {
30617c478bd9Sstevel@tonic-gate 			if (val == INFPSZ)
30627c478bd9Sstevel@tonic-gate 				val = strmsgsz;
30637c478bd9Sstevel@tonic-gate 			else  {
30647c478bd9Sstevel@tonic-gate 				if (STREAM(q)->sd_vnode->v_type == VFIFO)
30657c478bd9Sstevel@tonic-gate 					val = MIN(PIPE_BUF, val);
30667c478bd9Sstevel@tonic-gate 				else
30677c478bd9Sstevel@tonic-gate 					val = MIN(strmsgsz, val);
30687c478bd9Sstevel@tonic-gate 			}
30697c478bd9Sstevel@tonic-gate 		}
30707c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_maxpsz = val;
30717c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30727c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
30737c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
30747c478bd9Sstevel@tonic-gate 		}
30757c478bd9Sstevel@tonic-gate 		break;
30767c478bd9Sstevel@tonic-gate 
30777c478bd9Sstevel@tonic-gate 	case QMINPSZ:
30787c478bd9Sstevel@tonic-gate 		if (qbp)
30797c478bd9Sstevel@tonic-gate 			error = EINVAL;
30807c478bd9Sstevel@tonic-gate 		else
30817c478bd9Sstevel@tonic-gate 			q->q_minpsz = (ssize_t)val;
30827c478bd9Sstevel@tonic-gate 
30837c478bd9Sstevel@tonic-gate 		/*
30847c478bd9Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
30857c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
30867c478bd9Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
30877c478bd9Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
30887c478bd9Sstevel@tonic-gate 		 */
30897c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
30907c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
30917c478bd9Sstevel@tonic-gate 			break;
30927c478bd9Sstevel@tonic-gate 
30937c478bd9Sstevel@tonic-gate 		/*
30947c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
30957c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
30967c478bd9Sstevel@tonic-gate 		 */
30977c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30987c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
30997c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
31007c478bd9Sstevel@tonic-gate 		}
31017c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_minpsz = (ssize_t)val;
31027c478bd9Sstevel@tonic-gate 
31037c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
31047c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
31057c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
31067c478bd9Sstevel@tonic-gate 		}
31077c478bd9Sstevel@tonic-gate 		break;
31087c478bd9Sstevel@tonic-gate 
31097c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
31107c478bd9Sstevel@tonic-gate 		if (qbp)
31117c478bd9Sstevel@tonic-gate 			error = EINVAL;
31127c478bd9Sstevel@tonic-gate 		else
31137c478bd9Sstevel@tonic-gate 			q->q_struiot = (ushort_t)val;
31147c478bd9Sstevel@tonic-gate 		break;
31157c478bd9Sstevel@tonic-gate 
31167c478bd9Sstevel@tonic-gate 	case QCOUNT:
31177c478bd9Sstevel@tonic-gate 	case QFIRST:
31187c478bd9Sstevel@tonic-gate 	case QLAST:
31197c478bd9Sstevel@tonic-gate 	case QFLAG:
31207c478bd9Sstevel@tonic-gate 		error = EPERM;
31217c478bd9Sstevel@tonic-gate 		break;
31227c478bd9Sstevel@tonic-gate 
31237c478bd9Sstevel@tonic-gate 	default:
31247c478bd9Sstevel@tonic-gate 		error = EINVAL;
31257c478bd9Sstevel@tonic-gate 		break;
31267c478bd9Sstevel@tonic-gate 	}
31277c478bd9Sstevel@tonic-gate done:
31287c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
31297c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
31307c478bd9Sstevel@tonic-gate 	return (error);
31317c478bd9Sstevel@tonic-gate }
31327c478bd9Sstevel@tonic-gate 
31337c478bd9Sstevel@tonic-gate /*
31347c478bd9Sstevel@tonic-gate  * Get queue fields.
31357c478bd9Sstevel@tonic-gate  */
31367c478bd9Sstevel@tonic-gate int
31377c478bd9Sstevel@tonic-gate strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
31387c478bd9Sstevel@tonic-gate {
31397c478bd9Sstevel@tonic-gate 	qband_t 	*qbp = NULL;
31407c478bd9Sstevel@tonic-gate 	int 		error = 0;
31417c478bd9Sstevel@tonic-gate 	kthread_id_t 	freezer;
31427c478bd9Sstevel@tonic-gate 
31437c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
31447c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
31457c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
31467c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
31477c478bd9Sstevel@tonic-gate 	} else
31487c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
31497c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
31507c478bd9Sstevel@tonic-gate 		error = EINVAL;
31517c478bd9Sstevel@tonic-gate 		goto done;
31527c478bd9Sstevel@tonic-gate 	}
31537c478bd9Sstevel@tonic-gate 	if (pri != 0) {
31547c478bd9Sstevel@tonic-gate 		int i;
31557c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
31567c478bd9Sstevel@tonic-gate 
31577c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
31587c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
31597c478bd9Sstevel@tonic-gate 			while (*qbpp)
31607c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
31617c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
31627c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
31637c478bd9Sstevel@tonic-gate 					error = EAGAIN;
31647c478bd9Sstevel@tonic-gate 					goto done;
31657c478bd9Sstevel@tonic-gate 				}
31667c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
31677c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
31687c478bd9Sstevel@tonic-gate 				q->q_nband++;
31697c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
31707c478bd9Sstevel@tonic-gate 			}
31717c478bd9Sstevel@tonic-gate 		}
31727c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
31737c478bd9Sstevel@tonic-gate 		i = pri;
31747c478bd9Sstevel@tonic-gate 		while (--i)
31757c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
31767c478bd9Sstevel@tonic-gate 	}
31777c478bd9Sstevel@tonic-gate 	switch (what) {
31787c478bd9Sstevel@tonic-gate 	case QHIWAT:
31797c478bd9Sstevel@tonic-gate 		if (qbp)
31807c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_hiwat;
31817c478bd9Sstevel@tonic-gate 		else
31827c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_hiwat;
31837c478bd9Sstevel@tonic-gate 		break;
31847c478bd9Sstevel@tonic-gate 
31857c478bd9Sstevel@tonic-gate 	case QLOWAT:
31867c478bd9Sstevel@tonic-gate 		if (qbp)
31877c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_lowat;
31887c478bd9Sstevel@tonic-gate 		else
31897c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_lowat;
31907c478bd9Sstevel@tonic-gate 		break;
31917c478bd9Sstevel@tonic-gate 
31927c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
31937c478bd9Sstevel@tonic-gate 		if (qbp)
31947c478bd9Sstevel@tonic-gate 			error = EINVAL;
31957c478bd9Sstevel@tonic-gate 		else
31967c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_maxpsz;
31977c478bd9Sstevel@tonic-gate 		break;
31987c478bd9Sstevel@tonic-gate 
31997c478bd9Sstevel@tonic-gate 	case QMINPSZ:
32007c478bd9Sstevel@tonic-gate 		if (qbp)
32017c478bd9Sstevel@tonic-gate 			error = EINVAL;
32027c478bd9Sstevel@tonic-gate 		else
32037c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_minpsz;
32047c478bd9Sstevel@tonic-gate 		break;
32057c478bd9Sstevel@tonic-gate 
32067c478bd9Sstevel@tonic-gate 	case QCOUNT:
32077c478bd9Sstevel@tonic-gate 		if (qbp)
32087c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_count;
32097c478bd9Sstevel@tonic-gate 		else
32107c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_count;
32117c478bd9Sstevel@tonic-gate 		break;
32127c478bd9Sstevel@tonic-gate 
32137c478bd9Sstevel@tonic-gate 	case QFIRST:
32147c478bd9Sstevel@tonic-gate 		if (qbp)
32157c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_first;
32167c478bd9Sstevel@tonic-gate 		else
32177c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_first;
32187c478bd9Sstevel@tonic-gate 		break;
32197c478bd9Sstevel@tonic-gate 
32207c478bd9Sstevel@tonic-gate 	case QLAST:
32217c478bd9Sstevel@tonic-gate 		if (qbp)
32227c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_last;
32237c478bd9Sstevel@tonic-gate 		else
32247c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_last;
32257c478bd9Sstevel@tonic-gate 		break;
32267c478bd9Sstevel@tonic-gate 
32277c478bd9Sstevel@tonic-gate 	case QFLAG:
32287c478bd9Sstevel@tonic-gate 		if (qbp)
32297c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = qbp->qb_flag;
32307c478bd9Sstevel@tonic-gate 		else
32317c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = q->q_flag;
32327c478bd9Sstevel@tonic-gate 		break;
32337c478bd9Sstevel@tonic-gate 
32347c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
32357c478bd9Sstevel@tonic-gate 		if (qbp)
32367c478bd9Sstevel@tonic-gate 			error = EINVAL;
32377c478bd9Sstevel@tonic-gate 		else
32387c478bd9Sstevel@tonic-gate 			*(short *)valp = q->q_struiot;
32397c478bd9Sstevel@tonic-gate 		break;
32407c478bd9Sstevel@tonic-gate 
32417c478bd9Sstevel@tonic-gate 	default:
32427c478bd9Sstevel@tonic-gate 		error = EINVAL;
32437c478bd9Sstevel@tonic-gate 		break;
32447c478bd9Sstevel@tonic-gate 	}
32457c478bd9Sstevel@tonic-gate done:
32467c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
32477c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
32487c478bd9Sstevel@tonic-gate 	return (error);
32497c478bd9Sstevel@tonic-gate }
32507c478bd9Sstevel@tonic-gate 
32517c478bd9Sstevel@tonic-gate /*
32527c478bd9Sstevel@tonic-gate  * Function awakes all in cvwait/sigwait/pollwait, on one of:
32537c478bd9Sstevel@tonic-gate  *	QWANTWSYNC or QWANTR or QWANTW,
32547c478bd9Sstevel@tonic-gate  *
32557c478bd9Sstevel@tonic-gate  * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
32567c478bd9Sstevel@tonic-gate  *	 deferred wakeup will be done. Also if strpoll() in progress then a
32577c478bd9Sstevel@tonic-gate  *	 deferred pollwakeup will be done.
32587c478bd9Sstevel@tonic-gate  */
32597c478bd9Sstevel@tonic-gate void
32607c478bd9Sstevel@tonic-gate strwakeq(queue_t *q, int flag)
32617c478bd9Sstevel@tonic-gate {
32627c478bd9Sstevel@tonic-gate 	stdata_t 	*stp = STREAM(q);
32637c478bd9Sstevel@tonic-gate 	pollhead_t 	*pl;
32647c478bd9Sstevel@tonic-gate 
32657c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
32667c478bd9Sstevel@tonic-gate 	pl = &stp->sd_pollist;
32677c478bd9Sstevel@tonic-gate 	if (flag & QWANTWSYNC) {
32687c478bd9Sstevel@tonic-gate 		ASSERT(!(q->q_flag & QREADR));
32697c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
32707c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
32717c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
32727c478bd9Sstevel@tonic-gate 		} else {
32737c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= WSLEEP;
32747c478bd9Sstevel@tonic-gate 		}
32757c478bd9Sstevel@tonic-gate 
32767c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
32777c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
32787c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
32797c478bd9Sstevel@tonic-gate 
32807c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
32817c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
32827c478bd9Sstevel@tonic-gate 	} else if (flag & QWANTR) {
32837c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & RSLEEP) {
32847c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~RSLEEP;
32857c478bd9Sstevel@tonic-gate 			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
32867c478bd9Sstevel@tonic-gate 		} else {
32877c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= RSLEEP;
32887c478bd9Sstevel@tonic-gate 		}
32897c478bd9Sstevel@tonic-gate 
32907c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
32917c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLIN | POLLRDNORM);
32927c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
32937c478bd9Sstevel@tonic-gate 
32947c478bd9Sstevel@tonic-gate 		{
32957c478bd9Sstevel@tonic-gate 			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);
32967c478bd9Sstevel@tonic-gate 
32977c478bd9Sstevel@tonic-gate 			if (events)
32987c478bd9Sstevel@tonic-gate 				strsendsig(stp->sd_siglist, events, 0, 0);
32997c478bd9Sstevel@tonic-gate 		}
33007c478bd9Sstevel@tonic-gate 	} else {
33017c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
33027c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
33037c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
33047c478bd9Sstevel@tonic-gate 		}
33057c478bd9Sstevel@tonic-gate 
33067c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
33077c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
33087c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
33097c478bd9Sstevel@tonic-gate 
33107c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
33117c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
33127c478bd9Sstevel@tonic-gate 	}
33137c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
33147c478bd9Sstevel@tonic-gate }
33157c478bd9Sstevel@tonic-gate 
33167c478bd9Sstevel@tonic-gate int
33177c478bd9Sstevel@tonic-gate struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
33187c478bd9Sstevel@tonic-gate {
33197c478bd9Sstevel@tonic-gate 	stdata_t *stp = STREAM(q);
33207c478bd9Sstevel@tonic-gate 	int typ  = STRUIOT_STANDARD;
33217c478bd9Sstevel@tonic-gate 	uio_t	 *uiop = &dp->d_uio;
33227c478bd9Sstevel@tonic-gate 	dblk_t	 *dbp;
33237c478bd9Sstevel@tonic-gate 	ssize_t	 uiocnt;
33247c478bd9Sstevel@tonic-gate 	ssize_t	 cnt;
33257c478bd9Sstevel@tonic-gate 	unsigned char *ptr;
33267c478bd9Sstevel@tonic-gate 	ssize_t	 resid;
33277c478bd9Sstevel@tonic-gate 	int	 error = 0;
33287c478bd9Sstevel@tonic-gate 	on_trap_data_t otd;
33297c478bd9Sstevel@tonic-gate 	queue_t	*stwrq;
33307c478bd9Sstevel@tonic-gate 
33317c478bd9Sstevel@tonic-gate 	/*
33327c478bd9Sstevel@tonic-gate 	 * Plumbing may change while taking the type so store the
33337c478bd9Sstevel@tonic-gate 	 * queue in a temporary variable. It doesn't matter even
33347c478bd9Sstevel@tonic-gate 	 * if the we take the type from the previous plumbing,
33357c478bd9Sstevel@tonic-gate 	 * that's because if the plumbing has changed when we were
33367c478bd9Sstevel@tonic-gate 	 * holding the queue in a temporary variable, we can continue
33377c478bd9Sstevel@tonic-gate 	 * processing the message the way it would have been processed
33387c478bd9Sstevel@tonic-gate 	 * in the old plumbing, without any side effects but a bit
33397c478bd9Sstevel@tonic-gate 	 * extra processing for partial ip header checksum.
33407c478bd9Sstevel@tonic-gate 	 *
33417c478bd9Sstevel@tonic-gate 	 * This has been done to avoid holding the sd_lock which is
33427c478bd9Sstevel@tonic-gate 	 * very hot.
33437c478bd9Sstevel@tonic-gate 	 */
33447c478bd9Sstevel@tonic-gate 
33457c478bd9Sstevel@tonic-gate 	stwrq = stp->sd_struiowrq;
33467c478bd9Sstevel@tonic-gate 	if (stwrq)
33477c478bd9Sstevel@tonic-gate 		typ = stwrq->q_struiot;
33487c478bd9Sstevel@tonic-gate 
33497c478bd9Sstevel@tonic-gate 	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
33507c478bd9Sstevel@tonic-gate 		dbp = mp->b_datap;
33517c478bd9Sstevel@tonic-gate 		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
33527c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
33537c478bd9Sstevel@tonic-gate 		cnt = MIN(uiocnt, uiop->uio_resid);
33547c478bd9Sstevel@tonic-gate 		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
33557c478bd9Sstevel@tonic-gate 		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
33567c478bd9Sstevel@tonic-gate 			/*
33577c478bd9Sstevel@tonic-gate 			 * Either this mblk has already been processed
33587c478bd9Sstevel@tonic-gate 			 * or there is no more room in this mblk (?).
33597c478bd9Sstevel@tonic-gate 			 */
33607c478bd9Sstevel@tonic-gate 			continue;
33617c478bd9Sstevel@tonic-gate 		}
33627c478bd9Sstevel@tonic-gate 		switch (typ) {
33637c478bd9Sstevel@tonic-gate 		case STRUIOT_STANDARD:
33647c478bd9Sstevel@tonic-gate 			if (noblock) {
33657c478bd9Sstevel@tonic-gate 				if (on_trap(&otd, OT_DATA_ACCESS)) {
33667c478bd9Sstevel@tonic-gate 					no_trap();
33677c478bd9Sstevel@tonic-gate 					error = EWOULDBLOCK;
33687c478bd9Sstevel@tonic-gate 					goto out;
33697c478bd9Sstevel@tonic-gate 				}
33707c478bd9Sstevel@tonic-gate 			}
33717c478bd9Sstevel@tonic-gate 			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
33727c478bd9Sstevel@tonic-gate 				if (noblock)
33737c478bd9Sstevel@tonic-gate 					no_trap();
33747c478bd9Sstevel@tonic-gate 				goto out;
33757c478bd9Sstevel@tonic-gate 			}
33767c478bd9Sstevel@tonic-gate 			if (noblock)
33777c478bd9Sstevel@tonic-gate 				no_trap();
33787c478bd9Sstevel@tonic-gate 			break;
33797c478bd9Sstevel@tonic-gate 
33807c478bd9Sstevel@tonic-gate 		default:
33817c478bd9Sstevel@tonic-gate 			error = EIO;
33827c478bd9Sstevel@tonic-gate 			goto out;
33837c478bd9Sstevel@tonic-gate 		}
33847c478bd9Sstevel@tonic-gate 		dbp->db_struioflag |= STRUIO_DONE;
33857c478bd9Sstevel@tonic-gate 		dbp->db_cksumstuff += cnt;
33867c478bd9Sstevel@tonic-gate 	}
33877c478bd9Sstevel@tonic-gate out:
33887c478bd9Sstevel@tonic-gate 	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
33897c478bd9Sstevel@tonic-gate 		/*
33907c478bd9Sstevel@tonic-gate 		 * A fault has occured and some bytes were moved to the
33917c478bd9Sstevel@tonic-gate 		 * current mblk, the uio_t has already been updated by
33927c478bd9Sstevel@tonic-gate 		 * the appropriate uio routine, so also update the mblk
33937c478bd9Sstevel@tonic-gate 		 * to reflect this in case this same mblk chain is used
33947c478bd9Sstevel@tonic-gate 		 * again (after the fault has been handled).
33957c478bd9Sstevel@tonic-gate 		 */
33967c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
33977c478bd9Sstevel@tonic-gate 		if (uiocnt >= resid)
33987c478bd9Sstevel@tonic-gate 			dbp->db_cksumstuff += resid;
33997c478bd9Sstevel@tonic-gate 	}
34007c478bd9Sstevel@tonic-gate 	return (error);
34017c478bd9Sstevel@tonic-gate }
34027c478bd9Sstevel@tonic-gate 
34037c478bd9Sstevel@tonic-gate /*
34047c478bd9Sstevel@tonic-gate  * Try to enter queue synchronously. Any attempt to enter a closing queue will
34057c478bd9Sstevel@tonic-gate  * fails. The qp->q_rwcnt keeps track of the number of successful entries so
34067c478bd9Sstevel@tonic-gate  * that removeq() will not try to close the queue while a thread is inside the
34077c478bd9Sstevel@tonic-gate  * queue.
34087c478bd9Sstevel@tonic-gate  */
34097c478bd9Sstevel@tonic-gate static boolean_t
34107c478bd9Sstevel@tonic-gate rwnext_enter(queue_t *qp)
34117c478bd9Sstevel@tonic-gate {
34127c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
34137c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWCLOSE) {
34147c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
34157c478bd9Sstevel@tonic-gate 		return (B_FALSE);
34167c478bd9Sstevel@tonic-gate 	}
34177c478bd9Sstevel@tonic-gate 	qp->q_rwcnt++;
34187c478bd9Sstevel@tonic-gate 	ASSERT(qp->q_rwcnt != 0);
34197c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
34207c478bd9Sstevel@tonic-gate 	return (B_TRUE);
34217c478bd9Sstevel@tonic-gate }
34227c478bd9Sstevel@tonic-gate 
34237c478bd9Sstevel@tonic-gate /*
34247c478bd9Sstevel@tonic-gate  * Decrease the count of threads running in sync stream queue and wake up any
34257c478bd9Sstevel@tonic-gate  * threads blocked in removeq().
34267c478bd9Sstevel@tonic-gate  */
34277c478bd9Sstevel@tonic-gate static void
34287c478bd9Sstevel@tonic-gate rwnext_exit(queue_t *qp)
34297c478bd9Sstevel@tonic-gate {
34307c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
34317c478bd9Sstevel@tonic-gate 	qp->q_rwcnt--;
34327c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWANTRMQSYNC) {
34337c478bd9Sstevel@tonic-gate 		qp->q_flag &= ~QWANTRMQSYNC;
34347c478bd9Sstevel@tonic-gate 		cv_broadcast(&qp->q_wait);
34357c478bd9Sstevel@tonic-gate 	}
34367c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
34377c478bd9Sstevel@tonic-gate }
34387c478bd9Sstevel@tonic-gate 
34397c478bd9Sstevel@tonic-gate /*
34407c478bd9Sstevel@tonic-gate  * The purpose of rwnext() is to call the rw procedure of the next
34417c478bd9Sstevel@tonic-gate  * (downstream) modules queue.
34427c478bd9Sstevel@tonic-gate  *
 * It is treated as a put entrypoint for perimeter synchronization.
34447c478bd9Sstevel@tonic-gate  *
34457c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
34467c478bd9Sstevel@tonic-gate  * sync queues). If it is CIPUT sync queue sq_count is incremented and it does
34477c478bd9Sstevel@tonic-gate  * not matter if any regular put entrypoints have been already entered. We
34487c478bd9Sstevel@tonic-gate  * can't increment one of the sq_putcounts (instead of sq_count) because
34497c478bd9Sstevel@tonic-gate  * qwait_rw won't know which counter to decrement.
34507c478bd9Sstevel@tonic-gate  *
34517c478bd9Sstevel@tonic-gate  * It would be reasonable to add the lockless FASTPUT logic.
34527c478bd9Sstevel@tonic-gate  */
34537c478bd9Sstevel@tonic-gate int
34547c478bd9Sstevel@tonic-gate rwnext(queue_t *qp, struiod_t *dp)
34557c478bd9Sstevel@tonic-gate {
34567c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
34577c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
34587c478bd9Sstevel@tonic-gate 	uint16_t	count;
34597c478bd9Sstevel@tonic-gate 	uint16_t	flags;
34607c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
34617c478bd9Sstevel@tonic-gate 	int		(*proc)();
34627c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
34637c478bd9Sstevel@tonic-gate 	int		isread;
34647c478bd9Sstevel@tonic-gate 	int		rval;
34657c478bd9Sstevel@tonic-gate 
34667c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
34677c478bd9Sstevel@tonic-gate 	/*
34687c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until acquiring
34697c478bd9Sstevel@tonic-gate 	 * SQLOCK. Note that a read-side rwnext from the streamhead will
34707c478bd9Sstevel@tonic-gate 	 * already have sd_lock acquired. In either case sd_lock is always
34717c478bd9Sstevel@tonic-gate 	 * released after acquiring SQLOCK.
34727c478bd9Sstevel@tonic-gate 	 *
34737c478bd9Sstevel@tonic-gate 	 * The streamhead read-side holding sd_lock when calling rwnext is
34747c478bd9Sstevel@tonic-gate 	 * required to prevent a race condition were M_DATA mblks flowing
34757c478bd9Sstevel@tonic-gate 	 * up the read-side of the stream could be bypassed by a rwnext()
34767c478bd9Sstevel@tonic-gate 	 * down-call. In this case sd_lock acts as the streamhead perimeter.
34777c478bd9Sstevel@tonic-gate 	 */
34787c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
34797c478bd9Sstevel@tonic-gate 		isread = 0;
34807c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
34817c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
34827c478bd9Sstevel@tonic-gate 	} else {
34837c478bd9Sstevel@tonic-gate 		isread = 1;
34847c478bd9Sstevel@tonic-gate 		if (nqp != stp->sd_wrq)
34857c478bd9Sstevel@tonic-gate 			/* Not streamhead */
34867c478bd9Sstevel@tonic-gate 			mutex_enter(&stp->sd_lock);
34877c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
34887c478bd9Sstevel@tonic-gate 	}
34897c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
34907c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
34917c478bd9Sstevel@tonic-gate 		/*
34927c478bd9Sstevel@tonic-gate 		 * Not a synchronous module or no r/w procedure for this
34937c478bd9Sstevel@tonic-gate 		 * queue, so just return EINVAL and let the caller handle it.
34947c478bd9Sstevel@tonic-gate 		 */
34957c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
34967c478bd9Sstevel@tonic-gate 		return (EINVAL);
34977c478bd9Sstevel@tonic-gate 	}
34987c478bd9Sstevel@tonic-gate 
34997c478bd9Sstevel@tonic-gate 	if (rwnext_enter(qp) == B_FALSE) {
35007c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
35017c478bd9Sstevel@tonic-gate 		return (EINVAL);
35027c478bd9Sstevel@tonic-gate 	}
35037c478bd9Sstevel@tonic-gate 
35047c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
35057c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
35067c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
35077c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
35087c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
35097c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
35107c478bd9Sstevel@tonic-gate 
35117c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
35127c478bd9Sstevel@tonic-gate 		/*
35137c478bd9Sstevel@tonic-gate 		 * if this queue is being closed, return.
35147c478bd9Sstevel@tonic-gate 		 */
35157c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QWCLOSE) {
35167c478bd9Sstevel@tonic-gate 			mutex_exit(SQLOCK(sq));
35177c478bd9Sstevel@tonic-gate 			rwnext_exit(qp);
35187c478bd9Sstevel@tonic-gate 			return (EINVAL);
35197c478bd9Sstevel@tonic-gate 		}
35207c478bd9Sstevel@tonic-gate 
35217c478bd9Sstevel@tonic-gate 		/*
35227c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
35237c478bd9Sstevel@tonic-gate 		 */
35247c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
35257c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
35267c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
35277c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
35287c478bd9Sstevel@tonic-gate 	}
35297c478bd9Sstevel@tonic-gate 
35307c478bd9Sstevel@tonic-gate 	if (isread == 0 && stp->sd_struiowrq == NULL ||
35317c478bd9Sstevel@tonic-gate 	    isread == 1 && stp->sd_struiordq == NULL) {
35327c478bd9Sstevel@tonic-gate 		/*
35337c478bd9Sstevel@tonic-gate 		 * Stream plumbing changed while waiting for inner perimeter
35347c478bd9Sstevel@tonic-gate 		 * so just return EINVAL and let the caller handle it.
35357c478bd9Sstevel@tonic-gate 		 */
35367c478bd9Sstevel@tonic-gate 		mutex_exit(SQLOCK(sq));
35377c478bd9Sstevel@tonic-gate 		rwnext_exit(qp);
35387c478bd9Sstevel@tonic-gate 		return (EINVAL);
35397c478bd9Sstevel@tonic-gate 	}
35407c478bd9Sstevel@tonic-gate 	if (!(flags & SQ_CIPUT))
35417c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
35427c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
35437c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
35447c478bd9Sstevel@tonic-gate 	/*
35457c478bd9Sstevel@tonic-gate 	 * Note: The only message ordering guarantee that rwnext() makes is
35467c478bd9Sstevel@tonic-gate 	 *	 for the write queue flow-control case. All others (r/w queue
35477c478bd9Sstevel@tonic-gate 	 *	 with q_count > 0 (or q_first != 0)) are the resposibilty of
35487c478bd9Sstevel@tonic-gate 	 *	 the queue's rw procedure. This could be genralized here buy
35497c478bd9Sstevel@tonic-gate 	 *	 running the queue's service procedure, but that wouldn't be
35507c478bd9Sstevel@tonic-gate 	 *	 the most efficent for all cases.
35517c478bd9Sstevel@tonic-gate 	 */
35527c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
35537c478bd9Sstevel@tonic-gate 	if (! isread && (qp->q_flag & QFULL)) {
35547c478bd9Sstevel@tonic-gate 		/*
35557c478bd9Sstevel@tonic-gate 		 * Write queue may be flow controlled. If so,
35567c478bd9Sstevel@tonic-gate 		 * mark the queue for wakeup when it's not.
35577c478bd9Sstevel@tonic-gate 		 */
35587c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(qp));
35597c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QFULL) {
35607c478bd9Sstevel@tonic-gate 			qp->q_flag |= QWANTWSYNC;
35617c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(qp));
35627c478bd9Sstevel@tonic-gate 			rval = EWOULDBLOCK;
35637c478bd9Sstevel@tonic-gate 			goto out;
35647c478bd9Sstevel@tonic-gate 		}
35657c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
35667c478bd9Sstevel@tonic-gate 	}
35677c478bd9Sstevel@tonic-gate 
35687c478bd9Sstevel@tonic-gate 	if (! isread && dp->d_mp)
35697c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
35707c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_datap->db_base);
35717c478bd9Sstevel@tonic-gate 
35727c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, dp);
35737c478bd9Sstevel@tonic-gate 
35747c478bd9Sstevel@tonic-gate 	if (isread && dp->d_mp)
35757c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
35767c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
35777c478bd9Sstevel@tonic-gate out:
35787c478bd9Sstevel@tonic-gate 	/*
35797c478bd9Sstevel@tonic-gate 	 * The queue is protected from being freed by sq_count, so it is
35807c478bd9Sstevel@tonic-gate 	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
35817c478bd9Sstevel@tonic-gate 	 */
35827c478bd9Sstevel@tonic-gate 	rwnext_exit(qp);
35837c478bd9Sstevel@tonic-gate 
35847c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
35857c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
35867c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
35877c478bd9Sstevel@tonic-gate 	sq->sq_count--;
35887c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
35897c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
35907c478bd9Sstevel@tonic-gate 		/*
35917c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
35927c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
35937c478bd9Sstevel@tonic-gate 		 */
35947c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
35957c478bd9Sstevel@tonic-gate 		return (rval);
35967c478bd9Sstevel@tonic-gate 	}
35977c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
35987c478bd9Sstevel@tonic-gate 	/*
35997c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
36007c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
36017c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
36027c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
36037c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
36047c478bd9Sstevel@tonic-gate 	 *
36057c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
36067c478bd9Sstevel@tonic-gate 	 *
36077c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
36087c478bd9Sstevel@tonic-gate 	 * 	sq->sq_count == 0);
36097c478bd9Sstevel@tonic-gate 	 *
36107c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
36117c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
36127c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
36137c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
36147c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
36157c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
36167c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
36177c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
36187c478bd9Sstevel@tonic-gate 	 * test invalid.
36197c478bd9Sstevel@tonic-gate 	 */
36207c478bd9Sstevel@tonic-gate 
36217c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
36227c478bd9Sstevel@tonic-gate 	if (sq->sq_flags & SQ_WANTWAKEUP) {
36237c478bd9Sstevel@tonic-gate 		sq->sq_flags &= ~SQ_WANTWAKEUP;
36247c478bd9Sstevel@tonic-gate 		cv_broadcast(&sq->sq_wait);
36257c478bd9Sstevel@tonic-gate 	}
36267c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
36277c478bd9Sstevel@tonic-gate 	return (rval);
36287c478bd9Sstevel@tonic-gate }
36297c478bd9Sstevel@tonic-gate 
36307c478bd9Sstevel@tonic-gate /*
36317c478bd9Sstevel@tonic-gate  * The purpose of infonext() is to call the info procedure of the next
36327c478bd9Sstevel@tonic-gate  * (downstream) modules queue.
36337c478bd9Sstevel@tonic-gate  *
36347c478bd9Sstevel@tonic-gate  * treated as put entrypoint for perimeter syncronization.
36357c478bd9Sstevel@tonic-gate  *
36367c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
36377c478bd9Sstevel@tonic-gate  * sync queues). If it is CIPUT sync queue regular sq_count is incremented and
36387c478bd9Sstevel@tonic-gate  * it does not matter if any regular put entrypoints have been already
36397c478bd9Sstevel@tonic-gate  * entered.
36407c478bd9Sstevel@tonic-gate  */
36417c478bd9Sstevel@tonic-gate int
36427c478bd9Sstevel@tonic-gate infonext(queue_t *qp, infod_t *idp)
36437c478bd9Sstevel@tonic-gate {
36447c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
36457c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
36467c478bd9Sstevel@tonic-gate 	uint16_t	count;
36477c478bd9Sstevel@tonic-gate 	uint16_t 	flags;
36487c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
36497c478bd9Sstevel@tonic-gate 	int		(*proc)();
36507c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
36517c478bd9Sstevel@tonic-gate 	int		rval;
36527c478bd9Sstevel@tonic-gate 
36537c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
36547c478bd9Sstevel@tonic-gate 	/*
36557c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until
36567c478bd9Sstevel@tonic-gate 	 * acquiring SQLOCK.
36577c478bd9Sstevel@tonic-gate 	 */
36587c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
36597c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
36607c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
36617c478bd9Sstevel@tonic-gate 	} else {
36627c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
36637c478bd9Sstevel@tonic-gate 	}
36647c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
36657c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
36667c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
36677c478bd9Sstevel@tonic-gate 		return (EINVAL);
36687c478bd9Sstevel@tonic-gate 	}
36697c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
36707c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
36717c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
36727c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
36737c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
36747c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
36757c478bd9Sstevel@tonic-gate 
36767c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
36777c478bd9Sstevel@tonic-gate 		/*
36787c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
36797c478bd9Sstevel@tonic-gate 		 */
36807c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
36817c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
36827c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
36837c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
36847c478bd9Sstevel@tonic-gate 	}
36857c478bd9Sstevel@tonic-gate 
36867c478bd9Sstevel@tonic-gate 	if (! (flags & SQ_CIPUT))
36877c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
36887c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
36897c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
36907c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
36917c478bd9Sstevel@tonic-gate 
36927c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, idp);
36937c478bd9Sstevel@tonic-gate 
36947c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
36957c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
36967c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
36977c478bd9Sstevel@tonic-gate 	sq->sq_count--;
36987c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
36997c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
37007c478bd9Sstevel@tonic-gate 		/*
37017c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
37027c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
37037c478bd9Sstevel@tonic-gate 		 */
37047c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
37057c478bd9Sstevel@tonic-gate 		return (rval);
37067c478bd9Sstevel@tonic-gate 	}
37077c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
37087c478bd9Sstevel@tonic-gate /*
37097c478bd9Sstevel@tonic-gate  * XXXX
37107c478bd9Sstevel@tonic-gate  * I am not certain the next comment is correct here.  I need to consider
37117c478bd9Sstevel@tonic-gate  * why the infonext is called, and if dropping SQ_EXCL unless non-CIPUT
37127c478bd9Sstevel@tonic-gate  * might cause other problems.  It just might be safer to drop it if
37137c478bd9Sstevel@tonic-gate  * !SQ_CIPUT because that is when we set it.
37147c478bd9Sstevel@tonic-gate  */
37157c478bd9Sstevel@tonic-gate 	/*
37167c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
37177c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
37187c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
37197c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
37207c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
37217c478bd9Sstevel@tonic-gate 	 *
37227c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
37237c478bd9Sstevel@tonic-gate 	 *
37247c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
37257c478bd9Sstevel@tonic-gate 	 *	sq->sq_count == 0);
37267c478bd9Sstevel@tonic-gate 	 *
37277c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
37287c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
37297c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
37307c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
37317c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
37327c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
37337c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
37347c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
37357c478bd9Sstevel@tonic-gate 	 * test invalid.
37367c478bd9Sstevel@tonic-gate 	 */
37377c478bd9Sstevel@tonic-gate 
37387c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
37397c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
37407c478bd9Sstevel@tonic-gate 	return (rval);
37417c478bd9Sstevel@tonic-gate }
37427c478bd9Sstevel@tonic-gate 
37437c478bd9Sstevel@tonic-gate /*
37447c478bd9Sstevel@tonic-gate  * Return nonzero if the queue is responsible for struio(), else return 0.
37457c478bd9Sstevel@tonic-gate  */
37467c478bd9Sstevel@tonic-gate int
37477c478bd9Sstevel@tonic-gate isuioq(queue_t *q)
37487c478bd9Sstevel@tonic-gate {
37497c478bd9Sstevel@tonic-gate 	if (q->q_flag & QREADR)
37507c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiordq == q);
37517c478bd9Sstevel@tonic-gate 	else
37527c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiowrq == q);
37537c478bd9Sstevel@tonic-gate }
37547c478bd9Sstevel@tonic-gate 
37557c478bd9Sstevel@tonic-gate #if defined(__sparc)
37567c478bd9Sstevel@tonic-gate int disable_putlocks = 0;
37577c478bd9Sstevel@tonic-gate #else
37587c478bd9Sstevel@tonic-gate int disable_putlocks = 1;
37597c478bd9Sstevel@tonic-gate #endif
37607c478bd9Sstevel@tonic-gate 
37617c478bd9Sstevel@tonic-gate /*
37627c478bd9Sstevel@tonic-gate  * called by create_putlock.
37637c478bd9Sstevel@tonic-gate  */
37647c478bd9Sstevel@tonic-gate static void
37657c478bd9Sstevel@tonic-gate create_syncq_putlocks(queue_t *q)
37667c478bd9Sstevel@tonic-gate {
37677c478bd9Sstevel@tonic-gate 	syncq_t	*sq = q->q_syncq;
37687c478bd9Sstevel@tonic-gate 	ciputctrl_t *cip;
37697c478bd9Sstevel@tonic-gate 	int i;
37707c478bd9Sstevel@tonic-gate 
37717c478bd9Sstevel@tonic-gate 	ASSERT(sq != NULL);
37727c478bd9Sstevel@tonic-gate 
37737c478bd9Sstevel@tonic-gate 	ASSERT(disable_putlocks == 0);
37747c478bd9Sstevel@tonic-gate 	ASSERT(n_ciputctrl >= min_n_ciputctrl);
37757c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
37767c478bd9Sstevel@tonic-gate 
37777c478bd9Sstevel@tonic-gate 	if (!(sq->sq_type & SQ_CIPUT))
37787c478bd9Sstevel@tonic-gate 		return;
37797c478bd9Sstevel@tonic-gate 
37807c478bd9Sstevel@tonic-gate 	for (i = 0; i <= 1; i++) {
37817c478bd9Sstevel@tonic-gate 		if (sq->sq_ciputctrl == NULL) {
37827c478bd9Sstevel@tonic-gate 			cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
37837c478bd9Sstevel@tonic-gate 			SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
37847c478bd9Sstevel@tonic-gate 			mutex_enter(SQLOCK(sq));
37857c478bd9Sstevel@tonic-gate 			if (sq->sq_ciputctrl != NULL) {
37867c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
37877c478bd9Sstevel@tonic-gate 				kmem_cache_free(ciputctrl_cache, cip);
37887c478bd9Sstevel@tonic-gate 			} else {
37897c478bd9Sstevel@tonic-gate 				ASSERT(sq->sq_nciputctrl == 0);
37907c478bd9Sstevel@tonic-gate 				sq->sq_nciputctrl = n_ciputctrl - 1;
37917c478bd9Sstevel@tonic-gate 				/*
37927c478bd9Sstevel@tonic-gate 				 * putnext checks sq_ciputctrl without holding
37937c478bd9Sstevel@tonic-gate 				 * SQLOCK. if it is not NULL putnext assumes
37947c478bd9Sstevel@tonic-gate 				 * sq_nciputctrl is initialized. membar below
37957c478bd9Sstevel@tonic-gate 				 * insures that.
37967c478bd9Sstevel@tonic-gate 				 */
37977c478bd9Sstevel@tonic-gate 				membar_producer();
37987c478bd9Sstevel@tonic-gate 				sq->sq_ciputctrl = cip;
37997c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
38007c478bd9Sstevel@tonic-gate 			}
38017c478bd9Sstevel@tonic-gate 		}
38027c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
38037c478bd9Sstevel@tonic-gate 		if (i == 1)
38047c478bd9Sstevel@tonic-gate 			break;
38057c478bd9Sstevel@tonic-gate 		q = _OTHERQ(q);
38067c478bd9Sstevel@tonic-gate 		if (!(q->q_flag & QPERQ)) {
38077c478bd9Sstevel@tonic-gate 			ASSERT(sq == q->q_syncq);
38087c478bd9Sstevel@tonic-gate 			break;
38097c478bd9Sstevel@tonic-gate 		}
38107c478bd9Sstevel@tonic-gate 		ASSERT(q->q_syncq != NULL);
38117c478bd9Sstevel@tonic-gate 		ASSERT(sq != q->q_syncq);
38127c478bd9Sstevel@tonic-gate 		sq = q->q_syncq;
38137c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_type & SQ_CIPUT);
38147c478bd9Sstevel@tonic-gate 	}
38157c478bd9Sstevel@tonic-gate }
38167c478bd9Sstevel@tonic-gate 
38177c478bd9Sstevel@tonic-gate /*
38187c478bd9Sstevel@tonic-gate  * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for
38197c478bd9Sstevel@tonic-gate  * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for
38207c478bd9Sstevel@tonic-gate  * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
38217c478bd9Sstevel@tonic-gate  * starting from q and down to the driver.
38227c478bd9Sstevel@tonic-gate  *
38237c478bd9Sstevel@tonic-gate  * This should be called after the affected queues are part of stream
38247c478bd9Sstevel@tonic-gate  * geometry. It should be called from driver/module open routine after
38257c478bd9Sstevel@tonic-gate  * qprocson() call. It is also called from nfs syscall where it is known that
38267c478bd9Sstevel@tonic-gate  * stream is configured and won't change its geometry during create_putlock
38277c478bd9Sstevel@tonic-gate  * call.
38287c478bd9Sstevel@tonic-gate  *
38297c478bd9Sstevel@tonic-gate  * caller normally uses 0 value for the stream argument to speed up MT putnext
38307c478bd9Sstevel@tonic-gate  * into the perimeter of q for example because its perimeter is per module
38317c478bd9Sstevel@tonic-gate  * (e.g. IP).
38327c478bd9Sstevel@tonic-gate  *
38337c478bd9Sstevel@tonic-gate  * caller normally uses non 0 value for the stream argument to hint the system
38347c478bd9Sstevel@tonic-gate  * that the stream of q is a very contended global system stream
38357c478bd9Sstevel@tonic-gate  * (e.g. NFS/UDP) and the part of the stream from q to the driver is
38367c478bd9Sstevel@tonic-gate  * particularly MT hot.
38377c478bd9Sstevel@tonic-gate  *
38387c478bd9Sstevel@tonic-gate  * Caller insures stream plumbing won't happen while we are here and therefore
38397c478bd9Sstevel@tonic-gate  * q_next can be safely used.
38407c478bd9Sstevel@tonic-gate  */
38417c478bd9Sstevel@tonic-gate 
38427c478bd9Sstevel@tonic-gate void
38437c478bd9Sstevel@tonic-gate create_putlocks(queue_t *q, int stream)
38447c478bd9Sstevel@tonic-gate {
38457c478bd9Sstevel@tonic-gate 	ciputctrl_t	*cip;
38467c478bd9Sstevel@tonic-gate 	struct stdata	*stp = STREAM(q);
38477c478bd9Sstevel@tonic-gate 
38487c478bd9Sstevel@tonic-gate 	q = _WR(q);
38497c478bd9Sstevel@tonic-gate 	ASSERT(stp != NULL);
38507c478bd9Sstevel@tonic-gate 
38517c478bd9Sstevel@tonic-gate 	if (disable_putlocks != 0)
38527c478bd9Sstevel@tonic-gate 		return;
38537c478bd9Sstevel@tonic-gate 
38547c478bd9Sstevel@tonic-gate 	if (n_ciputctrl < min_n_ciputctrl)
38557c478bd9Sstevel@tonic-gate 		return;
38567c478bd9Sstevel@tonic-gate 
38577c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
38587c478bd9Sstevel@tonic-gate 
38597c478bd9Sstevel@tonic-gate 	if (stream != 0 && stp->sd_ciputctrl == NULL) {
38607c478bd9Sstevel@tonic-gate 		cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
38617c478bd9Sstevel@tonic-gate 		SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
38627c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
38637c478bd9Sstevel@tonic-gate 		if (stp->sd_ciputctrl != NULL) {
38647c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
38657c478bd9Sstevel@tonic-gate 			kmem_cache_free(ciputctrl_cache, cip);
38667c478bd9Sstevel@tonic-gate 		} else {
38677c478bd9Sstevel@tonic-gate 			ASSERT(stp->sd_nciputctrl == 0);
38687c478bd9Sstevel@tonic-gate 			stp->sd_nciputctrl = n_ciputctrl - 1;
38697c478bd9Sstevel@tonic-gate 			/*
38707c478bd9Sstevel@tonic-gate 			 * putnext checks sd_ciputctrl without holding
38717c478bd9Sstevel@tonic-gate 			 * sd_lock. if it is not NULL putnext assumes
38727c478bd9Sstevel@tonic-gate 			 * sd_nciputctrl is initialized. membar below
38737c478bd9Sstevel@tonic-gate 			 * insures that.
38747c478bd9Sstevel@tonic-gate 			 */
38757c478bd9Sstevel@tonic-gate 			membar_producer();
38767c478bd9Sstevel@tonic-gate 			stp->sd_ciputctrl = cip;
38777c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
38787c478bd9Sstevel@tonic-gate 		}
38797c478bd9Sstevel@tonic-gate 	}
38807c478bd9Sstevel@tonic-gate 
38817c478bd9Sstevel@tonic-gate 	ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);
38827c478bd9Sstevel@tonic-gate 
38837c478bd9Sstevel@tonic-gate 	while (_SAMESTR(q)) {
38847c478bd9Sstevel@tonic-gate 		create_syncq_putlocks(q);
38857c478bd9Sstevel@tonic-gate 		if (stream == 0)
38867c478bd9Sstevel@tonic-gate 			return;
38877c478bd9Sstevel@tonic-gate 		q = q->q_next;
38887c478bd9Sstevel@tonic-gate 	}
38897c478bd9Sstevel@tonic-gate 	ASSERT(q != NULL);
38907c478bd9Sstevel@tonic-gate 	create_syncq_putlocks(q);
38917c478bd9Sstevel@tonic-gate }
38927c478bd9Sstevel@tonic-gate 
38937c478bd9Sstevel@tonic-gate /*
38947c478bd9Sstevel@tonic-gate  * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
38957c478bd9Sstevel@tonic-gate  * through a stream.
38967c478bd9Sstevel@tonic-gate  *
38977c478bd9Sstevel@tonic-gate  * Data currently recorded per event is a hrtime stamp, queue address, event
38987c478bd9Sstevel@tonic-gate  * type, and a per type datum.  Much of the STREAMS framework is instrumented
38997c478bd9Sstevel@tonic-gate  * for automatic flow tracing (when enabled).  Events can be defined and used
39007c478bd9Sstevel@tonic-gate  * by STREAMS modules and drivers.
39017c478bd9Sstevel@tonic-gate  *
39027c478bd9Sstevel@tonic-gate  * Global objects:
39037c478bd9Sstevel@tonic-gate  *
39047c478bd9Sstevel@tonic-gate  *	str_ftevent() - Add a flow-trace event to a dblk.
39057c478bd9Sstevel@tonic-gate  *	str_ftfree() - Free flow-trace data
39067c478bd9Sstevel@tonic-gate  *
39077c478bd9Sstevel@tonic-gate  * Local objects:
39087c478bd9Sstevel@tonic-gate  *
39097c478bd9Sstevel@tonic-gate  *	fthdr_cache - pointer to the kmem cache for trace header.
39107c478bd9Sstevel@tonic-gate  *	ftblk_cache - pointer to the kmem cache for trace data blocks.
39117c478bd9Sstevel@tonic-gate  */
39127c478bd9Sstevel@tonic-gate 
39137c478bd9Sstevel@tonic-gate int str_ftnever = 1;	/* Don't do STREAMS flow tracing */
39147c478bd9Sstevel@tonic-gate 
39157c478bd9Sstevel@tonic-gate void
39167c478bd9Sstevel@tonic-gate str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
39177c478bd9Sstevel@tonic-gate {
39187c478bd9Sstevel@tonic-gate 	ftblk_t *bp = hp->tail;
39197c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
39207c478bd9Sstevel@tonic-gate 	ftevnt_t *ep;
39217c478bd9Sstevel@tonic-gate 	int ix, nix;
39227c478bd9Sstevel@tonic-gate 
39237c478bd9Sstevel@tonic-gate 	ASSERT(hp != NULL);
39247c478bd9Sstevel@tonic-gate 
39257c478bd9Sstevel@tonic-gate 	for (;;) {
39267c478bd9Sstevel@tonic-gate 		if ((ix = bp->ix) == FTBLK_EVNTS) {
39277c478bd9Sstevel@tonic-gate 			/*
39287c478bd9Sstevel@tonic-gate 			 * Tail doesn't have room, so need a new tail.
39297c478bd9Sstevel@tonic-gate 			 *
39307c478bd9Sstevel@tonic-gate 			 * To make this MT safe, first, allocate a new
39317c478bd9Sstevel@tonic-gate 			 * ftblk, and initialize it.  To make life a
39327c478bd9Sstevel@tonic-gate 			 * little easier, reserve the first slot (mostly
39337c478bd9Sstevel@tonic-gate 			 * by making ix = 1).  When we are finished with
39347c478bd9Sstevel@tonic-gate 			 * the initialization, CAS this pointer to the
39357c478bd9Sstevel@tonic-gate 			 * tail.  If this succeeds, this is the new
39367c478bd9Sstevel@tonic-gate 			 * "next" block.  Otherwise, another thread
39377c478bd9Sstevel@tonic-gate 			 * got here first, so free the block and start
39387c478bd9Sstevel@tonic-gate 			 * again.
39397c478bd9Sstevel@tonic-gate 			 */
39407c478bd9Sstevel@tonic-gate 			if (!(nbp = kmem_cache_alloc(ftblk_cache,
39417c478bd9Sstevel@tonic-gate 			    KM_NOSLEEP))) {
39427c478bd9Sstevel@tonic-gate 				/* no mem, so punt */
39437c478bd9Sstevel@tonic-gate 				str_ftnever++;
39447c478bd9Sstevel@tonic-gate 				/* free up all flow data? */
39457c478bd9Sstevel@tonic-gate 				return;
39467c478bd9Sstevel@tonic-gate 			}
39477c478bd9Sstevel@tonic-gate 			nbp->nxt = NULL;
39487c478bd9Sstevel@tonic-gate 			nbp->ix = 1;
39497c478bd9Sstevel@tonic-gate 			/*
39507c478bd9Sstevel@tonic-gate 			 * Just in case there is another thread about
39517c478bd9Sstevel@tonic-gate 			 * to get the next index, we need to make sure
39527c478bd9Sstevel@tonic-gate 			 * the value is there for it.
39537c478bd9Sstevel@tonic-gate 			 */
39547c478bd9Sstevel@tonic-gate 			membar_producer();
39557c478bd9Sstevel@tonic-gate 			if (casptr(&hp->tail, bp, nbp) == bp) {
39567c478bd9Sstevel@tonic-gate 				/* CAS was successful */
39577c478bd9Sstevel@tonic-gate 				bp->nxt = nbp;
39587c478bd9Sstevel@tonic-gate 				membar_producer();
39597c478bd9Sstevel@tonic-gate 				bp = nbp;
39607c478bd9Sstevel@tonic-gate 				ix = 0;
39617c478bd9Sstevel@tonic-gate 				goto cas_good;
39627c478bd9Sstevel@tonic-gate 			} else {
39637c478bd9Sstevel@tonic-gate 				kmem_cache_free(ftblk_cache, nbp);
39647c478bd9Sstevel@tonic-gate 				bp = hp->tail;
39657c478bd9Sstevel@tonic-gate 				continue;
39667c478bd9Sstevel@tonic-gate 			}
39677c478bd9Sstevel@tonic-gate 		}
39687c478bd9Sstevel@tonic-gate 		nix = ix + 1;
39697c478bd9Sstevel@tonic-gate 		if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) {
39707c478bd9Sstevel@tonic-gate 		cas_good:
39717c478bd9Sstevel@tonic-gate 			if (curthread != hp->thread) {
39727c478bd9Sstevel@tonic-gate 				hp->thread = curthread;
39737c478bd9Sstevel@tonic-gate 				evnt |= FTEV_CS;
39747c478bd9Sstevel@tonic-gate 			}
39757c478bd9Sstevel@tonic-gate 			if (CPU->cpu_seqid != hp->cpu_seqid) {
39767c478bd9Sstevel@tonic-gate 				hp->cpu_seqid = CPU->cpu_seqid;
39777c478bd9Sstevel@tonic-gate 				evnt |= FTEV_PS;
39787c478bd9Sstevel@tonic-gate 			}
39797c478bd9Sstevel@tonic-gate 			ep = &bp->ev[ix];
39807c478bd9Sstevel@tonic-gate 			break;
39817c478bd9Sstevel@tonic-gate 		}
39827c478bd9Sstevel@tonic-gate 	}
39837c478bd9Sstevel@tonic-gate 
39847c478bd9Sstevel@tonic-gate 	if (evnt & FTEV_QMASK) {
39857c478bd9Sstevel@tonic-gate 		queue_t *qp = p;
39867c478bd9Sstevel@tonic-gate 
39877c478bd9Sstevel@tonic-gate 		/*
39887c478bd9Sstevel@tonic-gate 		 * It is possible that the module info is broke
39897c478bd9Sstevel@tonic-gate 		 * (as is logsubr.c at this comment writing).
39907c478bd9Sstevel@tonic-gate 		 * Instead of panicing or doing other unmentionables,
39917c478bd9Sstevel@tonic-gate 		 * we shall put a dummy name as the mid, and continue.
39927c478bd9Sstevel@tonic-gate 		 */
39937c478bd9Sstevel@tonic-gate 		if (qp->q_qinfo == NULL)
39947c478bd9Sstevel@tonic-gate 			ep->mid = "NONAME";
39957c478bd9Sstevel@tonic-gate 		else
39967c478bd9Sstevel@tonic-gate 			ep->mid = qp->q_qinfo->qi_minfo->mi_idname;
39977c478bd9Sstevel@tonic-gate 
39987c478bd9Sstevel@tonic-gate 		if (!(qp->q_flag & QREADR))
39997c478bd9Sstevel@tonic-gate 			evnt |= FTEV_ISWR;
40007c478bd9Sstevel@tonic-gate 	} else {
40017c478bd9Sstevel@tonic-gate 		ep->mid = (char *)p;
40027c478bd9Sstevel@tonic-gate 	}
40037c478bd9Sstevel@tonic-gate 
40047c478bd9Sstevel@tonic-gate 	ep->ts = gethrtime();
40057c478bd9Sstevel@tonic-gate 	ep->evnt = evnt;
40067c478bd9Sstevel@tonic-gate 	ep->data = data;
40077c478bd9Sstevel@tonic-gate 	hp->hash = (hp->hash << 9) + hp->hash;
40087c478bd9Sstevel@tonic-gate 	hp->hash += (evnt << 16) | data;
40097c478bd9Sstevel@tonic-gate 	hp->hash += (uintptr_t)ep->mid;
40107c478bd9Sstevel@tonic-gate }
40117c478bd9Sstevel@tonic-gate 
40127c478bd9Sstevel@tonic-gate /*
40137c478bd9Sstevel@tonic-gate  * Free flow-trace data.
40147c478bd9Sstevel@tonic-gate  */
40157c478bd9Sstevel@tonic-gate void
40167c478bd9Sstevel@tonic-gate str_ftfree(dblk_t *dbp)
40177c478bd9Sstevel@tonic-gate {
40187c478bd9Sstevel@tonic-gate 	fthdr_t *hp = dbp->db_fthdr;
40197c478bd9Sstevel@tonic-gate 	ftblk_t *bp = &hp->first;
40207c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
40217c478bd9Sstevel@tonic-gate 
40227c478bd9Sstevel@tonic-gate 	if (bp != hp->tail || bp->ix != 0) {
40237c478bd9Sstevel@tonic-gate 		/*
40247c478bd9Sstevel@tonic-gate 		 * Clear out the hash, have the tail point to itself, and free
40257c478bd9Sstevel@tonic-gate 		 * any continuation blocks.
40267c478bd9Sstevel@tonic-gate 		 */
40277c478bd9Sstevel@tonic-gate 		bp = hp->first.nxt;
40287c478bd9Sstevel@tonic-gate 		hp->tail = &hp->first;
40297c478bd9Sstevel@tonic-gate 		hp->hash = 0;
40307c478bd9Sstevel@tonic-gate 		hp->first.nxt = NULL;
40317c478bd9Sstevel@tonic-gate 		hp->first.ix = 0;
40327c478bd9Sstevel@tonic-gate 		while (bp != NULL) {
40337c478bd9Sstevel@tonic-gate 			nbp = bp->nxt;
40347c478bd9Sstevel@tonic-gate 			kmem_cache_free(ftblk_cache, bp);
40357c478bd9Sstevel@tonic-gate 			bp = nbp;
40367c478bd9Sstevel@tonic-gate 		}
40377c478bd9Sstevel@tonic-gate 	}
40387c478bd9Sstevel@tonic-gate 	kmem_cache_free(fthdr_cache, hp);
40397c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
40407c478bd9Sstevel@tonic-gate }
4041