xref: /titanic_53/usr/src/uts/common/io/stream.c (revision 116094b26dbf4a03272e56a5d8b6378a087ad9d2)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
237c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
287c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
297c478bd9Sstevel@tonic-gate  */
307c478bd9Sstevel@tonic-gate 
317c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
327c478bd9Sstevel@tonic-gate 
337c478bd9Sstevel@tonic-gate #include <sys/types.h>
347c478bd9Sstevel@tonic-gate #include <sys/param.h>
357c478bd9Sstevel@tonic-gate #include <sys/thread.h>
367c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
377c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
387c478bd9Sstevel@tonic-gate #include <sys/stream.h>
397c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
407c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
417c478bd9Sstevel@tonic-gate #include <sys/conf.h>
427c478bd9Sstevel@tonic-gate #include <sys/debug.h>
437c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
447c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
457c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
467c478bd9Sstevel@tonic-gate #include <sys/errno.h>
477c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
487c478bd9Sstevel@tonic-gate #include <sys/ftrace.h>
497c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
507c478bd9Sstevel@tonic-gate #include <sys/multidata.h>
517c478bd9Sstevel@tonic-gate #include <sys/multidata_impl.h>
527c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate #ifdef DEBUG
557c478bd9Sstevel@tonic-gate #include <sys/kmem_impl.h>
567c478bd9Sstevel@tonic-gate #endif
577c478bd9Sstevel@tonic-gate 
587c478bd9Sstevel@tonic-gate /*
597c478bd9Sstevel@tonic-gate  * This file contains all the STREAMS utility routines that may
607c478bd9Sstevel@tonic-gate  * be used by modules and drivers.
617c478bd9Sstevel@tonic-gate  */
627c478bd9Sstevel@tonic-gate 
637c478bd9Sstevel@tonic-gate /*
647c478bd9Sstevel@tonic-gate  * STREAMS message allocator: principles of operation
657c478bd9Sstevel@tonic-gate  *
667c478bd9Sstevel@tonic-gate  * The streams message allocator consists of all the routines that
677c478bd9Sstevel@tonic-gate  * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
687c478bd9Sstevel@tonic-gate  * dupb(), freeb() and freemsg().  What follows is a high-level view
697c478bd9Sstevel@tonic-gate  * of how the allocator works.
707c478bd9Sstevel@tonic-gate  *
717c478bd9Sstevel@tonic-gate  * Every streams message consists of one or more mblks, a dblk, and data.
727c478bd9Sstevel@tonic-gate  * All mblks for all types of messages come from a common mblk_cache.
737c478bd9Sstevel@tonic-gate  * The dblk and data come in several flavors, depending on how the
747c478bd9Sstevel@tonic-gate  * message is allocated:
757c478bd9Sstevel@tonic-gate  *
767c478bd9Sstevel@tonic-gate  * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
777c478bd9Sstevel@tonic-gate  *     fixed-size dblk/data caches. For message sizes that are multiples of
787c478bd9Sstevel@tonic-gate  *     PAGESIZE, dblks are allocated separately from the buffer.
797c478bd9Sstevel@tonic-gate  *     The associated buffer is allocated by the constructor using kmem_alloc().
807c478bd9Sstevel@tonic-gate  *     For all other message sizes, dblk and its associated data is allocated
817c478bd9Sstevel@tonic-gate  *     as a single contiguous chunk of memory.
827c478bd9Sstevel@tonic-gate  *     Objects in these caches consist of a dblk plus its associated data.
837c478bd9Sstevel@tonic-gate  *     allocb() determines the nearest-size cache by table lookup:
847c478bd9Sstevel@tonic-gate  *     the dblk_cache[] array provides the mapping from size to dblk cache.
857c478bd9Sstevel@tonic-gate  *
867c478bd9Sstevel@tonic-gate  * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
877c478bd9Sstevel@tonic-gate  *     kmem_alloc()'ing a buffer for the data and supplying that
887c478bd9Sstevel@tonic-gate  *     buffer to gesballoc(), described below.
897c478bd9Sstevel@tonic-gate  *
907c478bd9Sstevel@tonic-gate  * (3) The four flavors of [d]esballoc[a] are all implemented by a
917c478bd9Sstevel@tonic-gate  *     common routine, gesballoc() ("generic esballoc").  gesballoc()
927c478bd9Sstevel@tonic-gate  *     allocates a dblk from the global dblk_esb_cache and sets db_base,
937c478bd9Sstevel@tonic-gate  *     db_lim and db_frtnp to describe the caller-supplied buffer.
947c478bd9Sstevel@tonic-gate  *
957c478bd9Sstevel@tonic-gate  * While there are several routines to allocate messages, there is only
967c478bd9Sstevel@tonic-gate  * one routine to free messages: freeb().  freeb() simply invokes the
977c478bd9Sstevel@tonic-gate  * dblk's free method, dbp->db_free(), which is set at allocation time.
987c478bd9Sstevel@tonic-gate  *
997c478bd9Sstevel@tonic-gate  * dupb() creates a new reference to a message by allocating a new mblk,
1007c478bd9Sstevel@tonic-gate  * incrementing the dblk reference count and setting the dblk's free
1017c478bd9Sstevel@tonic-gate  * method to dblk_decref().  The dblk's original free method is retained
1027c478bd9Sstevel@tonic-gate  * in db_lastfree.  dblk_decref() decrements the reference count on each
1037c478bd9Sstevel@tonic-gate  * freeb().  If this is not the last reference it just frees the mblk;
1047c478bd9Sstevel@tonic-gate  * if this *is* the last reference, it restores db_free to db_lastfree,
1057c478bd9Sstevel@tonic-gate  * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
1067c478bd9Sstevel@tonic-gate  *
1077c478bd9Sstevel@tonic-gate  * The implementation makes aggressive use of kmem object caching for
1087c478bd9Sstevel@tonic-gate  * maximum performance.  This makes the code simple and compact, but
1097c478bd9Sstevel@tonic-gate  * also a bit abstruse in some places.  The invariants that constitute a
1107c478bd9Sstevel@tonic-gate  * message's constructed state, described below, are more subtle than usual.
1117c478bd9Sstevel@tonic-gate  *
1127c478bd9Sstevel@tonic-gate  * Every dblk has an "attached mblk" as part of its constructed state.
1137c478bd9Sstevel@tonic-gate  * The mblk is allocated by the dblk's constructor and remains attached
1147c478bd9Sstevel@tonic-gate  * until the message is either dup'ed or pulled up.  In the dupb() case
1157c478bd9Sstevel@tonic-gate  * the mblk association doesn't matter until the last free, at which time
1167c478bd9Sstevel@tonic-gate  * dblk_decref() attaches the last mblk to the dblk.  pullupmsg() affects
1177c478bd9Sstevel@tonic-gate  * the mblk association because it swaps the leading mblks of two messages,
1187c478bd9Sstevel@tonic-gate  * so it is responsible for swapping their db_mblk pointers accordingly.
1197c478bd9Sstevel@tonic-gate  * From a constructed-state viewpoint it doesn't matter that a dblk's
1207c478bd9Sstevel@tonic-gate  * attached mblk can change while the message is allocated; all that
1217c478bd9Sstevel@tonic-gate  * matters is that the dblk has *some* attached mblk when it's freed.
1227c478bd9Sstevel@tonic-gate  *
1237c478bd9Sstevel@tonic-gate  * The sizes of the allocb() small-message caches are not magical.
1247c478bd9Sstevel@tonic-gate  * They represent a good trade-off between internal and external
1257c478bd9Sstevel@tonic-gate  * fragmentation for current workloads.  They should be reevaluated
1267c478bd9Sstevel@tonic-gate  * periodically, especially if allocations larger than DBLK_MAX_CACHE
1277c478bd9Sstevel@tonic-gate  * become common.  We use 64-byte alignment so that dblks don't
1287c478bd9Sstevel@tonic-gate  * straddle cache lines unnecessarily.
1297c478bd9Sstevel@tonic-gate  */
/*
 * Sizing parameters for the dblk caches.  dblk_cache[] has one slot per
 * DBLK_MIN_SIZE-byte step up to DBLK_MAX_CACHE, and a request for 'size'
 * bytes maps to slot (size - 1) >> DBLK_SIZE_SHIFT.  DBLK_CACHE_ALIGN is
 * the alignment passed to kmem_cache_create() for every dblk cache.
 */
#define	DBLK_MAX_CACHE		73728
#define	DBLK_CACHE_ALIGN	64
#define	DBLK_MIN_SIZE		8
#define	DBLK_SIZE_SHIFT		3

/*
 * DBLK_RTFU_SHIFT(field) yields the bit position of 'field' within the
 * 32-bit word that DBLK_RTFU_WORD() accesses starting at db_ref.  The
 * position is derived from the field's distance to db_ref (little-endian)
 * or to db_struioflag (big-endian), so these macros rely on db_ref,
 * db_type, db_flags and db_struioflag being laid out back to back.
 */
#ifdef _BIG_ENDIAN
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
#else
#define	DBLK_RTFU_SHIFT(field)	\
	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
#endif

/*
 * Build the combined ref/type/flags/uioflag word in one expression.
 * The flags byte additionally carries (ref - 1); dblk_decref() reads it
 * back through DBLK_REFMIN as the minimum reference count.  Every macro
 * argument is parenthesized so that expression arguments expand safely.
 */
#define	DBLK_RTFU(ref, type, flags, uioflag)	\
	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
	((type) << DBLK_RTFU_SHIFT(db_type)) | \
	(((flags) | ((ref) - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
#define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
/* The four bytes starting at db_ref, accessed as a single uint32_t. */
#define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
/* The four bytes starting at b_band, accessed as a single uint32_t. */
#define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
1517c478bd9Sstevel@tonic-gate 
1527c478bd9Sstevel@tonic-gate static size_t dblk_sizes[] = {
1537c478bd9Sstevel@tonic-gate #ifdef _LP64
1547c478bd9Sstevel@tonic-gate 	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3920,
1557c478bd9Sstevel@tonic-gate 	8192, 12112, 16384, 20304, 24576, 28496, 32768, 36688,
1567c478bd9Sstevel@tonic-gate 	40960, 44880, 49152, 53072, 57344, 61264, 65536, 69456,
1577c478bd9Sstevel@tonic-gate #else
1587c478bd9Sstevel@tonic-gate 	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3968,
1597c478bd9Sstevel@tonic-gate 	8192, 12160, 16384, 20352, 24576, 28544, 32768, 36736,
1607c478bd9Sstevel@tonic-gate 	40960, 44928, 49152, 53120, 57344, 61312, 65536, 69504,
1617c478bd9Sstevel@tonic-gate #endif
1627c478bd9Sstevel@tonic-gate 	DBLK_MAX_CACHE, 0
1637c478bd9Sstevel@tonic-gate };
1647c478bd9Sstevel@tonic-gate 
1657c478bd9Sstevel@tonic-gate static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
1667c478bd9Sstevel@tonic-gate static struct kmem_cache *mblk_cache;
1677c478bd9Sstevel@tonic-gate static struct kmem_cache *dblk_esb_cache;
1687c478bd9Sstevel@tonic-gate static struct kmem_cache *fthdr_cache;
1697c478bd9Sstevel@tonic-gate static struct kmem_cache *ftblk_cache;
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1727c478bd9Sstevel@tonic-gate static mblk_t *allocb_oversize(size_t size, int flags);
1737c478bd9Sstevel@tonic-gate static int allocb_tryhard_fails;
1747c478bd9Sstevel@tonic-gate static void frnop_func(void *arg);
1757c478bd9Sstevel@tonic-gate frtn_t frnop = { frnop_func };
1767c478bd9Sstevel@tonic-gate static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate static boolean_t rwnext_enter(queue_t *qp);
1797c478bd9Sstevel@tonic-gate static void rwnext_exit(queue_t *qp);
1807c478bd9Sstevel@tonic-gate 
1817c478bd9Sstevel@tonic-gate /*
1827c478bd9Sstevel@tonic-gate  * Patchable mblk/dblk kmem_cache flags.
1837c478bd9Sstevel@tonic-gate  */
1847c478bd9Sstevel@tonic-gate int dblk_kmem_flags = 0;
1857c478bd9Sstevel@tonic-gate int mblk_kmem_flags = 0;
1867c478bd9Sstevel@tonic-gate 
1877c478bd9Sstevel@tonic-gate 
1887c478bd9Sstevel@tonic-gate static int
1897c478bd9Sstevel@tonic-gate dblk_constructor(void *buf, void *cdrarg, int kmflags)
1907c478bd9Sstevel@tonic-gate {
1917c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
1927c478bd9Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
1937c478bd9Sstevel@tonic-gate 	size_t index;
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	ASSERT(msg_size != 0);
1967c478bd9Sstevel@tonic-gate 
1977c478bd9Sstevel@tonic-gate 	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;
1987c478bd9Sstevel@tonic-gate 
1997c478bd9Sstevel@tonic-gate 	ASSERT(index <= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2027c478bd9Sstevel@tonic-gate 		return (-1);
2037c478bd9Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2047c478bd9Sstevel@tonic-gate 		dbp->db_base = kmem_alloc(msg_size, kmflags);
2057c478bd9Sstevel@tonic-gate 		if (dbp->db_base == NULL) {
2067c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, dbp->db_mblk);
2077c478bd9Sstevel@tonic-gate 			return (-1);
2087c478bd9Sstevel@tonic-gate 		}
2097c478bd9Sstevel@tonic-gate 	} else {
2107c478bd9Sstevel@tonic-gate 		dbp->db_base = (unsigned char *)&dbp[1];
2117c478bd9Sstevel@tonic-gate 	}
2127c478bd9Sstevel@tonic-gate 
2137c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2147c478bd9Sstevel@tonic-gate 	dbp->db_cache = dblk_cache[index];
2157c478bd9Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + msg_size;
2167c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
2177c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2187c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2197c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2207c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2217c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2227c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2237c478bd9Sstevel@tonic-gate 	return (0);
2247c478bd9Sstevel@tonic-gate }
2257c478bd9Sstevel@tonic-gate 
2267c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2277c478bd9Sstevel@tonic-gate static int
2287c478bd9Sstevel@tonic-gate dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
2297c478bd9Sstevel@tonic-gate {
2307c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2337c478bd9Sstevel@tonic-gate 		return (-1);
2347c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2357c478bd9Sstevel@tonic-gate 	dbp->db_cache = dblk_esb_cache;
2367c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2377c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2387c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2397c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2407c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2417c478bd9Sstevel@tonic-gate 	return (0);
2427c478bd9Sstevel@tonic-gate }
2437c478bd9Sstevel@tonic-gate 
2447c478bd9Sstevel@tonic-gate static int
2457c478bd9Sstevel@tonic-gate bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
2467c478bd9Sstevel@tonic-gate {
2477c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2487c478bd9Sstevel@tonic-gate 	bcache_t *bcp = (bcache_t *)cdrarg;
2497c478bd9Sstevel@tonic-gate 
2507c478bd9Sstevel@tonic-gate 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
2517c478bd9Sstevel@tonic-gate 		return (-1);
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate 	if ((dbp->db_base = (unsigned char *)kmem_cache_alloc(bcp->buffer_cache,
2547c478bd9Sstevel@tonic-gate 	    kmflags)) == NULL) {
2557c478bd9Sstevel@tonic-gate 		kmem_cache_free(mblk_cache, dbp->db_mblk);
2567c478bd9Sstevel@tonic-gate 		return (-1);
2577c478bd9Sstevel@tonic-gate 	}
2587c478bd9Sstevel@tonic-gate 
2597c478bd9Sstevel@tonic-gate 	dbp->db_mblk->b_datap = dbp;
2607c478bd9Sstevel@tonic-gate 	dbp->db_cache = (void *)bcp;
2617c478bd9Sstevel@tonic-gate 	dbp->db_lim = dbp->db_base + bcp->size;
2627c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
2637c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = NULL;
2647c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
2657c478bd9Sstevel@tonic-gate 	dbp->db_credp = NULL;
2667c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
2677c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
2687c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
2697c478bd9Sstevel@tonic-gate 	return (0);
2707c478bd9Sstevel@tonic-gate }
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2737c478bd9Sstevel@tonic-gate static void
2747c478bd9Sstevel@tonic-gate dblk_destructor(void *buf, void *cdrarg)
2757c478bd9Sstevel@tonic-gate {
2767c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2777c478bd9Sstevel@tonic-gate 	ssize_t msg_size = (ssize_t)cdrarg;
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
2807c478bd9Sstevel@tonic-gate 
2817c478bd9Sstevel@tonic-gate 	ASSERT(msg_size != 0);
2827c478bd9Sstevel@tonic-gate 
2837c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
2847c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate 	if ((msg_size & PAGEOFFSET) == 0) {
2877c478bd9Sstevel@tonic-gate 		kmem_free(dbp->db_base, msg_size);
2887c478bd9Sstevel@tonic-gate 	}
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
2917c478bd9Sstevel@tonic-gate }
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate static void
2947c478bd9Sstevel@tonic-gate bcache_dblk_destructor(void *buf, void *cdrarg)
2957c478bd9Sstevel@tonic-gate {
2967c478bd9Sstevel@tonic-gate 	dblk_t *dbp = buf;
2977c478bd9Sstevel@tonic-gate 	bcache_t *bcp = (bcache_t *)cdrarg;
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate 	kmem_cache_free(bcp->buffer_cache, dbp->db_base);
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk->b_datap == dbp);
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioflag == 0);
3047c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_struioun.cksum.flags == 0);
3057c478bd9Sstevel@tonic-gate 
3067c478bd9Sstevel@tonic-gate 	kmem_cache_free(mblk_cache, dbp->db_mblk);
3077c478bd9Sstevel@tonic-gate }
3087c478bd9Sstevel@tonic-gate 
3097c478bd9Sstevel@tonic-gate void
3107c478bd9Sstevel@tonic-gate streams_msg_init(void)
3117c478bd9Sstevel@tonic-gate {
3127c478bd9Sstevel@tonic-gate 	char name[40];
3137c478bd9Sstevel@tonic-gate 	size_t size;
3147c478bd9Sstevel@tonic-gate 	size_t lastsize = DBLK_MIN_SIZE;
3157c478bd9Sstevel@tonic-gate 	size_t *sizep;
3167c478bd9Sstevel@tonic-gate 	struct kmem_cache *cp;
3177c478bd9Sstevel@tonic-gate 	size_t tot_size;
3187c478bd9Sstevel@tonic-gate 	int offset;
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 	mblk_cache = kmem_cache_create("streams_mblk",
3217c478bd9Sstevel@tonic-gate 		sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
3227c478bd9Sstevel@tonic-gate 		mblk_kmem_flags);
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
3257c478bd9Sstevel@tonic-gate 
3267c478bd9Sstevel@tonic-gate 		if ((offset = (size & PAGEOFFSET)) != 0) {
3277c478bd9Sstevel@tonic-gate 			/*
3287c478bd9Sstevel@tonic-gate 			 * We are in the middle of a page, dblk should
3297c478bd9Sstevel@tonic-gate 			 * be allocated on the same page
3307c478bd9Sstevel@tonic-gate 			 */
3317c478bd9Sstevel@tonic-gate 			tot_size = size + sizeof (dblk_t);
3327c478bd9Sstevel@tonic-gate 			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
3337c478bd9Sstevel@tonic-gate 								< PAGESIZE);
3347c478bd9Sstevel@tonic-gate 			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
3357c478bd9Sstevel@tonic-gate 
3367c478bd9Sstevel@tonic-gate 		} else {
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate 			/*
3397c478bd9Sstevel@tonic-gate 			 * buf size is multiple of page size, dblk and
3407c478bd9Sstevel@tonic-gate 			 * buffer are allocated separately.
3417c478bd9Sstevel@tonic-gate 			 */
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate 			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
3447c478bd9Sstevel@tonic-gate 			tot_size = sizeof (dblk_t);
3457c478bd9Sstevel@tonic-gate 		}
3467c478bd9Sstevel@tonic-gate 
3477c478bd9Sstevel@tonic-gate 		(void) sprintf(name, "streams_dblk_%ld", size);
3487c478bd9Sstevel@tonic-gate 		cp = kmem_cache_create(name, tot_size,
3497c478bd9Sstevel@tonic-gate 			DBLK_CACHE_ALIGN, dblk_constructor,
3507c478bd9Sstevel@tonic-gate 			dblk_destructor, NULL,
3517c478bd9Sstevel@tonic-gate 			(void *)(size), NULL, dblk_kmem_flags);
3527c478bd9Sstevel@tonic-gate 
3537c478bd9Sstevel@tonic-gate 		while (lastsize <= size) {
3547c478bd9Sstevel@tonic-gate 			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
3557c478bd9Sstevel@tonic-gate 			lastsize += DBLK_MIN_SIZE;
3567c478bd9Sstevel@tonic-gate 		}
3577c478bd9Sstevel@tonic-gate 	}
3587c478bd9Sstevel@tonic-gate 
3597c478bd9Sstevel@tonic-gate 	dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
3607c478bd9Sstevel@tonic-gate 			sizeof (dblk_t), DBLK_CACHE_ALIGN,
3617c478bd9Sstevel@tonic-gate 			dblk_esb_constructor, dblk_destructor, NULL,
3627c478bd9Sstevel@tonic-gate 			(void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
3637c478bd9Sstevel@tonic-gate 	fthdr_cache = kmem_cache_create("streams_fthdr",
3647c478bd9Sstevel@tonic-gate 		sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
3657c478bd9Sstevel@tonic-gate 	ftblk_cache = kmem_cache_create("streams_ftblk",
3667c478bd9Sstevel@tonic-gate 		sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
3677c478bd9Sstevel@tonic-gate 
3687c478bd9Sstevel@tonic-gate 	/* Initialize Multidata caches */
3697c478bd9Sstevel@tonic-gate 	mmd_init();
3707c478bd9Sstevel@tonic-gate }
3717c478bd9Sstevel@tonic-gate 
3727c478bd9Sstevel@tonic-gate /*ARGSUSED*/
3737c478bd9Sstevel@tonic-gate mblk_t *
3747c478bd9Sstevel@tonic-gate allocb(size_t size, uint_t pri)
3757c478bd9Sstevel@tonic-gate {
3767c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
3777c478bd9Sstevel@tonic-gate 	mblk_t *mp;
3787c478bd9Sstevel@tonic-gate 	size_t index;
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 	index =  (size - 1)  >> DBLK_SIZE_SHIFT;
3817c478bd9Sstevel@tonic-gate 
3827c478bd9Sstevel@tonic-gate 	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
3837c478bd9Sstevel@tonic-gate 		if (size != 0) {
3847c478bd9Sstevel@tonic-gate 			mp = allocb_oversize(size, KM_NOSLEEP);
3857c478bd9Sstevel@tonic-gate 			goto out;
3867c478bd9Sstevel@tonic-gate 		}
3877c478bd9Sstevel@tonic-gate 		index = 0;
3887c478bd9Sstevel@tonic-gate 	}
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
3917c478bd9Sstevel@tonic-gate 		mp = NULL;
3927c478bd9Sstevel@tonic-gate 		goto out;
3937c478bd9Sstevel@tonic-gate 	}
3947c478bd9Sstevel@tonic-gate 
3957c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
3967c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
3977c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
3987c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
3997c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
4007c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
4017c478bd9Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
4027c478bd9Sstevel@tonic-gate out:
4037c478bd9Sstevel@tonic-gate 	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);
4047c478bd9Sstevel@tonic-gate 
4057c478bd9Sstevel@tonic-gate 	return (mp);
4067c478bd9Sstevel@tonic-gate }
4077c478bd9Sstevel@tonic-gate 
4087c478bd9Sstevel@tonic-gate mblk_t *
4097c478bd9Sstevel@tonic-gate allocb_tmpl(size_t size, const mblk_t *tmpl)
4107c478bd9Sstevel@tonic-gate {
4117c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4127c478bd9Sstevel@tonic-gate 
4137c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
4147c478bd9Sstevel@tonic-gate 		cred_t *cr = DB_CRED(tmpl);
4157c478bd9Sstevel@tonic-gate 		if (cr != NULL)
4167c478bd9Sstevel@tonic-gate 			crhold(mp->b_datap->db_credp = cr);
4177c478bd9Sstevel@tonic-gate 		DB_CPID(mp) = DB_CPID(tmpl);
4187c478bd9Sstevel@tonic-gate 		DB_TYPE(mp) = DB_TYPE(tmpl);
4197c478bd9Sstevel@tonic-gate 	}
4207c478bd9Sstevel@tonic-gate 	return (mp);
4217c478bd9Sstevel@tonic-gate }
4227c478bd9Sstevel@tonic-gate 
4237c478bd9Sstevel@tonic-gate mblk_t *
4247c478bd9Sstevel@tonic-gate allocb_cred(size_t size, cred_t *cr)
4257c478bd9Sstevel@tonic-gate {
4267c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb(size, 0);
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
4297c478bd9Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
4307c478bd9Sstevel@tonic-gate 
4317c478bd9Sstevel@tonic-gate 	return (mp);
4327c478bd9Sstevel@tonic-gate }
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate mblk_t *
4357c478bd9Sstevel@tonic-gate allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr)
4367c478bd9Sstevel@tonic-gate {
4377c478bd9Sstevel@tonic-gate 	mblk_t *mp = allocb_wait(size, 0, flags, error);
4387c478bd9Sstevel@tonic-gate 
4397c478bd9Sstevel@tonic-gate 	if (mp != NULL && cr != NULL)
4407c478bd9Sstevel@tonic-gate 		crhold(mp->b_datap->db_credp = cr);
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 	return (mp);
4437c478bd9Sstevel@tonic-gate }
4447c478bd9Sstevel@tonic-gate 
4457c478bd9Sstevel@tonic-gate void
4467c478bd9Sstevel@tonic-gate freeb(mblk_t *mp)
4477c478bd9Sstevel@tonic-gate {
4487c478bd9Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
4497c478bd9Sstevel@tonic-gate 
4507c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_ref > 0);
4517c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
4527c478bd9Sstevel@tonic-gate 	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);
4537c478bd9Sstevel@tonic-gate 
4547c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate 	dbp->db_free(mp, dbp);
4577c478bd9Sstevel@tonic-gate }
4587c478bd9Sstevel@tonic-gate 
4597c478bd9Sstevel@tonic-gate void
4607c478bd9Sstevel@tonic-gate freemsg(mblk_t *mp)
4617c478bd9Sstevel@tonic-gate {
4627c478bd9Sstevel@tonic-gate 	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
4637c478bd9Sstevel@tonic-gate 	while (mp) {
4647c478bd9Sstevel@tonic-gate 		dblk_t *dbp = mp->b_datap;
4657c478bd9Sstevel@tonic-gate 		mblk_t *mp_cont = mp->b_cont;
4667c478bd9Sstevel@tonic-gate 
4677c478bd9Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
4687c478bd9Sstevel@tonic-gate 		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
4697c478bd9Sstevel@tonic-gate 
4707c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
4717c478bd9Sstevel@tonic-gate 
4727c478bd9Sstevel@tonic-gate 		dbp->db_free(mp, dbp);
4737c478bd9Sstevel@tonic-gate 		mp = mp_cont;
4747c478bd9Sstevel@tonic-gate 	}
4757c478bd9Sstevel@tonic-gate }
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate /*
4787c478bd9Sstevel@tonic-gate  * Reallocate a block for another use.  Try hard to use the old block.
4797c478bd9Sstevel@tonic-gate  * If the old data is wanted (copy), leave b_wptr at the end of the data,
4807c478bd9Sstevel@tonic-gate  * otherwise return b_wptr = b_rptr.
4817c478bd9Sstevel@tonic-gate  *
4827c478bd9Sstevel@tonic-gate  * This routine is private and unstable.
4837c478bd9Sstevel@tonic-gate  */
4847c478bd9Sstevel@tonic-gate mblk_t	*
4857c478bd9Sstevel@tonic-gate reallocb(mblk_t *mp, size_t size, uint_t copy)
4867c478bd9Sstevel@tonic-gate {
4877c478bd9Sstevel@tonic-gate 	mblk_t		*mp1;
4887c478bd9Sstevel@tonic-gate 	unsigned char	*old_rptr;
4897c478bd9Sstevel@tonic-gate 	ptrdiff_t	cur_size;
4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate 	if (mp == NULL)
4927c478bd9Sstevel@tonic-gate 		return (allocb(size, BPRI_HI));
4937c478bd9Sstevel@tonic-gate 
4947c478bd9Sstevel@tonic-gate 	cur_size = mp->b_wptr - mp->b_rptr;
4957c478bd9Sstevel@tonic-gate 	old_rptr = mp->b_rptr;
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref != 0);
4987c478bd9Sstevel@tonic-gate 
4997c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
5007c478bd9Sstevel@tonic-gate 		/*
5017c478bd9Sstevel@tonic-gate 		 * If the data is wanted and it will fit where it is, no
5027c478bd9Sstevel@tonic-gate 		 * work is required.
5037c478bd9Sstevel@tonic-gate 		 */
5047c478bd9Sstevel@tonic-gate 		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
5057c478bd9Sstevel@tonic-gate 			return (mp);
5067c478bd9Sstevel@tonic-gate 
5077c478bd9Sstevel@tonic-gate 		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
5087c478bd9Sstevel@tonic-gate 		mp1 = mp;
5097c478bd9Sstevel@tonic-gate 	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
5107c478bd9Sstevel@tonic-gate 		/* XXX other mp state could be copied too, db_flags ... ? */
5117c478bd9Sstevel@tonic-gate 		mp1->b_cont = mp->b_cont;
5127c478bd9Sstevel@tonic-gate 	} else {
5137c478bd9Sstevel@tonic-gate 		return (NULL);
5147c478bd9Sstevel@tonic-gate 	}
5157c478bd9Sstevel@tonic-gate 
5167c478bd9Sstevel@tonic-gate 	if (copy) {
5177c478bd9Sstevel@tonic-gate 		bcopy(old_rptr, mp1->b_rptr, cur_size);
5187c478bd9Sstevel@tonic-gate 		mp1->b_wptr = mp1->b_rptr + cur_size;
5197c478bd9Sstevel@tonic-gate 	}
5207c478bd9Sstevel@tonic-gate 
5217c478bd9Sstevel@tonic-gate 	if (mp != mp1)
5227c478bd9Sstevel@tonic-gate 		freeb(mp);
5237c478bd9Sstevel@tonic-gate 
5247c478bd9Sstevel@tonic-gate 	return (mp1);
5257c478bd9Sstevel@tonic-gate }
5267c478bd9Sstevel@tonic-gate 
5277c478bd9Sstevel@tonic-gate static void
5287c478bd9Sstevel@tonic-gate dblk_lastfree(mblk_t *mp, dblk_t *dbp)
5297c478bd9Sstevel@tonic-gate {
5307c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
5317c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
5327c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
5337c478bd9Sstevel@tonic-gate 
5347c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
5357c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
5367c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
5377c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
5387c478bd9Sstevel@tonic-gate 	}
5397c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
5407c478bd9Sstevel@tonic-gate 
5417c478bd9Sstevel@tonic-gate 	/* Reset the struioflag and the checksum flag fields */
5427c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
5437c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
5447c478bd9Sstevel@tonic-gate 
5457c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
5467c478bd9Sstevel@tonic-gate }
5477c478bd9Sstevel@tonic-gate 
5487c478bd9Sstevel@tonic-gate static void
5497c478bd9Sstevel@tonic-gate dblk_decref(mblk_t *mp, dblk_t *dbp)
5507c478bd9Sstevel@tonic-gate {
5517c478bd9Sstevel@tonic-gate 	if (dbp->db_ref != 1) {
5527c478bd9Sstevel@tonic-gate 		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
5537c478bd9Sstevel@tonic-gate 		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
5547c478bd9Sstevel@tonic-gate 		/*
5557c478bd9Sstevel@tonic-gate 		 * atomic_add_32_nv() just decremented db_ref, so we no longer
5567c478bd9Sstevel@tonic-gate 		 * have a reference to the dblk, which means another thread
5577c478bd9Sstevel@tonic-gate 		 * could free it.  Therefore we cannot examine the dblk to
5587c478bd9Sstevel@tonic-gate 		 * determine whether ours was the last reference.  Instead,
5597c478bd9Sstevel@tonic-gate 		 * we extract the new and minimum reference counts from rtfu.
5607c478bd9Sstevel@tonic-gate 		 * Note that all we're really saying is "if (ref != refmin)".
5617c478bd9Sstevel@tonic-gate 		 */
5627c478bd9Sstevel@tonic-gate 		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
5637c478bd9Sstevel@tonic-gate 		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
5647c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, mp);
5657c478bd9Sstevel@tonic-gate 			return;
5667c478bd9Sstevel@tonic-gate 		}
5677c478bd9Sstevel@tonic-gate 	}
5687c478bd9Sstevel@tonic-gate 	dbp->db_mblk = mp;
5697c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree;
5707c478bd9Sstevel@tonic-gate 	dbp->db_lastfree(mp, dbp);
5717c478bd9Sstevel@tonic-gate }
5727c478bd9Sstevel@tonic-gate 
5737c478bd9Sstevel@tonic-gate mblk_t *
5747c478bd9Sstevel@tonic-gate dupb(mblk_t *mp)
5757c478bd9Sstevel@tonic-gate {
5767c478bd9Sstevel@tonic-gate 	dblk_t *dbp = mp->b_datap;
5777c478bd9Sstevel@tonic-gate 	mblk_t *new_mp;
5787c478bd9Sstevel@tonic-gate 	uint32_t oldrtfu, newrtfu;
5797c478bd9Sstevel@tonic-gate 
5807c478bd9Sstevel@tonic-gate 	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
5817c478bd9Sstevel@tonic-gate 		goto out;
5827c478bd9Sstevel@tonic-gate 
5837c478bd9Sstevel@tonic-gate 	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
5847c478bd9Sstevel@tonic-gate 	new_mp->b_rptr = mp->b_rptr;
5857c478bd9Sstevel@tonic-gate 	new_mp->b_wptr = mp->b_wptr;
5867c478bd9Sstevel@tonic-gate 	new_mp->b_datap = dbp;
5877c478bd9Sstevel@tonic-gate 	new_mp->b_queue = NULL;
5887c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);
5897c478bd9Sstevel@tonic-gate 
5907c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate 	/*
5937c478bd9Sstevel@tonic-gate 	 * First-dup optimization.  The enabling assumption is that there
5947c478bd9Sstevel@tonic-gate 	 * can can never be a race (in correct code) to dup the first copy
5957c478bd9Sstevel@tonic-gate 	 * of a message.  Therefore we don't need to do it atomically.
5967c478bd9Sstevel@tonic-gate 	 */
5977c478bd9Sstevel@tonic-gate 	if (dbp->db_free != dblk_decref) {
5987c478bd9Sstevel@tonic-gate 		dbp->db_free = dblk_decref;
5997c478bd9Sstevel@tonic-gate 		dbp->db_ref++;
6007c478bd9Sstevel@tonic-gate 		goto out;
6017c478bd9Sstevel@tonic-gate 	}
6027c478bd9Sstevel@tonic-gate 
6037c478bd9Sstevel@tonic-gate 	do {
6047c478bd9Sstevel@tonic-gate 		ASSERT(dbp->db_ref > 0);
6057c478bd9Sstevel@tonic-gate 		oldrtfu = DBLK_RTFU_WORD(dbp);
6067c478bd9Sstevel@tonic-gate 		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
6077c478bd9Sstevel@tonic-gate 		/*
6087c478bd9Sstevel@tonic-gate 		 * If db_ref is maxed out we can't dup this message anymore.
6097c478bd9Sstevel@tonic-gate 		 */
6107c478bd9Sstevel@tonic-gate 		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
6117c478bd9Sstevel@tonic-gate 			kmem_cache_free(mblk_cache, new_mp);
6127c478bd9Sstevel@tonic-gate 			new_mp = NULL;
6137c478bd9Sstevel@tonic-gate 			goto out;
6147c478bd9Sstevel@tonic-gate 		}
6157c478bd9Sstevel@tonic-gate 	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);
6167c478bd9Sstevel@tonic-gate 
6177c478bd9Sstevel@tonic-gate out:
6187c478bd9Sstevel@tonic-gate 	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
6197c478bd9Sstevel@tonic-gate 	return (new_mp);
6207c478bd9Sstevel@tonic-gate }
6217c478bd9Sstevel@tonic-gate 
6227c478bd9Sstevel@tonic-gate static void
6237c478bd9Sstevel@tonic-gate dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
6247c478bd9Sstevel@tonic-gate {
6257c478bd9Sstevel@tonic-gate 	frtn_t *frp = dbp->db_frtnp;
6267c478bd9Sstevel@tonic-gate 
6277c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
6287c478bd9Sstevel@tonic-gate 	frp->free_func(frp->free_arg);
6297c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
6307c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
6337c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
6347c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
6357c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
6367c478bd9Sstevel@tonic-gate 	}
6377c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
6387c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
6397c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
6407c478bd9Sstevel@tonic-gate 
6417c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
6427c478bd9Sstevel@tonic-gate }
6437c478bd9Sstevel@tonic-gate 
/*
 * Free routine that deliberately does nothing; 'arg' is ignored.
 * NOTE(review): presumably the free_func behind 'frnop' - confirm.
 */
/*ARGSUSED*/
static void
frnop_func(void *arg)
{
}
6497c478bd9Sstevel@tonic-gate 
6507c478bd9Sstevel@tonic-gate /*
6517c478bd9Sstevel@tonic-gate  * Generic esballoc used to implement the four flavors: [d]esballoc[a].
6527c478bd9Sstevel@tonic-gate  */
6537c478bd9Sstevel@tonic-gate static mblk_t *
6547c478bd9Sstevel@tonic-gate gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
6557c478bd9Sstevel@tonic-gate 	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
6567c478bd9Sstevel@tonic-gate {
6577c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
6587c478bd9Sstevel@tonic-gate 	mblk_t *mp;
6597c478bd9Sstevel@tonic-gate 
6607c478bd9Sstevel@tonic-gate 	ASSERT(base != NULL && frp != NULL);
6617c478bd9Sstevel@tonic-gate 
6627c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
6637c478bd9Sstevel@tonic-gate 		mp = NULL;
6647c478bd9Sstevel@tonic-gate 		goto out;
6657c478bd9Sstevel@tonic-gate 	}
6667c478bd9Sstevel@tonic-gate 
6677c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
6687c478bd9Sstevel@tonic-gate 	dbp->db_base = base;
6697c478bd9Sstevel@tonic-gate 	dbp->db_lim = base + size;
6707c478bd9Sstevel@tonic-gate 	dbp->db_free = dbp->db_lastfree = lastfree;
6717c478bd9Sstevel@tonic-gate 	dbp->db_frtnp = frp;
6727c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = db_rtfu;
6737c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
6747c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = base;
6757c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
6767c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
6777c478bd9Sstevel@tonic-gate 
6787c478bd9Sstevel@tonic-gate out:
6797c478bd9Sstevel@tonic-gate 	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
6807c478bd9Sstevel@tonic-gate 	return (mp);
6817c478bd9Sstevel@tonic-gate }
6827c478bd9Sstevel@tonic-gate 
6837c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6847c478bd9Sstevel@tonic-gate mblk_t *
6857c478bd9Sstevel@tonic-gate esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
6867c478bd9Sstevel@tonic-gate {
6877c478bd9Sstevel@tonic-gate 	mblk_t *mp;
6887c478bd9Sstevel@tonic-gate 
6897c478bd9Sstevel@tonic-gate 	/*
6907c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
6917c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
6927c478bd9Sstevel@tonic-gate 	 * call optimization.
6937c478bd9Sstevel@tonic-gate 	 */
6947c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
6957c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
6967c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
6977c478bd9Sstevel@tonic-gate 
6987c478bd9Sstevel@tonic-gate 		if (mp != NULL)
6997c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
7007c478bd9Sstevel@tonic-gate 		return (mp);
7017c478bd9Sstevel@tonic-gate 	}
7027c478bd9Sstevel@tonic-gate 
7037c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7047c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
7057c478bd9Sstevel@tonic-gate }
7067c478bd9Sstevel@tonic-gate 
7077c478bd9Sstevel@tonic-gate /*
7087c478bd9Sstevel@tonic-gate  * Same as esballoc() but sleeps waiting for memory.
7097c478bd9Sstevel@tonic-gate  */
7107c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7117c478bd9Sstevel@tonic-gate mblk_t *
7127c478bd9Sstevel@tonic-gate esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7137c478bd9Sstevel@tonic-gate {
7147c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7157c478bd9Sstevel@tonic-gate 
7167c478bd9Sstevel@tonic-gate 	/*
7177c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7187c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7197c478bd9Sstevel@tonic-gate 	 * call optimization.
7207c478bd9Sstevel@tonic-gate 	 */
7217c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7227c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7237c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_SLEEP);
7247c478bd9Sstevel@tonic-gate 
7257c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
7267c478bd9Sstevel@tonic-gate 		return (mp);
7277c478bd9Sstevel@tonic-gate 	}
7287c478bd9Sstevel@tonic-gate 
7297c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7307c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_SLEEP));
7317c478bd9Sstevel@tonic-gate }
7327c478bd9Sstevel@tonic-gate 
7337c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7347c478bd9Sstevel@tonic-gate mblk_t *
7357c478bd9Sstevel@tonic-gate desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7367c478bd9Sstevel@tonic-gate {
7377c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7387c478bd9Sstevel@tonic-gate 
7397c478bd9Sstevel@tonic-gate 	/*
7407c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7417c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7427c478bd9Sstevel@tonic-gate 	 * call optimization.
7437c478bd9Sstevel@tonic-gate 	 */
7447c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7457c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7467c478bd9Sstevel@tonic-gate 			frp, dblk_lastfree_desb, KM_NOSLEEP);
7477c478bd9Sstevel@tonic-gate 
7487c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7497c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
7507c478bd9Sstevel@tonic-gate 		return (mp);
7517c478bd9Sstevel@tonic-gate 	}
7527c478bd9Sstevel@tonic-gate 
7537c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
7547c478bd9Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
7557c478bd9Sstevel@tonic-gate }
7567c478bd9Sstevel@tonic-gate 
7577c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7587c478bd9Sstevel@tonic-gate mblk_t *
7597c478bd9Sstevel@tonic-gate esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7607c478bd9Sstevel@tonic-gate {
7617c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7627c478bd9Sstevel@tonic-gate 
7637c478bd9Sstevel@tonic-gate 	/*
7647c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7657c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7667c478bd9Sstevel@tonic-gate 	 * call optimization.
7677c478bd9Sstevel@tonic-gate 	 */
7687c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7697c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7707c478bd9Sstevel@tonic-gate 		    frp, freebs_enqueue, KM_NOSLEEP);
7717c478bd9Sstevel@tonic-gate 
7727c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7737c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
7747c478bd9Sstevel@tonic-gate 		return (mp);
7757c478bd9Sstevel@tonic-gate 	}
7767c478bd9Sstevel@tonic-gate 
7777c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7787c478bd9Sstevel@tonic-gate 	    frp, freebs_enqueue, KM_NOSLEEP));
7797c478bd9Sstevel@tonic-gate }
7807c478bd9Sstevel@tonic-gate 
7817c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7827c478bd9Sstevel@tonic-gate mblk_t *
7837c478bd9Sstevel@tonic-gate desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
7847c478bd9Sstevel@tonic-gate {
7857c478bd9Sstevel@tonic-gate 	mblk_t *mp;
7867c478bd9Sstevel@tonic-gate 
7877c478bd9Sstevel@tonic-gate 	/*
7887c478bd9Sstevel@tonic-gate 	 * Note that this is structured to allow the common case (i.e.
7897c478bd9Sstevel@tonic-gate 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
7907c478bd9Sstevel@tonic-gate 	 * call optimization.
7917c478bd9Sstevel@tonic-gate 	 */
7927c478bd9Sstevel@tonic-gate 	if (!str_ftnever) {
7937c478bd9Sstevel@tonic-gate 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
7947c478bd9Sstevel@tonic-gate 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
7957c478bd9Sstevel@tonic-gate 
7967c478bd9Sstevel@tonic-gate 		if (mp != NULL)
7977c478bd9Sstevel@tonic-gate 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
7987c478bd9Sstevel@tonic-gate 		return (mp);
7997c478bd9Sstevel@tonic-gate 	}
8007c478bd9Sstevel@tonic-gate 
8017c478bd9Sstevel@tonic-gate 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
8027c478bd9Sstevel@tonic-gate 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
8037c478bd9Sstevel@tonic-gate }
8047c478bd9Sstevel@tonic-gate 
8057c478bd9Sstevel@tonic-gate static void
8067c478bd9Sstevel@tonic-gate bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
8077c478bd9Sstevel@tonic-gate {
8087c478bd9Sstevel@tonic-gate 	bcache_t *bcp = dbp->db_cache;
8097c478bd9Sstevel@tonic-gate 
8107c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
8117c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
8127c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
8137c478bd9Sstevel@tonic-gate 
8147c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
8157c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
8167c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
8177c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
8187c478bd9Sstevel@tonic-gate 	}
8197c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
8207c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
8217c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
8227c478bd9Sstevel@tonic-gate 
8237c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8247c478bd9Sstevel@tonic-gate 	kmem_cache_free(bcp->dblk_cache, dbp);
8257c478bd9Sstevel@tonic-gate 	bcp->alloc--;
8267c478bd9Sstevel@tonic-gate 
8277c478bd9Sstevel@tonic-gate 	if (bcp->alloc == 0 && bcp->destroy != 0) {
8287c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
8297c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
8307c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8317c478bd9Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
8327c478bd9Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
8337c478bd9Sstevel@tonic-gate 	} else {
8347c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8357c478bd9Sstevel@tonic-gate 	}
8367c478bd9Sstevel@tonic-gate }
8377c478bd9Sstevel@tonic-gate 
8387c478bd9Sstevel@tonic-gate bcache_t *
8397c478bd9Sstevel@tonic-gate bcache_create(char *name, size_t size, uint_t align)
8407c478bd9Sstevel@tonic-gate {
8417c478bd9Sstevel@tonic-gate 	bcache_t *bcp;
8427c478bd9Sstevel@tonic-gate 	char buffer[255];
8437c478bd9Sstevel@tonic-gate 
8447c478bd9Sstevel@tonic-gate 	ASSERT((align & (align - 1)) == 0);
8457c478bd9Sstevel@tonic-gate 
8467c478bd9Sstevel@tonic-gate 	if ((bcp = (bcache_t *)kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) ==
8477c478bd9Sstevel@tonic-gate 	    NULL) {
8487c478bd9Sstevel@tonic-gate 		return (NULL);
8497c478bd9Sstevel@tonic-gate 	}
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate 	bcp->size = size;
8527c478bd9Sstevel@tonic-gate 	bcp->align = align;
8537c478bd9Sstevel@tonic-gate 	bcp->alloc = 0;
8547c478bd9Sstevel@tonic-gate 	bcp->destroy = 0;
8557c478bd9Sstevel@tonic-gate 
8567c478bd9Sstevel@tonic-gate 	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
8577c478bd9Sstevel@tonic-gate 
8587c478bd9Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_buffer_cache", name);
8597c478bd9Sstevel@tonic-gate 	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
8607c478bd9Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
8617c478bd9Sstevel@tonic-gate 	(void) sprintf(buffer, "%s_dblk_cache", name);
8627c478bd9Sstevel@tonic-gate 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
8637c478bd9Sstevel@tonic-gate 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
8647c478bd9Sstevel@tonic-gate 						NULL, (void *)bcp, NULL, 0);
8657c478bd9Sstevel@tonic-gate 
8667c478bd9Sstevel@tonic-gate 	return (bcp);
8677c478bd9Sstevel@tonic-gate }
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate void
8707c478bd9Sstevel@tonic-gate bcache_destroy(bcache_t *bcp)
8717c478bd9Sstevel@tonic-gate {
8727c478bd9Sstevel@tonic-gate 	ASSERT(bcp != NULL);
8737c478bd9Sstevel@tonic-gate 
8747c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8757c478bd9Sstevel@tonic-gate 	if (bcp->alloc == 0) {
8767c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->dblk_cache);
8777c478bd9Sstevel@tonic-gate 		kmem_cache_destroy(bcp->buffer_cache);
8787c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8797c478bd9Sstevel@tonic-gate 		mutex_destroy(&bcp->mutex);
8807c478bd9Sstevel@tonic-gate 		kmem_free(bcp, sizeof (bcache_t));
8817c478bd9Sstevel@tonic-gate 	} else {
8827c478bd9Sstevel@tonic-gate 		bcp->destroy++;
8837c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8847c478bd9Sstevel@tonic-gate 	}
8857c478bd9Sstevel@tonic-gate }
8867c478bd9Sstevel@tonic-gate 
8877c478bd9Sstevel@tonic-gate /*ARGSUSED*/
8887c478bd9Sstevel@tonic-gate mblk_t *
8897c478bd9Sstevel@tonic-gate bcache_allocb(bcache_t *bcp, uint_t pri)
8907c478bd9Sstevel@tonic-gate {
8917c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
8927c478bd9Sstevel@tonic-gate 	mblk_t *mp = NULL;
8937c478bd9Sstevel@tonic-gate 
8947c478bd9Sstevel@tonic-gate 	ASSERT(bcp != NULL);
8957c478bd9Sstevel@tonic-gate 
8967c478bd9Sstevel@tonic-gate 	mutex_enter(&bcp->mutex);
8977c478bd9Sstevel@tonic-gate 	if (bcp->destroy != 0) {
8987c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
8997c478bd9Sstevel@tonic-gate 		goto out;
9007c478bd9Sstevel@tonic-gate 	}
9017c478bd9Sstevel@tonic-gate 
9027c478bd9Sstevel@tonic-gate 	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
9037c478bd9Sstevel@tonic-gate 		mutex_exit(&bcp->mutex);
9047c478bd9Sstevel@tonic-gate 		goto out;
9057c478bd9Sstevel@tonic-gate 	}
9067c478bd9Sstevel@tonic-gate 	bcp->alloc++;
9077c478bd9Sstevel@tonic-gate 	mutex_exit(&bcp->mutex);
9087c478bd9Sstevel@tonic-gate 
9097c478bd9Sstevel@tonic-gate 	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
9107c478bd9Sstevel@tonic-gate 
9117c478bd9Sstevel@tonic-gate 	mp = dbp->db_mblk;
9127c478bd9Sstevel@tonic-gate 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
9137c478bd9Sstevel@tonic-gate 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
9147c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
9157c478bd9Sstevel@tonic-gate 	mp->b_queue = NULL;
9167c478bd9Sstevel@tonic-gate 	MBLK_BAND_FLAG_WORD(mp) = 0;
9177c478bd9Sstevel@tonic-gate 	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
9187c478bd9Sstevel@tonic-gate out:
9197c478bd9Sstevel@tonic-gate 	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
9207c478bd9Sstevel@tonic-gate 
9217c478bd9Sstevel@tonic-gate 	return (mp);
9227c478bd9Sstevel@tonic-gate }
9237c478bd9Sstevel@tonic-gate 
9247c478bd9Sstevel@tonic-gate static void
9257c478bd9Sstevel@tonic-gate dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
9267c478bd9Sstevel@tonic-gate {
9277c478bd9Sstevel@tonic-gate 	ASSERT(dbp->db_mblk == mp);
9287c478bd9Sstevel@tonic-gate 	if (dbp->db_fthdr != NULL)
9297c478bd9Sstevel@tonic-gate 		str_ftfree(dbp);
9307c478bd9Sstevel@tonic-gate 
9317c478bd9Sstevel@tonic-gate 	/* set credp and projid to be 'unspecified' before returning to cache */
9327c478bd9Sstevel@tonic-gate 	if (dbp->db_credp != NULL) {
9337c478bd9Sstevel@tonic-gate 		crfree(dbp->db_credp);
9347c478bd9Sstevel@tonic-gate 		dbp->db_credp = NULL;
9357c478bd9Sstevel@tonic-gate 	}
9367c478bd9Sstevel@tonic-gate 	dbp->db_cpid = -1;
9377c478bd9Sstevel@tonic-gate 	dbp->db_struioflag = 0;
9387c478bd9Sstevel@tonic-gate 	dbp->db_struioun.cksum.flags = 0;
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate 	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
9417c478bd9Sstevel@tonic-gate 	kmem_cache_free(dbp->db_cache, dbp);
9427c478bd9Sstevel@tonic-gate }
9437c478bd9Sstevel@tonic-gate 
9447c478bd9Sstevel@tonic-gate static mblk_t *
9457c478bd9Sstevel@tonic-gate allocb_oversize(size_t size, int kmflags)
9467c478bd9Sstevel@tonic-gate {
9477c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9487c478bd9Sstevel@tonic-gate 	void *buf;
9497c478bd9Sstevel@tonic-gate 
9507c478bd9Sstevel@tonic-gate 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
9517c478bd9Sstevel@tonic-gate 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
9527c478bd9Sstevel@tonic-gate 		return (NULL);
9537c478bd9Sstevel@tonic-gate 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
9547c478bd9Sstevel@tonic-gate 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
9557c478bd9Sstevel@tonic-gate 		kmem_free(buf, size);
9567c478bd9Sstevel@tonic-gate 
9577c478bd9Sstevel@tonic-gate 	if (mp != NULL)
9587c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 	return (mp);
9617c478bd9Sstevel@tonic-gate }
9627c478bd9Sstevel@tonic-gate 
9637c478bd9Sstevel@tonic-gate mblk_t *
9647c478bd9Sstevel@tonic-gate allocb_tryhard(size_t target_size)
9657c478bd9Sstevel@tonic-gate {
9667c478bd9Sstevel@tonic-gate 	size_t size;
9677c478bd9Sstevel@tonic-gate 	mblk_t *bp;
9687c478bd9Sstevel@tonic-gate 
9697c478bd9Sstevel@tonic-gate 	for (size = target_size; size < target_size + 512;
9707c478bd9Sstevel@tonic-gate 	    size += DBLK_CACHE_ALIGN)
9717c478bd9Sstevel@tonic-gate 		if ((bp = allocb(size, BPRI_HI)) != NULL)
9727c478bd9Sstevel@tonic-gate 			return (bp);
9737c478bd9Sstevel@tonic-gate 	allocb_tryhard_fails++;
9747c478bd9Sstevel@tonic-gate 	return (NULL);
9757c478bd9Sstevel@tonic-gate }
9767c478bd9Sstevel@tonic-gate 
9777c478bd9Sstevel@tonic-gate /*
9787c478bd9Sstevel@tonic-gate  * This routine is consolidation private for STREAMS internal use
9797c478bd9Sstevel@tonic-gate  * This routine may only be called from sync routines (i.e., not
9807c478bd9Sstevel@tonic-gate  * from put or service procedures).  It is located here (rather
9817c478bd9Sstevel@tonic-gate  * than strsubr.c) so that we don't have to expose all of the
9827c478bd9Sstevel@tonic-gate  * allocb() implementation details in header files.
9837c478bd9Sstevel@tonic-gate  */
9847c478bd9Sstevel@tonic-gate mblk_t *
9857c478bd9Sstevel@tonic-gate allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
9867c478bd9Sstevel@tonic-gate {
9877c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
9887c478bd9Sstevel@tonic-gate 	mblk_t *mp;
9897c478bd9Sstevel@tonic-gate 	size_t index;
9907c478bd9Sstevel@tonic-gate 
9917c478bd9Sstevel@tonic-gate 	index = (size -1) >> DBLK_SIZE_SHIFT;
9927c478bd9Sstevel@tonic-gate 
9937c478bd9Sstevel@tonic-gate 	if (flags & STR_NOSIG) {
9947c478bd9Sstevel@tonic-gate 		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
9957c478bd9Sstevel@tonic-gate 			if (size != 0) {
9967c478bd9Sstevel@tonic-gate 				mp = allocb_oversize(size, KM_SLEEP);
9977c478bd9Sstevel@tonic-gate 				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
9987c478bd9Sstevel@tonic-gate 				    (uintptr_t)mp);
9997c478bd9Sstevel@tonic-gate 				return (mp);
10007c478bd9Sstevel@tonic-gate 			}
10017c478bd9Sstevel@tonic-gate 			index = 0;
10027c478bd9Sstevel@tonic-gate 		}
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
10057c478bd9Sstevel@tonic-gate 		mp = dbp->db_mblk;
10067c478bd9Sstevel@tonic-gate 		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
10077c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = mp->b_cont = NULL;
10087c478bd9Sstevel@tonic-gate 		mp->b_rptr = mp->b_wptr = dbp->db_base;
10097c478bd9Sstevel@tonic-gate 		mp->b_queue = NULL;
10107c478bd9Sstevel@tonic-gate 		MBLK_BAND_FLAG_WORD(mp) = 0;
10117c478bd9Sstevel@tonic-gate 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
10127c478bd9Sstevel@tonic-gate 
10137c478bd9Sstevel@tonic-gate 		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
10147c478bd9Sstevel@tonic-gate 
10157c478bd9Sstevel@tonic-gate 	} else {
10167c478bd9Sstevel@tonic-gate 		while ((mp = allocb(size, pri)) == NULL) {
10177c478bd9Sstevel@tonic-gate 			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
10187c478bd9Sstevel@tonic-gate 				return (NULL);
10197c478bd9Sstevel@tonic-gate 		}
10207c478bd9Sstevel@tonic-gate 	}
10217c478bd9Sstevel@tonic-gate 
10227c478bd9Sstevel@tonic-gate 	return (mp);
10237c478bd9Sstevel@tonic-gate }
10247c478bd9Sstevel@tonic-gate 
10257c478bd9Sstevel@tonic-gate /*
10267c478bd9Sstevel@tonic-gate  * Call function 'func' with 'arg' when a class zero block can
10277c478bd9Sstevel@tonic-gate  * be allocated with priority 'pri'.
10287c478bd9Sstevel@tonic-gate  */
10297c478bd9Sstevel@tonic-gate bufcall_id_t
10307c478bd9Sstevel@tonic-gate esbbcall(uint_t pri, void (*func)(void *), void *arg)
10317c478bd9Sstevel@tonic-gate {
10327c478bd9Sstevel@tonic-gate 	return (bufcall(1, pri, func, arg));
10337c478bd9Sstevel@tonic-gate }
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate /*
10367c478bd9Sstevel@tonic-gate  * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
10377c478bd9Sstevel@tonic-gate  * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
10387c478bd9Sstevel@tonic-gate  * This provides consistency for all internal allocators of ioctl.
10397c478bd9Sstevel@tonic-gate  */
10407c478bd9Sstevel@tonic-gate mblk_t *
10417c478bd9Sstevel@tonic-gate mkiocb(uint_t cmd)
10427c478bd9Sstevel@tonic-gate {
10437c478bd9Sstevel@tonic-gate 	struct iocblk	*ioc;
10447c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
10457c478bd9Sstevel@tonic-gate 
10467c478bd9Sstevel@tonic-gate 	/*
10477c478bd9Sstevel@tonic-gate 	 * Allocate enough space for any of the ioctl related messages.
10487c478bd9Sstevel@tonic-gate 	 */
10497c478bd9Sstevel@tonic-gate 	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
10507c478bd9Sstevel@tonic-gate 		return (NULL);
10517c478bd9Sstevel@tonic-gate 
10527c478bd9Sstevel@tonic-gate 	bzero(mp->b_rptr, sizeof (union ioctypes));
10537c478bd9Sstevel@tonic-gate 
10547c478bd9Sstevel@tonic-gate 	/*
10557c478bd9Sstevel@tonic-gate 	 * Set the mblk_t information and ptrs correctly.
10567c478bd9Sstevel@tonic-gate 	 */
10577c478bd9Sstevel@tonic-gate 	mp->b_wptr += sizeof (struct iocblk);
10587c478bd9Sstevel@tonic-gate 	mp->b_datap->db_type = M_IOCTL;
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate 	/*
10617c478bd9Sstevel@tonic-gate 	 * Fill in the fields.
10627c478bd9Sstevel@tonic-gate 	 */
10637c478bd9Sstevel@tonic-gate 	ioc		= (struct iocblk *)mp->b_rptr;
10647c478bd9Sstevel@tonic-gate 	ioc->ioc_cmd	= cmd;
10657c478bd9Sstevel@tonic-gate 	ioc->ioc_cr	= kcred;
10667c478bd9Sstevel@tonic-gate 	ioc->ioc_id	= getiocseqno();
10677c478bd9Sstevel@tonic-gate 	ioc->ioc_flag	= IOC_NATIVE;
10687c478bd9Sstevel@tonic-gate 	return (mp);
10697c478bd9Sstevel@tonic-gate }
10707c478bd9Sstevel@tonic-gate 
10717c478bd9Sstevel@tonic-gate /*
10727c478bd9Sstevel@tonic-gate  * test if block of given size can be allocated with a request of
10737c478bd9Sstevel@tonic-gate  * the given priority.
10747c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
10757c478bd9Sstevel@tonic-gate  */
10767c478bd9Sstevel@tonic-gate /* ARGSUSED */
10777c478bd9Sstevel@tonic-gate int
10787c478bd9Sstevel@tonic-gate testb(size_t size, uint_t pri)
10797c478bd9Sstevel@tonic-gate {
10807c478bd9Sstevel@tonic-gate 	return ((size + sizeof (dblk_t)) <= kmem_avail());
10817c478bd9Sstevel@tonic-gate }
10827c478bd9Sstevel@tonic-gate 
10837c478bd9Sstevel@tonic-gate /*
10847c478bd9Sstevel@tonic-gate  * Call function 'func' with argument 'arg' when there is a reasonably
10857c478bd9Sstevel@tonic-gate  * good chance that a block of size 'size' can be allocated.
10867c478bd9Sstevel@tonic-gate  * 'pri' is no longer used, but is retained for compatibility.
10877c478bd9Sstevel@tonic-gate  */
10887c478bd9Sstevel@tonic-gate /* ARGSUSED */
10897c478bd9Sstevel@tonic-gate bufcall_id_t
10907c478bd9Sstevel@tonic-gate bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
10917c478bd9Sstevel@tonic-gate {
10927c478bd9Sstevel@tonic-gate 	static long bid = 1;	/* always odd to save checking for zero */
10937c478bd9Sstevel@tonic-gate 	bufcall_id_t bc_id;
10947c478bd9Sstevel@tonic-gate 	struct strbufcall *bcp;
10957c478bd9Sstevel@tonic-gate 
10967c478bd9Sstevel@tonic-gate 	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
10977c478bd9Sstevel@tonic-gate 		return (0);
10987c478bd9Sstevel@tonic-gate 
10997c478bd9Sstevel@tonic-gate 	bcp->bc_func = func;
11007c478bd9Sstevel@tonic-gate 	bcp->bc_arg = arg;
11017c478bd9Sstevel@tonic-gate 	bcp->bc_size = size;
11027c478bd9Sstevel@tonic-gate 	bcp->bc_next = NULL;
11037c478bd9Sstevel@tonic-gate 	bcp->bc_executor = NULL;
11047c478bd9Sstevel@tonic-gate 
11057c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
11067c478bd9Sstevel@tonic-gate 	/*
11077c478bd9Sstevel@tonic-gate 	 * After bcp is linked into strbcalls and strbcall_lock is dropped there
11087c478bd9Sstevel@tonic-gate 	 * should be no references to bcp since it may be freed by
11097c478bd9Sstevel@tonic-gate 	 * runbufcalls(). Since bcp_id field is returned, we save its value in
11107c478bd9Sstevel@tonic-gate 	 * the local var.
11117c478bd9Sstevel@tonic-gate 	 */
11127c478bd9Sstevel@tonic-gate 	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate 	/*
11157c478bd9Sstevel@tonic-gate 	 * add newly allocated stream event to existing
11167c478bd9Sstevel@tonic-gate 	 * linked list of events.
11177c478bd9Sstevel@tonic-gate 	 */
11187c478bd9Sstevel@tonic-gate 	if (strbcalls.bc_head == NULL) {
11197c478bd9Sstevel@tonic-gate 		strbcalls.bc_head = strbcalls.bc_tail = bcp;
11207c478bd9Sstevel@tonic-gate 	} else {
11217c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail->bc_next = bcp;
11227c478bd9Sstevel@tonic-gate 		strbcalls.bc_tail = bcp;
11237c478bd9Sstevel@tonic-gate 	}
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate 	cv_signal(&strbcall_cv);
11267c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
11277c478bd9Sstevel@tonic-gate 	return (bc_id);
11287c478bd9Sstevel@tonic-gate }
11297c478bd9Sstevel@tonic-gate 
11307c478bd9Sstevel@tonic-gate /*
11317c478bd9Sstevel@tonic-gate  * Cancel a bufcall request.
11327c478bd9Sstevel@tonic-gate  */
11337c478bd9Sstevel@tonic-gate void
11347c478bd9Sstevel@tonic-gate unbufcall(bufcall_id_t id)
11357c478bd9Sstevel@tonic-gate {
11367c478bd9Sstevel@tonic-gate 	strbufcall_t *bcp, *pbcp;
11377c478bd9Sstevel@tonic-gate 
11387c478bd9Sstevel@tonic-gate 	mutex_enter(&strbcall_lock);
11397c478bd9Sstevel@tonic-gate again:
11407c478bd9Sstevel@tonic-gate 	pbcp = NULL;
11417c478bd9Sstevel@tonic-gate 	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
11427c478bd9Sstevel@tonic-gate 		if (id == bcp->bc_id)
11437c478bd9Sstevel@tonic-gate 			break;
11447c478bd9Sstevel@tonic-gate 		pbcp = bcp;
11457c478bd9Sstevel@tonic-gate 	}
11467c478bd9Sstevel@tonic-gate 	if (bcp) {
11477c478bd9Sstevel@tonic-gate 		if (bcp->bc_executor != NULL) {
11487c478bd9Sstevel@tonic-gate 			if (bcp->bc_executor != curthread) {
11497c478bd9Sstevel@tonic-gate 				cv_wait(&bcall_cv, &strbcall_lock);
11507c478bd9Sstevel@tonic-gate 				goto again;
11517c478bd9Sstevel@tonic-gate 			}
11527c478bd9Sstevel@tonic-gate 		} else {
11537c478bd9Sstevel@tonic-gate 			if (pbcp)
11547c478bd9Sstevel@tonic-gate 				pbcp->bc_next = bcp->bc_next;
11557c478bd9Sstevel@tonic-gate 			else
11567c478bd9Sstevel@tonic-gate 				strbcalls.bc_head = bcp->bc_next;
11577c478bd9Sstevel@tonic-gate 			if (bcp == strbcalls.bc_tail)
11587c478bd9Sstevel@tonic-gate 				strbcalls.bc_tail = pbcp;
11597c478bd9Sstevel@tonic-gate 			kmem_free(bcp, sizeof (strbufcall_t));
11607c478bd9Sstevel@tonic-gate 		}
11617c478bd9Sstevel@tonic-gate 	}
11627c478bd9Sstevel@tonic-gate 	mutex_exit(&strbcall_lock);
11637c478bd9Sstevel@tonic-gate }
11647c478bd9Sstevel@tonic-gate 
11657c478bd9Sstevel@tonic-gate /*
11667c478bd9Sstevel@tonic-gate  * Duplicate a message block by block (uses dupb), returning
11677c478bd9Sstevel@tonic-gate  * a pointer to the duplicate message.
11687c478bd9Sstevel@tonic-gate  * Returns a non-NULL value only if the entire message
11697c478bd9Sstevel@tonic-gate  * was dup'd.
11707c478bd9Sstevel@tonic-gate  */
11717c478bd9Sstevel@tonic-gate mblk_t *
11727c478bd9Sstevel@tonic-gate dupmsg(mblk_t *bp)
11737c478bd9Sstevel@tonic-gate {
11747c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
11757c478bd9Sstevel@tonic-gate 
11767c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = dupb(bp)))
11777c478bd9Sstevel@tonic-gate 		return (NULL);
11787c478bd9Sstevel@tonic-gate 
11797c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
11807c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
11817c478bd9Sstevel@tonic-gate 			freemsg(head);
11827c478bd9Sstevel@tonic-gate 			return (NULL);
11837c478bd9Sstevel@tonic-gate 		}
11847c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
11857c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
11867c478bd9Sstevel@tonic-gate 	}
11877c478bd9Sstevel@tonic-gate 	return (head);
11887c478bd9Sstevel@tonic-gate }
11897c478bd9Sstevel@tonic-gate 
11907c478bd9Sstevel@tonic-gate #define	DUPB_NOLOAN(bp) \
11917c478bd9Sstevel@tonic-gate 	((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
11927c478bd9Sstevel@tonic-gate 	copyb((bp)) : dupb((bp)))
11937c478bd9Sstevel@tonic-gate 
11947c478bd9Sstevel@tonic-gate mblk_t *
11957c478bd9Sstevel@tonic-gate dupmsg_noloan(mblk_t *bp)
11967c478bd9Sstevel@tonic-gate {
11977c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
11987c478bd9Sstevel@tonic-gate 
11997c478bd9Sstevel@tonic-gate 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
12007c478bd9Sstevel@tonic-gate 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
12017c478bd9Sstevel@tonic-gate 		return (NULL);
12027c478bd9Sstevel@tonic-gate 
12037c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
12047c478bd9Sstevel@tonic-gate 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
12057c478bd9Sstevel@tonic-gate 			freemsg(head);
12067c478bd9Sstevel@tonic-gate 			return (NULL);
12077c478bd9Sstevel@tonic-gate 		}
12087c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
12097c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
12107c478bd9Sstevel@tonic-gate 	}
12117c478bd9Sstevel@tonic-gate 	return (head);
12127c478bd9Sstevel@tonic-gate }
12137c478bd9Sstevel@tonic-gate 
12147c478bd9Sstevel@tonic-gate /*
12157c478bd9Sstevel@tonic-gate  * Copy data from message and data block to newly allocated message and
12167c478bd9Sstevel@tonic-gate  * data block. Returns new message block pointer, or NULL if error.
12177c478bd9Sstevel@tonic-gate  * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
12187c478bd9Sstevel@tonic-gate  * as in the original even when db_base is not word aligned. (bug 1052877)
12197c478bd9Sstevel@tonic-gate  */
12207c478bd9Sstevel@tonic-gate mblk_t *
12217c478bd9Sstevel@tonic-gate copyb(mblk_t *bp)
12227c478bd9Sstevel@tonic-gate {
12237c478bd9Sstevel@tonic-gate 	mblk_t	*nbp;
12247c478bd9Sstevel@tonic-gate 	dblk_t	*dp, *ndp;
12257c478bd9Sstevel@tonic-gate 	uchar_t *base;
12267c478bd9Sstevel@tonic-gate 	size_t	size;
12277c478bd9Sstevel@tonic-gate 	size_t	unaligned;
12287c478bd9Sstevel@tonic-gate 
12297c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_wptr >= bp->b_rptr);
12307c478bd9Sstevel@tonic-gate 
12317c478bd9Sstevel@tonic-gate 	dp = bp->b_datap;
12327c478bd9Sstevel@tonic-gate 	if (dp->db_fthdr != NULL)
12337c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
12347c478bd9Sstevel@tonic-gate 
12357c478bd9Sstevel@tonic-gate 	/*
12367c478bd9Sstevel@tonic-gate 	 * Special handling for Multidata message; this should be
12377c478bd9Sstevel@tonic-gate 	 * removed once a copy-callback routine is made available.
12387c478bd9Sstevel@tonic-gate 	 */
12397c478bd9Sstevel@tonic-gate 	if (dp->db_type == M_MULTIDATA) {
12407c478bd9Sstevel@tonic-gate 		cred_t *cr;
12417c478bd9Sstevel@tonic-gate 
12427c478bd9Sstevel@tonic-gate 		if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
12437c478bd9Sstevel@tonic-gate 			return (NULL);
12447c478bd9Sstevel@tonic-gate 
12457c478bd9Sstevel@tonic-gate 		nbp->b_flag = bp->b_flag;
12467c478bd9Sstevel@tonic-gate 		nbp->b_band = bp->b_band;
12477c478bd9Sstevel@tonic-gate 		ndp = nbp->b_datap;
12487c478bd9Sstevel@tonic-gate 
12497c478bd9Sstevel@tonic-gate 		/* See comments below on potential issues. */
12507c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
12517c478bd9Sstevel@tonic-gate 
12527c478bd9Sstevel@tonic-gate 		ASSERT(ndp->db_type == dp->db_type);
12537c478bd9Sstevel@tonic-gate 		cr = dp->db_credp;
12547c478bd9Sstevel@tonic-gate 		if (cr != NULL)
12557c478bd9Sstevel@tonic-gate 			crhold(ndp->db_credp = cr);
12567c478bd9Sstevel@tonic-gate 		ndp->db_cpid = dp->db_cpid;
12577c478bd9Sstevel@tonic-gate 		return (nbp);
12587c478bd9Sstevel@tonic-gate 	}
12597c478bd9Sstevel@tonic-gate 
12607c478bd9Sstevel@tonic-gate 	size = dp->db_lim - dp->db_base;
12617c478bd9Sstevel@tonic-gate 	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
12627c478bd9Sstevel@tonic-gate 	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
12637c478bd9Sstevel@tonic-gate 		return (NULL);
12647c478bd9Sstevel@tonic-gate 	nbp->b_flag = bp->b_flag;
12657c478bd9Sstevel@tonic-gate 	nbp->b_band = bp->b_band;
12667c478bd9Sstevel@tonic-gate 	ndp = nbp->b_datap;
12677c478bd9Sstevel@tonic-gate 
12687c478bd9Sstevel@tonic-gate 	/*
12697c478bd9Sstevel@tonic-gate 	 * Well, here is a potential issue.  If we are trying to
12707c478bd9Sstevel@tonic-gate 	 * trace a flow, and we copy the message, we might lose
12717c478bd9Sstevel@tonic-gate 	 * information about where this message might have been.
12727c478bd9Sstevel@tonic-gate 	 * So we should inherit the FT data.  On the other hand,
12737c478bd9Sstevel@tonic-gate 	 * a user might be interested only in alloc to free data.
12747c478bd9Sstevel@tonic-gate 	 * So I guess the real answer is to provide a tunable.
12757c478bd9Sstevel@tonic-gate 	 */
12767c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
12777c478bd9Sstevel@tonic-gate 
12787c478bd9Sstevel@tonic-gate 	base = ndp->db_base + unaligned;
12797c478bd9Sstevel@tonic-gate 	bcopy(dp->db_base, ndp->db_base + unaligned, size);
12807c478bd9Sstevel@tonic-gate 
12817c478bd9Sstevel@tonic-gate 	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
12827c478bd9Sstevel@tonic-gate 	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
12837c478bd9Sstevel@tonic-gate 
12847c478bd9Sstevel@tonic-gate 	return (nbp);
12857c478bd9Sstevel@tonic-gate }
12867c478bd9Sstevel@tonic-gate 
12877c478bd9Sstevel@tonic-gate /*
12887c478bd9Sstevel@tonic-gate  * Copy data from message to newly allocated message using new
12897c478bd9Sstevel@tonic-gate  * data blocks.  Returns a pointer to the new message, or NULL if error.
12907c478bd9Sstevel@tonic-gate  */
12917c478bd9Sstevel@tonic-gate mblk_t *
12927c478bd9Sstevel@tonic-gate copymsg(mblk_t *bp)
12937c478bd9Sstevel@tonic-gate {
12947c478bd9Sstevel@tonic-gate 	mblk_t *head, *nbp;
12957c478bd9Sstevel@tonic-gate 
12967c478bd9Sstevel@tonic-gate 	if (!bp || !(nbp = head = copyb(bp)))
12977c478bd9Sstevel@tonic-gate 		return (NULL);
12987c478bd9Sstevel@tonic-gate 
12997c478bd9Sstevel@tonic-gate 	while (bp->b_cont) {
13007c478bd9Sstevel@tonic-gate 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
13017c478bd9Sstevel@tonic-gate 			freemsg(head);
13027c478bd9Sstevel@tonic-gate 			return (NULL);
13037c478bd9Sstevel@tonic-gate 		}
13047c478bd9Sstevel@tonic-gate 		nbp = nbp->b_cont;
13057c478bd9Sstevel@tonic-gate 		bp = bp->b_cont;
13067c478bd9Sstevel@tonic-gate 	}
13077c478bd9Sstevel@tonic-gate 	return (head);
13087c478bd9Sstevel@tonic-gate }
13097c478bd9Sstevel@tonic-gate 
13107c478bd9Sstevel@tonic-gate /*
13117c478bd9Sstevel@tonic-gate  * link a message block to tail of message
13127c478bd9Sstevel@tonic-gate  */
13137c478bd9Sstevel@tonic-gate void
13147c478bd9Sstevel@tonic-gate linkb(mblk_t *mp, mblk_t *bp)
13157c478bd9Sstevel@tonic-gate {
13167c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
13177c478bd9Sstevel@tonic-gate 
13187c478bd9Sstevel@tonic-gate 	for (; mp->b_cont; mp = mp->b_cont)
13197c478bd9Sstevel@tonic-gate 		;
13207c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;
13217c478bd9Sstevel@tonic-gate }
13227c478bd9Sstevel@tonic-gate 
13237c478bd9Sstevel@tonic-gate /*
13247c478bd9Sstevel@tonic-gate  * unlink a message block from head of message
13257c478bd9Sstevel@tonic-gate  * return pointer to new message.
13267c478bd9Sstevel@tonic-gate  * NULL if message becomes empty.
13277c478bd9Sstevel@tonic-gate  */
13287c478bd9Sstevel@tonic-gate mblk_t *
13297c478bd9Sstevel@tonic-gate unlinkb(mblk_t *bp)
13307c478bd9Sstevel@tonic-gate {
13317c478bd9Sstevel@tonic-gate 	mblk_t *bp1;
13327c478bd9Sstevel@tonic-gate 
13337c478bd9Sstevel@tonic-gate 	bp1 = bp->b_cont;
13347c478bd9Sstevel@tonic-gate 	bp->b_cont = NULL;
13357c478bd9Sstevel@tonic-gate 	return (bp1);
13367c478bd9Sstevel@tonic-gate }
13377c478bd9Sstevel@tonic-gate 
13387c478bd9Sstevel@tonic-gate /*
13397c478bd9Sstevel@tonic-gate  * remove a message block "bp" from message "mp"
13407c478bd9Sstevel@tonic-gate  *
13417c478bd9Sstevel@tonic-gate  * Return pointer to new message or NULL if no message remains.
13427c478bd9Sstevel@tonic-gate  * Return -1 if bp is not found in message.
13437c478bd9Sstevel@tonic-gate  */
13447c478bd9Sstevel@tonic-gate mblk_t *
13457c478bd9Sstevel@tonic-gate rmvb(mblk_t *mp, mblk_t *bp)
13467c478bd9Sstevel@tonic-gate {
13477c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
13487c478bd9Sstevel@tonic-gate 	mblk_t *lastp = NULL;
13497c478bd9Sstevel@tonic-gate 
13507c478bd9Sstevel@tonic-gate 	ASSERT(mp && bp);
13517c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
13527c478bd9Sstevel@tonic-gate 		if (tmp == bp) {
13537c478bd9Sstevel@tonic-gate 			if (lastp)
13547c478bd9Sstevel@tonic-gate 				lastp->b_cont = tmp->b_cont;
13557c478bd9Sstevel@tonic-gate 			else
13567c478bd9Sstevel@tonic-gate 				mp = tmp->b_cont;
13577c478bd9Sstevel@tonic-gate 			tmp->b_cont = NULL;
13587c478bd9Sstevel@tonic-gate 			return (mp);
13597c478bd9Sstevel@tonic-gate 		}
13607c478bd9Sstevel@tonic-gate 		lastp = tmp;
13617c478bd9Sstevel@tonic-gate 	}
13627c478bd9Sstevel@tonic-gate 	return ((mblk_t *)-1);
13637c478bd9Sstevel@tonic-gate }
13647c478bd9Sstevel@tonic-gate 
13657c478bd9Sstevel@tonic-gate /*
13667c478bd9Sstevel@tonic-gate  * Concatenate and align first len bytes of common
13677c478bd9Sstevel@tonic-gate  * message type.  Len == -1, means concat everything.
13687c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure
13697c478bd9Sstevel@tonic-gate  * After the pullup, mp points to the pulled up data.
13707c478bd9Sstevel@tonic-gate  */
13717c478bd9Sstevel@tonic-gate int
13727c478bd9Sstevel@tonic-gate pullupmsg(mblk_t *mp, ssize_t len)
13737c478bd9Sstevel@tonic-gate {
13747c478bd9Sstevel@tonic-gate 	mblk_t *bp, *b_cont;
13757c478bd9Sstevel@tonic-gate 	dblk_t *dbp;
13767c478bd9Sstevel@tonic-gate 	ssize_t n;
13777c478bd9Sstevel@tonic-gate 
13787c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_ref > 0);
13797c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
13807c478bd9Sstevel@tonic-gate 
13817c478bd9Sstevel@tonic-gate 	/*
13827c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
13837c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
13847c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
13857c478bd9Sstevel@tonic-gate 	 */
13867c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
13877c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
13887c478bd9Sstevel@tonic-gate 		return (0);
13897c478bd9Sstevel@tonic-gate 
13907c478bd9Sstevel@tonic-gate 	if (len == -1) {
13917c478bd9Sstevel@tonic-gate 		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
13927c478bd9Sstevel@tonic-gate 			return (1);
13937c478bd9Sstevel@tonic-gate 		len = xmsgsize(mp);
13947c478bd9Sstevel@tonic-gate 	} else {
13957c478bd9Sstevel@tonic-gate 		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
13967c478bd9Sstevel@tonic-gate 		ASSERT(first_mblk_len >= 0);
13977c478bd9Sstevel@tonic-gate 		/*
13987c478bd9Sstevel@tonic-gate 		 * If the length is less than that of the first mblk,
13997c478bd9Sstevel@tonic-gate 		 * we want to pull up the message into an aligned mblk.
14007c478bd9Sstevel@tonic-gate 		 * Though not part of the spec, some callers assume it.
14017c478bd9Sstevel@tonic-gate 		 */
14027c478bd9Sstevel@tonic-gate 		if (len <= first_mblk_len) {
14037c478bd9Sstevel@tonic-gate 			if (str_aligned(mp->b_rptr))
14047c478bd9Sstevel@tonic-gate 				return (1);
14057c478bd9Sstevel@tonic-gate 			len = first_mblk_len;
14067c478bd9Sstevel@tonic-gate 		} else if (xmsgsize(mp) < len)
14077c478bd9Sstevel@tonic-gate 			return (0);
14087c478bd9Sstevel@tonic-gate 	}
14097c478bd9Sstevel@tonic-gate 
14107c478bd9Sstevel@tonic-gate 	if ((bp = allocb_tmpl(len, mp)) == NULL)
14117c478bd9Sstevel@tonic-gate 		return (0);
14127c478bd9Sstevel@tonic-gate 
14137c478bd9Sstevel@tonic-gate 	dbp = bp->b_datap;
14147c478bd9Sstevel@tonic-gate 	*bp = *mp;		/* swap mblks so bp heads the old msg... */
14157c478bd9Sstevel@tonic-gate 	mp->b_datap = dbp;	/* ... and mp heads the new message */
14167c478bd9Sstevel@tonic-gate 	mp->b_datap->db_mblk = mp;
14177c478bd9Sstevel@tonic-gate 	bp->b_datap->db_mblk = bp;
14187c478bd9Sstevel@tonic-gate 	mp->b_rptr = mp->b_wptr = dbp->db_base;
14197c478bd9Sstevel@tonic-gate 
14207c478bd9Sstevel@tonic-gate 	do {
14217c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_datap->db_ref > 0);
14227c478bd9Sstevel@tonic-gate 		ASSERT(bp->b_wptr >= bp->b_rptr);
14237c478bd9Sstevel@tonic-gate 		n = MIN(bp->b_wptr - bp->b_rptr, len);
14247c478bd9Sstevel@tonic-gate 		bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
14257c478bd9Sstevel@tonic-gate 		mp->b_wptr += n;
14267c478bd9Sstevel@tonic-gate 		bp->b_rptr += n;
14277c478bd9Sstevel@tonic-gate 		len -= n;
14287c478bd9Sstevel@tonic-gate 		if (bp->b_rptr != bp->b_wptr)
14297c478bd9Sstevel@tonic-gate 			break;
14307c478bd9Sstevel@tonic-gate 		b_cont = bp->b_cont;
14317c478bd9Sstevel@tonic-gate 		freeb(bp);
14327c478bd9Sstevel@tonic-gate 		bp = b_cont;
14337c478bd9Sstevel@tonic-gate 	} while (len && bp);
14347c478bd9Sstevel@tonic-gate 
14357c478bd9Sstevel@tonic-gate 	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */
14367c478bd9Sstevel@tonic-gate 
14377c478bd9Sstevel@tonic-gate 	return (1);
14387c478bd9Sstevel@tonic-gate }
14397c478bd9Sstevel@tonic-gate 
14407c478bd9Sstevel@tonic-gate /*
14417c478bd9Sstevel@tonic-gate  * Concatenate and align at least the first len bytes of common message
14427c478bd9Sstevel@tonic-gate  * type.  Len == -1 means concatenate everything.  The original message is
14437c478bd9Sstevel@tonic-gate  * unaltered.  Returns a pointer to a new message on success, otherwise
14447c478bd9Sstevel@tonic-gate  * returns NULL.
14457c478bd9Sstevel@tonic-gate  */
14467c478bd9Sstevel@tonic-gate mblk_t *
14477c478bd9Sstevel@tonic-gate msgpullup(mblk_t *mp, ssize_t len)
14487c478bd9Sstevel@tonic-gate {
14497c478bd9Sstevel@tonic-gate 	mblk_t	*newmp;
14507c478bd9Sstevel@tonic-gate 	ssize_t	totlen;
14517c478bd9Sstevel@tonic-gate 	ssize_t	n;
14527c478bd9Sstevel@tonic-gate 
14537c478bd9Sstevel@tonic-gate 	/*
14547c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
14557c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
14567c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
14577c478bd9Sstevel@tonic-gate 	 */
14587c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
14597c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
14607c478bd9Sstevel@tonic-gate 		return (NULL);
14617c478bd9Sstevel@tonic-gate 
14627c478bd9Sstevel@tonic-gate 	totlen = xmsgsize(mp);
14637c478bd9Sstevel@tonic-gate 
14647c478bd9Sstevel@tonic-gate 	if ((len > 0) && (len > totlen))
14657c478bd9Sstevel@tonic-gate 		return (NULL);
14667c478bd9Sstevel@tonic-gate 
14677c478bd9Sstevel@tonic-gate 	/*
14687c478bd9Sstevel@tonic-gate 	 * Copy all of the first msg type into one new mblk, then dupmsg
14697c478bd9Sstevel@tonic-gate 	 * and link the rest onto this.
14707c478bd9Sstevel@tonic-gate 	 */
14717c478bd9Sstevel@tonic-gate 
14727c478bd9Sstevel@tonic-gate 	len = totlen;
14737c478bd9Sstevel@tonic-gate 
14747c478bd9Sstevel@tonic-gate 	if ((newmp = allocb_tmpl(len, mp)) == NULL)
14757c478bd9Sstevel@tonic-gate 		return (NULL);
14767c478bd9Sstevel@tonic-gate 
14777c478bd9Sstevel@tonic-gate 	newmp->b_flag = mp->b_flag;
14787c478bd9Sstevel@tonic-gate 	newmp->b_band = mp->b_band;
14797c478bd9Sstevel@tonic-gate 
14807c478bd9Sstevel@tonic-gate 	while (len > 0) {
14817c478bd9Sstevel@tonic-gate 		n = mp->b_wptr - mp->b_rptr;
14827c478bd9Sstevel@tonic-gate 		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
14837c478bd9Sstevel@tonic-gate 		if (n > 0)
14847c478bd9Sstevel@tonic-gate 			bcopy(mp->b_rptr, newmp->b_wptr, n);
14857c478bd9Sstevel@tonic-gate 		newmp->b_wptr += n;
14867c478bd9Sstevel@tonic-gate 		len -= n;
14877c478bd9Sstevel@tonic-gate 		mp = mp->b_cont;
14887c478bd9Sstevel@tonic-gate 	}
14897c478bd9Sstevel@tonic-gate 
14907c478bd9Sstevel@tonic-gate 	if (mp != NULL) {
14917c478bd9Sstevel@tonic-gate 		newmp->b_cont = dupmsg(mp);
14927c478bd9Sstevel@tonic-gate 		if (newmp->b_cont == NULL) {
14937c478bd9Sstevel@tonic-gate 			freemsg(newmp);
14947c478bd9Sstevel@tonic-gate 			return (NULL);
14957c478bd9Sstevel@tonic-gate 		}
14967c478bd9Sstevel@tonic-gate 	}
14977c478bd9Sstevel@tonic-gate 
14987c478bd9Sstevel@tonic-gate 	return (newmp);
14997c478bd9Sstevel@tonic-gate }
15007c478bd9Sstevel@tonic-gate 
15017c478bd9Sstevel@tonic-gate /*
15027c478bd9Sstevel@tonic-gate  * Trim bytes from message
15037c478bd9Sstevel@tonic-gate  *  len > 0, trim from head
15047c478bd9Sstevel@tonic-gate  *  len < 0, trim from tail
15057c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
15067c478bd9Sstevel@tonic-gate  */
15077c478bd9Sstevel@tonic-gate int
15087c478bd9Sstevel@tonic-gate adjmsg(mblk_t *mp, ssize_t len)
15097c478bd9Sstevel@tonic-gate {
15107c478bd9Sstevel@tonic-gate 	mblk_t *bp;
15117c478bd9Sstevel@tonic-gate 	mblk_t *save_bp = NULL;
15127c478bd9Sstevel@tonic-gate 	mblk_t *prev_bp;
15137c478bd9Sstevel@tonic-gate 	mblk_t *bcont;
15147c478bd9Sstevel@tonic-gate 	unsigned char type;
15157c478bd9Sstevel@tonic-gate 	ssize_t n;
15167c478bd9Sstevel@tonic-gate 	int fromhead;
15177c478bd9Sstevel@tonic-gate 	int first;
15187c478bd9Sstevel@tonic-gate 
15197c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
15207c478bd9Sstevel@tonic-gate 	/*
15217c478bd9Sstevel@tonic-gate 	 * We won't handle Multidata message, since it contains
15227c478bd9Sstevel@tonic-gate 	 * metadata which this function has no knowledge of; we
15237c478bd9Sstevel@tonic-gate 	 * assert on DEBUG, and return failure otherwise.
15247c478bd9Sstevel@tonic-gate 	 */
15257c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
15267c478bd9Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_MULTIDATA)
15277c478bd9Sstevel@tonic-gate 		return (0);
15287c478bd9Sstevel@tonic-gate 
15297c478bd9Sstevel@tonic-gate 	if (len < 0) {
15307c478bd9Sstevel@tonic-gate 		fromhead = 0;
15317c478bd9Sstevel@tonic-gate 		len = -len;
15327c478bd9Sstevel@tonic-gate 	} else {
15337c478bd9Sstevel@tonic-gate 		fromhead = 1;
15347c478bd9Sstevel@tonic-gate 	}
15357c478bd9Sstevel@tonic-gate 
15367c478bd9Sstevel@tonic-gate 	if (xmsgsize(mp) < len)
15377c478bd9Sstevel@tonic-gate 		return (0);
15387c478bd9Sstevel@tonic-gate 
15397c478bd9Sstevel@tonic-gate 
15407c478bd9Sstevel@tonic-gate 	if (fromhead) {
15417c478bd9Sstevel@tonic-gate 		first = 1;
15427c478bd9Sstevel@tonic-gate 		while (len) {
15437c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_wptr >= mp->b_rptr);
15447c478bd9Sstevel@tonic-gate 			n = MIN(mp->b_wptr - mp->b_rptr, len);
15457c478bd9Sstevel@tonic-gate 			mp->b_rptr += n;
15467c478bd9Sstevel@tonic-gate 			len -= n;
15477c478bd9Sstevel@tonic-gate 
15487c478bd9Sstevel@tonic-gate 			/*
15497c478bd9Sstevel@tonic-gate 			 * If this is not the first zero length
15507c478bd9Sstevel@tonic-gate 			 * message remove it
15517c478bd9Sstevel@tonic-gate 			 */
15527c478bd9Sstevel@tonic-gate 			if (!first && (mp->b_wptr == mp->b_rptr)) {
15537c478bd9Sstevel@tonic-gate 				bcont = mp->b_cont;
15547c478bd9Sstevel@tonic-gate 				freeb(mp);
15557c478bd9Sstevel@tonic-gate 				mp = save_bp->b_cont = bcont;
15567c478bd9Sstevel@tonic-gate 			} else {
15577c478bd9Sstevel@tonic-gate 				save_bp = mp;
15587c478bd9Sstevel@tonic-gate 				mp = mp->b_cont;
15597c478bd9Sstevel@tonic-gate 			}
15607c478bd9Sstevel@tonic-gate 			first = 0;
15617c478bd9Sstevel@tonic-gate 		}
15627c478bd9Sstevel@tonic-gate 	} else {
15637c478bd9Sstevel@tonic-gate 		type = mp->b_datap->db_type;
15647c478bd9Sstevel@tonic-gate 		while (len) {
15657c478bd9Sstevel@tonic-gate 			bp = mp;
15667c478bd9Sstevel@tonic-gate 			save_bp = NULL;
15677c478bd9Sstevel@tonic-gate 
15687c478bd9Sstevel@tonic-gate 			/*
15697c478bd9Sstevel@tonic-gate 			 * Find the last message of same type
15707c478bd9Sstevel@tonic-gate 			 */
15717c478bd9Sstevel@tonic-gate 
15727c478bd9Sstevel@tonic-gate 			while (bp && bp->b_datap->db_type == type) {
15737c478bd9Sstevel@tonic-gate 				ASSERT(bp->b_wptr >= bp->b_rptr);
15747c478bd9Sstevel@tonic-gate 				prev_bp = save_bp;
15757c478bd9Sstevel@tonic-gate 				save_bp = bp;
15767c478bd9Sstevel@tonic-gate 				bp = bp->b_cont;
15777c478bd9Sstevel@tonic-gate 			}
15787c478bd9Sstevel@tonic-gate 			if (save_bp == NULL)
15797c478bd9Sstevel@tonic-gate 				break;
15807c478bd9Sstevel@tonic-gate 			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
15817c478bd9Sstevel@tonic-gate 			save_bp->b_wptr -= n;
15827c478bd9Sstevel@tonic-gate 			len -= n;
15837c478bd9Sstevel@tonic-gate 
15847c478bd9Sstevel@tonic-gate 			/*
15857c478bd9Sstevel@tonic-gate 			 * If this is not the first message
15867c478bd9Sstevel@tonic-gate 			 * and we have taken away everything
15877c478bd9Sstevel@tonic-gate 			 * from this message, remove it
15887c478bd9Sstevel@tonic-gate 			 */
15897c478bd9Sstevel@tonic-gate 
15907c478bd9Sstevel@tonic-gate 			if ((save_bp != mp) &&
15917c478bd9Sstevel@tonic-gate 				(save_bp->b_wptr == save_bp->b_rptr)) {
15927c478bd9Sstevel@tonic-gate 				bcont = save_bp->b_cont;
15937c478bd9Sstevel@tonic-gate 				freeb(save_bp);
15947c478bd9Sstevel@tonic-gate 				prev_bp->b_cont = bcont;
15957c478bd9Sstevel@tonic-gate 			}
15967c478bd9Sstevel@tonic-gate 		}
15977c478bd9Sstevel@tonic-gate 	}
15987c478bd9Sstevel@tonic-gate 	return (1);
15997c478bd9Sstevel@tonic-gate }
16007c478bd9Sstevel@tonic-gate 
16017c478bd9Sstevel@tonic-gate /*
16027c478bd9Sstevel@tonic-gate  * get number of data bytes in message
16037c478bd9Sstevel@tonic-gate  */
16047c478bd9Sstevel@tonic-gate size_t
16057c478bd9Sstevel@tonic-gate msgdsize(mblk_t *bp)
16067c478bd9Sstevel@tonic-gate {
16077c478bd9Sstevel@tonic-gate 	size_t count = 0;
16087c478bd9Sstevel@tonic-gate 
16097c478bd9Sstevel@tonic-gate 	for (; bp; bp = bp->b_cont)
16107c478bd9Sstevel@tonic-gate 		if (bp->b_datap->db_type == M_DATA) {
16117c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_wptr >= bp->b_rptr);
16127c478bd9Sstevel@tonic-gate 			count += bp->b_wptr - bp->b_rptr;
16137c478bd9Sstevel@tonic-gate 		}
16147c478bd9Sstevel@tonic-gate 	return (count);
16157c478bd9Sstevel@tonic-gate }
16167c478bd9Sstevel@tonic-gate 
16177c478bd9Sstevel@tonic-gate /*
16187c478bd9Sstevel@tonic-gate  * Get a message off head of queue
16197c478bd9Sstevel@tonic-gate  *
16207c478bd9Sstevel@tonic-gate  * If queue has no buffers then mark queue
16217c478bd9Sstevel@tonic-gate  * with QWANTR. (queue wants to be read by
16227c478bd9Sstevel@tonic-gate  * someone when data becomes available)
16237c478bd9Sstevel@tonic-gate  *
16247c478bd9Sstevel@tonic-gate  * If there is something to take off then do so.
16257c478bd9Sstevel@tonic-gate  * If queue falls below hi water mark turn off QFULL
16267c478bd9Sstevel@tonic-gate  * flag.  Decrement weighted count of queue.
16277c478bd9Sstevel@tonic-gate  * Also turn off QWANTR because queue is being read.
16287c478bd9Sstevel@tonic-gate  *
16297c478bd9Sstevel@tonic-gate  * The queue count is maintained on a per-band basis.
16307c478bd9Sstevel@tonic-gate  * Priority band 0 (normal messages) uses q_count,
16317c478bd9Sstevel@tonic-gate  * q_lowat, etc.  Non-zero priority bands use the
16327c478bd9Sstevel@tonic-gate  * fields in their respective qband structures
16337c478bd9Sstevel@tonic-gate  * (qb_count, qb_lowat, etc.)  All messages appear
16347c478bd9Sstevel@tonic-gate  * on the same list, linked via their b_next pointers.
16357c478bd9Sstevel@tonic-gate  * q_first is the head of the list.  q_count does
16367c478bd9Sstevel@tonic-gate  * not reflect the size of all the messages on the
16377c478bd9Sstevel@tonic-gate  * queue.  It only reflects those messages in the
16387c478bd9Sstevel@tonic-gate  * normal band of flow.  The one exception to this
16397c478bd9Sstevel@tonic-gate  * deals with high priority messages.  They are in
16407c478bd9Sstevel@tonic-gate  * their own conceptual "band", but are accounted
16417c478bd9Sstevel@tonic-gate  * against q_count.
16427c478bd9Sstevel@tonic-gate  *
16437c478bd9Sstevel@tonic-gate  * If queue count is below the lo water mark and QWANTW
16447c478bd9Sstevel@tonic-gate  * is set, enable the closest backq which has a service
16457c478bd9Sstevel@tonic-gate  * procedure and turn off the QWANTW flag.
16467c478bd9Sstevel@tonic-gate  *
16477c478bd9Sstevel@tonic-gate  * getq could be built on top of rmvq, but isn't because
16487c478bd9Sstevel@tonic-gate  * of performance considerations.
16497c478bd9Sstevel@tonic-gate  *
16507c478bd9Sstevel@tonic-gate  * A note on the use of q_count and q_mblkcnt:
16517c478bd9Sstevel@tonic-gate  *   q_count is the traditional byte count for messages that
16527c478bd9Sstevel@tonic-gate  *   have been put on a queue.  Documentation tells us that
16537c478bd9Sstevel@tonic-gate  *   we shouldn't rely on that count, but some drivers/modules
16547c478bd9Sstevel@tonic-gate  *   do.  What was needed, however, is a mechanism to prevent
16557c478bd9Sstevel@tonic-gate  *   runaway streams from consuming all of the resources,
16567c478bd9Sstevel@tonic-gate  *   and particularly be able to flow control zero-length
16577c478bd9Sstevel@tonic-gate  *   messages.  q_mblkcnt is used for this purpose.  It
16587c478bd9Sstevel@tonic-gate  *   counts the number of mblk's that are being put on
16597c478bd9Sstevel@tonic-gate  *   the queue.  The intention here, is that each mblk should
16607c478bd9Sstevel@tonic-gate  *   contain one byte of data and, for the purpose of
16617c478bd9Sstevel@tonic-gate  *   flow-control, logically does.  A queue will become
16627c478bd9Sstevel@tonic-gate  *   full when EITHER of these values (q_count and q_mblkcnt)
16637c478bd9Sstevel@tonic-gate  *   reach the highwater mark.  It will clear when BOTH
16647c478bd9Sstevel@tonic-gate  *   of them drop below the highwater mark.  And it will
16657c478bd9Sstevel@tonic-gate  *   backenable when BOTH of them drop below the lowwater
16667c478bd9Sstevel@tonic-gate  *   mark.
16677c478bd9Sstevel@tonic-gate  *   With this algorithm, a driver/module might be able
16687c478bd9Sstevel@tonic-gate  *   to find a reasonably accurate q_count, and the
16697c478bd9Sstevel@tonic-gate  *   framework can still try and limit resource usage.
16707c478bd9Sstevel@tonic-gate  */
16717c478bd9Sstevel@tonic-gate mblk_t *
16727c478bd9Sstevel@tonic-gate getq(queue_t *q)
16737c478bd9Sstevel@tonic-gate {
16747c478bd9Sstevel@tonic-gate 	mblk_t *bp;
1675*116094b2Smicheng 	uchar_t band = 0;
16767c478bd9Sstevel@tonic-gate 
16777c478bd9Sstevel@tonic-gate 	bp = getq_noenab(q);
16787c478bd9Sstevel@tonic-gate 	if (bp != NULL)
16797c478bd9Sstevel@tonic-gate 		band = bp->b_band;
16807c478bd9Sstevel@tonic-gate 
16817c478bd9Sstevel@tonic-gate 	/*
16827c478bd9Sstevel@tonic-gate 	 * Inlined from qbackenable().
16837c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
16847c478bd9Sstevel@tonic-gate 	 */
16857c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
16867c478bd9Sstevel@tonic-gate 		return (bp);
16877c478bd9Sstevel@tonic-gate 
16887c478bd9Sstevel@tonic-gate 	qbackenable(q, band);
16897c478bd9Sstevel@tonic-gate 	return (bp);
16907c478bd9Sstevel@tonic-gate }
16917c478bd9Sstevel@tonic-gate 
16927c478bd9Sstevel@tonic-gate /*
16937c478bd9Sstevel@tonic-gate  * Like getq() but does not backenable.  This is used by the stream
16947c478bd9Sstevel@tonic-gate  * head when a putback() is likely.  The caller must call qbackenable()
16957c478bd9Sstevel@tonic-gate  * after it is done with accessing the queue.
16967c478bd9Sstevel@tonic-gate  */
16977c478bd9Sstevel@tonic-gate mblk_t *
16987c478bd9Sstevel@tonic-gate getq_noenab(queue_t *q)
16997c478bd9Sstevel@tonic-gate {
17007c478bd9Sstevel@tonic-gate 	mblk_t *bp;
17017c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
17027c478bd9Sstevel@tonic-gate 	qband_t *qbp;
17037c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
17047c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
17057c478bd9Sstevel@tonic-gate 
17067c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
17077c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
17087c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
17097c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
17107c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
17117c478bd9Sstevel@tonic-gate 	} else
17127c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate 	if ((bp = q->q_first) == 0) {
17157c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTR;
17167c478bd9Sstevel@tonic-gate 	} else {
17177c478bd9Sstevel@tonic-gate 		if ((q->q_first = bp->b_next) == NULL)
17187c478bd9Sstevel@tonic-gate 			q->q_last = NULL;
17197c478bd9Sstevel@tonic-gate 		else
17207c478bd9Sstevel@tonic-gate 			q->q_first->b_prev = NULL;
17217c478bd9Sstevel@tonic-gate 
17227c478bd9Sstevel@tonic-gate 		/* Get message byte count for q_count accounting */
17237c478bd9Sstevel@tonic-gate 		for (tmp = bp; tmp; tmp = tmp->b_cont) {
17247c478bd9Sstevel@tonic-gate 			bytecnt += (tmp->b_wptr - tmp->b_rptr);
17257c478bd9Sstevel@tonic-gate 			mblkcnt++;
17267c478bd9Sstevel@tonic-gate 		}
17277c478bd9Sstevel@tonic-gate 
17287c478bd9Sstevel@tonic-gate 		if (bp->b_band == 0) {
17297c478bd9Sstevel@tonic-gate 			q->q_count -= bytecnt;
17307c478bd9Sstevel@tonic-gate 			q->q_mblkcnt -= mblkcnt;
17317c478bd9Sstevel@tonic-gate 			if ((q->q_count < q->q_hiwat) &&
17327c478bd9Sstevel@tonic-gate 			    (q->q_mblkcnt < q->q_hiwat)) {
17337c478bd9Sstevel@tonic-gate 				q->q_flag &= ~QFULL;
17347c478bd9Sstevel@tonic-gate 			}
17357c478bd9Sstevel@tonic-gate 		} else {
17367c478bd9Sstevel@tonic-gate 			int i;
17377c478bd9Sstevel@tonic-gate 
17387c478bd9Sstevel@tonic-gate 			ASSERT(bp->b_band <= q->q_nband);
17397c478bd9Sstevel@tonic-gate 			ASSERT(q->q_bandp != NULL);
17407c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(QLOCK(q)));
17417c478bd9Sstevel@tonic-gate 			qbp = q->q_bandp;
17427c478bd9Sstevel@tonic-gate 			i = bp->b_band;
17437c478bd9Sstevel@tonic-gate 			while (--i > 0)
17447c478bd9Sstevel@tonic-gate 				qbp = qbp->qb_next;
17457c478bd9Sstevel@tonic-gate 			if (qbp->qb_first == qbp->qb_last) {
17467c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
17477c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
17487c478bd9Sstevel@tonic-gate 			} else {
17497c478bd9Sstevel@tonic-gate 				qbp->qb_first = bp->b_next;
17507c478bd9Sstevel@tonic-gate 			}
17517c478bd9Sstevel@tonic-gate 			qbp->qb_count -= bytecnt;
17527c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt -= mblkcnt;
17537c478bd9Sstevel@tonic-gate 			if ((qbp->qb_count < qbp->qb_hiwat) &&
17547c478bd9Sstevel@tonic-gate 			    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
17557c478bd9Sstevel@tonic-gate 				qbp->qb_flag &= ~QB_FULL;
17567c478bd9Sstevel@tonic-gate 			}
17577c478bd9Sstevel@tonic-gate 		}
17587c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTR;
17597c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
17607c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
17617c478bd9Sstevel@tonic-gate 	}
17627c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
17637c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
17647c478bd9Sstevel@tonic-gate 
17657c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);
17667c478bd9Sstevel@tonic-gate 
17677c478bd9Sstevel@tonic-gate 	return (bp);
17687c478bd9Sstevel@tonic-gate }
17697c478bd9Sstevel@tonic-gate 
17707c478bd9Sstevel@tonic-gate /*
17717c478bd9Sstevel@tonic-gate  * Determine if a backenable is needed after removing a message in the
17727c478bd9Sstevel@tonic-gate  * specified band.
17737c478bd9Sstevel@tonic-gate  * NOTE: This routine assumes that something like getq_noenab() has been
17747c478bd9Sstevel@tonic-gate  * already called.
17757c478bd9Sstevel@tonic-gate  *
17767c478bd9Sstevel@tonic-gate  * For the read side it is ok to hold sd_lock across calling this (and the
17777c478bd9Sstevel@tonic-gate  * stream head often does).
17787c478bd9Sstevel@tonic-gate  * But for the write side strwakeq might be invoked and it acquires sd_lock.
17797c478bd9Sstevel@tonic-gate  */
17807c478bd9Sstevel@tonic-gate void
1781*116094b2Smicheng qbackenable(queue_t *q, uchar_t band)
17827c478bd9Sstevel@tonic-gate {
17837c478bd9Sstevel@tonic-gate 	int backenab = 0;
17847c478bd9Sstevel@tonic-gate 	qband_t *qbp;
17857c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
17867c478bd9Sstevel@tonic-gate 
17877c478bd9Sstevel@tonic-gate 	ASSERT(q);
17887c478bd9Sstevel@tonic-gate 	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));
17897c478bd9Sstevel@tonic-gate 
17907c478bd9Sstevel@tonic-gate 	/*
17917c478bd9Sstevel@tonic-gate 	 * Quick check without holding the lock.
17927c478bd9Sstevel@tonic-gate 	 * OK since after getq() has lowered the q_count these flags
17937c478bd9Sstevel@tonic-gate 	 * would not change unless either the qbackenable() is done by
17947c478bd9Sstevel@tonic-gate 	 * another thread (which is ok) or the queue has gotten QFULL
17957c478bd9Sstevel@tonic-gate 	 * in which case another backenable will take place when the queue
17967c478bd9Sstevel@tonic-gate 	 * drops below q_lowat.
17977c478bd9Sstevel@tonic-gate 	 */
17987c478bd9Sstevel@tonic-gate 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
17997c478bd9Sstevel@tonic-gate 		return;
18007c478bd9Sstevel@tonic-gate 
18017c478bd9Sstevel@tonic-gate 	/* freezestr should allow its caller to call getq/putq */
18027c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
18037c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
18047c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
18057c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
18067c478bd9Sstevel@tonic-gate 	} else
18077c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
18087c478bd9Sstevel@tonic-gate 
18097c478bd9Sstevel@tonic-gate 	if (band == 0) {
18107c478bd9Sstevel@tonic-gate 		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
18117c478bd9Sstevel@tonic-gate 		    q->q_mblkcnt < q->q_lowat)) {
18127c478bd9Sstevel@tonic-gate 			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
18137c478bd9Sstevel@tonic-gate 		}
18147c478bd9Sstevel@tonic-gate 	} else {
18157c478bd9Sstevel@tonic-gate 		int i;
18167c478bd9Sstevel@tonic-gate 
18177c478bd9Sstevel@tonic-gate 		ASSERT((unsigned)band <= q->q_nband);
18187c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
18197c478bd9Sstevel@tonic-gate 
18207c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
18217c478bd9Sstevel@tonic-gate 		i = band;
18227c478bd9Sstevel@tonic-gate 		while (--i > 0)
18237c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
18247c478bd9Sstevel@tonic-gate 
18257c478bd9Sstevel@tonic-gate 		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
18267c478bd9Sstevel@tonic-gate 		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
18277c478bd9Sstevel@tonic-gate 			backenab = qbp->qb_flag & QB_WANTW;
18287c478bd9Sstevel@tonic-gate 		}
18297c478bd9Sstevel@tonic-gate 	}
18307c478bd9Sstevel@tonic-gate 
18317c478bd9Sstevel@tonic-gate 	if (backenab == 0) {
18327c478bd9Sstevel@tonic-gate 		if (freezer != curthread)
18337c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
18347c478bd9Sstevel@tonic-gate 		return;
18357c478bd9Sstevel@tonic-gate 	}
18367c478bd9Sstevel@tonic-gate 
18377c478bd9Sstevel@tonic-gate 	/* Have to drop the lock across strwakeq and backenable */
18387c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
18397c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTWSYNC;
18407c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW)) {
18417c478bd9Sstevel@tonic-gate 		if (band != 0)
18427c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
18437c478bd9Sstevel@tonic-gate 		else {
18447c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
18457c478bd9Sstevel@tonic-gate 		}
18467c478bd9Sstevel@tonic-gate 	}
18477c478bd9Sstevel@tonic-gate 
18487c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
18497c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
18507c478bd9Sstevel@tonic-gate 
18517c478bd9Sstevel@tonic-gate 	if (backenab & QWANTWSYNC)
18527c478bd9Sstevel@tonic-gate 		strwakeq(q, QWANTWSYNC);
18537c478bd9Sstevel@tonic-gate 	if (backenab & (QWANTW|QB_WANTW))
18547c478bd9Sstevel@tonic-gate 		backenable(q, band);
18557c478bd9Sstevel@tonic-gate }
18567c478bd9Sstevel@tonic-gate 
18577c478bd9Sstevel@tonic-gate /*
18587c478bd9Sstevel@tonic-gate  * Remove a message from a queue.  The queue count and other
18597c478bd9Sstevel@tonic-gate  * flow control parameters are adjusted and the back queue
18607c478bd9Sstevel@tonic-gate  * enabled if necessary.
18617c478bd9Sstevel@tonic-gate  *
18627c478bd9Sstevel@tonic-gate  * rmvq can be called with the stream frozen, but other utility functions
18637c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
18647c478bd9Sstevel@tonic-gate  */
18657c478bd9Sstevel@tonic-gate void
18667c478bd9Sstevel@tonic-gate rmvq(queue_t *q, mblk_t *mp)
18677c478bd9Sstevel@tonic-gate {
18687c478bd9Sstevel@tonic-gate 	ASSERT(mp != NULL);
18697c478bd9Sstevel@tonic-gate 
18707c478bd9Sstevel@tonic-gate 	rmvq_noenab(q, mp);
18717c478bd9Sstevel@tonic-gate 	if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
18727c478bd9Sstevel@tonic-gate 		/*
18737c478bd9Sstevel@tonic-gate 		 * qbackenable can handle a frozen stream but not a "random"
18747c478bd9Sstevel@tonic-gate 		 * qlock being held. Drop lock across qbackenable.
18757c478bd9Sstevel@tonic-gate 		 */
18767c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
18777c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
18787c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
18797c478bd9Sstevel@tonic-gate 	} else {
18807c478bd9Sstevel@tonic-gate 		qbackenable(q, mp->b_band);
18817c478bd9Sstevel@tonic-gate 	}
18827c478bd9Sstevel@tonic-gate }
18837c478bd9Sstevel@tonic-gate 
18847c478bd9Sstevel@tonic-gate /*
18857c478bd9Sstevel@tonic-gate  * Like rmvq() but without any backenabling.
18867c478bd9Sstevel@tonic-gate  * This exists to handle SR_CONSOL_DATA in strrput().
18877c478bd9Sstevel@tonic-gate  */
18887c478bd9Sstevel@tonic-gate void
18897c478bd9Sstevel@tonic-gate rmvq_noenab(queue_t *q, mblk_t *mp)
18907c478bd9Sstevel@tonic-gate {
18917c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
18927c478bd9Sstevel@tonic-gate 	int i;
18937c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
18947c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
18957c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
18967c478bd9Sstevel@tonic-gate 
18977c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
18987c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
18997c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
19007c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
19017c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
19027c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
19037c478bd9Sstevel@tonic-gate 		freezer = curthread;
19047c478bd9Sstevel@tonic-gate 	} else
19057c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
19067c478bd9Sstevel@tonic-gate 
19077c478bd9Sstevel@tonic-gate 	ASSERT(mp->b_band <= q->q_nband);
19087c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {		/* Adjust band pointers */
19097c478bd9Sstevel@tonic-gate 		ASSERT(q->q_bandp != NULL);
19107c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
19117c478bd9Sstevel@tonic-gate 		i = mp->b_band;
19127c478bd9Sstevel@tonic-gate 		while (--i > 0)
19137c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
19147c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_first) {
19157c478bd9Sstevel@tonic-gate 			if (mp->b_next && mp->b_band == mp->b_next->b_band)
19167c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp->b_next;
19177c478bd9Sstevel@tonic-gate 			else
19187c478bd9Sstevel@tonic-gate 				qbp->qb_first = NULL;
19197c478bd9Sstevel@tonic-gate 		}
19207c478bd9Sstevel@tonic-gate 		if (mp == qbp->qb_last) {
19217c478bd9Sstevel@tonic-gate 			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
19227c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp->b_prev;
19237c478bd9Sstevel@tonic-gate 			else
19247c478bd9Sstevel@tonic-gate 				qbp->qb_last = NULL;
19257c478bd9Sstevel@tonic-gate 		}
19267c478bd9Sstevel@tonic-gate 	}
19277c478bd9Sstevel@tonic-gate 
19287c478bd9Sstevel@tonic-gate 	/*
19297c478bd9Sstevel@tonic-gate 	 * Remove the message from the list.
19307c478bd9Sstevel@tonic-gate 	 */
19317c478bd9Sstevel@tonic-gate 	if (mp->b_prev)
19327c478bd9Sstevel@tonic-gate 		mp->b_prev->b_next = mp->b_next;
19337c478bd9Sstevel@tonic-gate 	else
19347c478bd9Sstevel@tonic-gate 		q->q_first = mp->b_next;
19357c478bd9Sstevel@tonic-gate 	if (mp->b_next)
19367c478bd9Sstevel@tonic-gate 		mp->b_next->b_prev = mp->b_prev;
19377c478bd9Sstevel@tonic-gate 	else
19387c478bd9Sstevel@tonic-gate 		q->q_last = mp->b_prev;
19397c478bd9Sstevel@tonic-gate 	mp->b_next = NULL;
19407c478bd9Sstevel@tonic-gate 	mp->b_prev = NULL;
19417c478bd9Sstevel@tonic-gate 
19427c478bd9Sstevel@tonic-gate 	/* Get the size of the message for q_count accounting */
19437c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
19447c478bd9Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
19457c478bd9Sstevel@tonic-gate 		mblkcnt++;
19467c478bd9Sstevel@tonic-gate 	}
19477c478bd9Sstevel@tonic-gate 
19487c478bd9Sstevel@tonic-gate 	if (mp->b_band == 0) {		/* Perform q_count accounting */
19497c478bd9Sstevel@tonic-gate 		q->q_count -= bytecnt;
19507c478bd9Sstevel@tonic-gate 		q->q_mblkcnt -= mblkcnt;
19517c478bd9Sstevel@tonic-gate 		if ((q->q_count < q->q_hiwat) &&
19527c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_hiwat)) {
19537c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QFULL;
19547c478bd9Sstevel@tonic-gate 		}
19557c478bd9Sstevel@tonic-gate 	} else {			/* Perform qb_count accounting */
19567c478bd9Sstevel@tonic-gate 		qbp->qb_count -= bytecnt;
19577c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt -= mblkcnt;
19587c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count < qbp->qb_hiwat) &&
19597c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_hiwat)) {
19607c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
19617c478bd9Sstevel@tonic-gate 		}
19627c478bd9Sstevel@tonic-gate 	}
19637c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
19647c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
19677c478bd9Sstevel@tonic-gate }
19687c478bd9Sstevel@tonic-gate 
19697c478bd9Sstevel@tonic-gate /*
19707c478bd9Sstevel@tonic-gate  * Empty a queue.
19717c478bd9Sstevel@tonic-gate  * If flag is set, remove all messages.  Otherwise, remove
19727c478bd9Sstevel@tonic-gate  * only non-control messages.  If queue falls below its low
19737c478bd9Sstevel@tonic-gate  * water mark, and QWANTW is set, enable the nearest upstream
19747c478bd9Sstevel@tonic-gate  * service procedure.
19757c478bd9Sstevel@tonic-gate  *
19767c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
19777c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushq did not have a check
19787c478bd9Sstevel@tonic-gate  * for q_lowat == 0 in the backenabling test.
19797c478bd9Sstevel@tonic-gate  *
19807c478bd9Sstevel@tonic-gate  * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
19817c478bd9Sstevel@tonic-gate  * if one exists on the queue.
19827c478bd9Sstevel@tonic-gate  */
19837c478bd9Sstevel@tonic-gate void
19847c478bd9Sstevel@tonic-gate flushq_common(queue_t *q, int flag, int pcproto_flag)
19857c478bd9Sstevel@tonic-gate {
19867c478bd9Sstevel@tonic-gate 	mblk_t *mp, *nmp;
19877c478bd9Sstevel@tonic-gate 	qband_t *qbp;
19887c478bd9Sstevel@tonic-gate 	int backenab = 0;
19897c478bd9Sstevel@tonic-gate 	unsigned char bpri;
19907c478bd9Sstevel@tonic-gate 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
19917c478bd9Sstevel@tonic-gate 
19927c478bd9Sstevel@tonic-gate 	if (q->q_first == NULL)
19937c478bd9Sstevel@tonic-gate 		return;
19947c478bd9Sstevel@tonic-gate 
19957c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
19967c478bd9Sstevel@tonic-gate 	mp = q->q_first;
19977c478bd9Sstevel@tonic-gate 	q->q_first = NULL;
19987c478bd9Sstevel@tonic-gate 	q->q_last = NULL;
19997c478bd9Sstevel@tonic-gate 	q->q_count = 0;
20007c478bd9Sstevel@tonic-gate 	q->q_mblkcnt = 0;
20017c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
20027c478bd9Sstevel@tonic-gate 		qbp->qb_first = NULL;
20037c478bd9Sstevel@tonic-gate 		qbp->qb_last = NULL;
20047c478bd9Sstevel@tonic-gate 		qbp->qb_count = 0;
20057c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt = 0;
20067c478bd9Sstevel@tonic-gate 		qbp->qb_flag &= ~QB_FULL;
20077c478bd9Sstevel@tonic-gate 	}
20087c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QFULL;
20097c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
20107c478bd9Sstevel@tonic-gate 	while (mp) {
20117c478bd9Sstevel@tonic-gate 		nmp = mp->b_next;
20127c478bd9Sstevel@tonic-gate 		mp->b_next = mp->b_prev = NULL;
20137c478bd9Sstevel@tonic-gate 
20147c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);
20157c478bd9Sstevel@tonic-gate 
20167c478bd9Sstevel@tonic-gate 		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
20177c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
20187c478bd9Sstevel@tonic-gate 		else if (flag || datamsg(mp->b_datap->db_type))
20197c478bd9Sstevel@tonic-gate 			freemsg(mp);
20207c478bd9Sstevel@tonic-gate 		else
20217c478bd9Sstevel@tonic-gate 			(void) putq(q, mp);
20227c478bd9Sstevel@tonic-gate 		mp = nmp;
20237c478bd9Sstevel@tonic-gate 	}
20247c478bd9Sstevel@tonic-gate 	bpri = 1;
20257c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
20267c478bd9Sstevel@tonic-gate 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
20277c478bd9Sstevel@tonic-gate 		if ((qbp->qb_flag & QB_WANTW) &&
20287c478bd9Sstevel@tonic-gate 		    (((qbp->qb_count < qbp->qb_lowat) &&
20297c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
20307c478bd9Sstevel@tonic-gate 		    qbp->qb_lowat == 0)) {
20317c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_WANTW;
20327c478bd9Sstevel@tonic-gate 			backenab = 1;
20337c478bd9Sstevel@tonic-gate 			qbf[bpri] = 1;
20347c478bd9Sstevel@tonic-gate 		} else
20357c478bd9Sstevel@tonic-gate 			qbf[bpri] = 0;
20367c478bd9Sstevel@tonic-gate 		bpri++;
20377c478bd9Sstevel@tonic-gate 	}
20387c478bd9Sstevel@tonic-gate 	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
20397c478bd9Sstevel@tonic-gate 	if ((q->q_flag & QWANTW) &&
20407c478bd9Sstevel@tonic-gate 	    (((q->q_count < q->q_lowat) &&
20417c478bd9Sstevel@tonic-gate 	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
20427c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QWANTW;
20437c478bd9Sstevel@tonic-gate 		backenab = 1;
20447c478bd9Sstevel@tonic-gate 		qbf[0] = 1;
20457c478bd9Sstevel@tonic-gate 	} else
20467c478bd9Sstevel@tonic-gate 		qbf[0] = 0;
20477c478bd9Sstevel@tonic-gate 
20487c478bd9Sstevel@tonic-gate 	/*
20497c478bd9Sstevel@tonic-gate 	 * If any band can now be written to, and there is a writer
20507c478bd9Sstevel@tonic-gate 	 * for that band, then backenable the closest service procedure.
20517c478bd9Sstevel@tonic-gate 	 */
20527c478bd9Sstevel@tonic-gate 	if (backenab) {
20537c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
20547c478bd9Sstevel@tonic-gate 		for (bpri = q->q_nband; bpri != 0; bpri--)
20557c478bd9Sstevel@tonic-gate 			if (qbf[bpri])
2056*116094b2Smicheng 				backenable(q, bpri);
20577c478bd9Sstevel@tonic-gate 		if (qbf[0])
20587c478bd9Sstevel@tonic-gate 			backenable(q, 0);
20597c478bd9Sstevel@tonic-gate 	} else
20607c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
20617c478bd9Sstevel@tonic-gate }
20627c478bd9Sstevel@tonic-gate 
20637c478bd9Sstevel@tonic-gate /*
20647c478bd9Sstevel@tonic-gate  * The real flushing takes place in flushq_common. This is done so that
20657c478bd9Sstevel@tonic-gate  * a flag which specifies whether or not M_PCPROTO messages should be flushed
20667c478bd9Sstevel@tonic-gate  * or not. Currently the only place that uses this flag is the stream head.
20677c478bd9Sstevel@tonic-gate  */
20687c478bd9Sstevel@tonic-gate void
20697c478bd9Sstevel@tonic-gate flushq(queue_t *q, int flag)
20707c478bd9Sstevel@tonic-gate {
20717c478bd9Sstevel@tonic-gate 	flushq_common(q, flag, 0);
20727c478bd9Sstevel@tonic-gate }
20737c478bd9Sstevel@tonic-gate 
20747c478bd9Sstevel@tonic-gate /*
20757c478bd9Sstevel@tonic-gate  * Flush the queue of messages of the given priority band.
20767c478bd9Sstevel@tonic-gate  * There is some duplication of code between flushq and flushband.
20777c478bd9Sstevel@tonic-gate  * This is because we want to optimize the code as much as possible.
20787c478bd9Sstevel@tonic-gate  * The assumption is that there will be more messages in the normal
20797c478bd9Sstevel@tonic-gate  * (priority 0) band than in any other.
20807c478bd9Sstevel@tonic-gate  *
20817c478bd9Sstevel@tonic-gate  * Historical note: when merging the M_FLUSH code in strrput with this
20827c478bd9Sstevel@tonic-gate  * code one difference was discovered. flushband had an extra check for
20837c478bd9Sstevel@tonic-gate  * did not have a check for (mp->b_datap->db_type < QPCTL) in the band 0
20847c478bd9Sstevel@tonic-gate  * case. That check does not match the man page for flushband and was not
20857c478bd9Sstevel@tonic-gate  * in the strrput flush code hence it was removed.
20867c478bd9Sstevel@tonic-gate  */
20877c478bd9Sstevel@tonic-gate void
20887c478bd9Sstevel@tonic-gate flushband(queue_t *q, unsigned char pri, int flag)
20897c478bd9Sstevel@tonic-gate {
20907c478bd9Sstevel@tonic-gate 	mblk_t *mp;
20917c478bd9Sstevel@tonic-gate 	mblk_t *nmp;
20927c478bd9Sstevel@tonic-gate 	mblk_t *last;
20937c478bd9Sstevel@tonic-gate 	qband_t *qbp;
20947c478bd9Sstevel@tonic-gate 	int band;
20957c478bd9Sstevel@tonic-gate 
20967c478bd9Sstevel@tonic-gate 	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
20977c478bd9Sstevel@tonic-gate 	if (pri > q->q_nband) {
20987c478bd9Sstevel@tonic-gate 		return;
20997c478bd9Sstevel@tonic-gate 	}
21007c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
21017c478bd9Sstevel@tonic-gate 	if (pri == 0) {
21027c478bd9Sstevel@tonic-gate 		mp = q->q_first;
21037c478bd9Sstevel@tonic-gate 		q->q_first = NULL;
21047c478bd9Sstevel@tonic-gate 		q->q_last = NULL;
21057c478bd9Sstevel@tonic-gate 		q->q_count = 0;
21067c478bd9Sstevel@tonic-gate 		q->q_mblkcnt = 0;
21077c478bd9Sstevel@tonic-gate 		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
21087c478bd9Sstevel@tonic-gate 			qbp->qb_first = NULL;
21097c478bd9Sstevel@tonic-gate 			qbp->qb_last = NULL;
21107c478bd9Sstevel@tonic-gate 			qbp->qb_count = 0;
21117c478bd9Sstevel@tonic-gate 			qbp->qb_mblkcnt = 0;
21127c478bd9Sstevel@tonic-gate 			qbp->qb_flag &= ~QB_FULL;
21137c478bd9Sstevel@tonic-gate 		}
21147c478bd9Sstevel@tonic-gate 		q->q_flag &= ~QFULL;
21157c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
21167c478bd9Sstevel@tonic-gate 		while (mp) {
21177c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
21187c478bd9Sstevel@tonic-gate 			mp->b_next = mp->b_prev = NULL;
21197c478bd9Sstevel@tonic-gate 			if ((mp->b_band == 0) &&
21207c478bd9Sstevel@tonic-gate 				((flag == FLUSHALL) ||
21217c478bd9Sstevel@tonic-gate 				datamsg(mp->b_datap->db_type)))
21227c478bd9Sstevel@tonic-gate 				freemsg(mp);
21237c478bd9Sstevel@tonic-gate 			else
21247c478bd9Sstevel@tonic-gate 				(void) putq(q, mp);
21257c478bd9Sstevel@tonic-gate 			mp = nmp;
21267c478bd9Sstevel@tonic-gate 		}
21277c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
21287c478bd9Sstevel@tonic-gate 		if ((q->q_flag & QWANTW) &&
21297c478bd9Sstevel@tonic-gate 		    (((q->q_count < q->q_lowat) &&
21307c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
21317c478bd9Sstevel@tonic-gate 			q->q_flag &= ~QWANTW;
21327c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21337c478bd9Sstevel@tonic-gate 
2134*116094b2Smicheng 			backenable(q, pri);
21357c478bd9Sstevel@tonic-gate 		} else
21367c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21377c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
21387c478bd9Sstevel@tonic-gate 		boolean_t flushed = B_FALSE;
21397c478bd9Sstevel@tonic-gate 		band = pri;
21407c478bd9Sstevel@tonic-gate 
21417c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
21427c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
21437c478bd9Sstevel@tonic-gate 		while (--band > 0)
21447c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
21457c478bd9Sstevel@tonic-gate 		mp = qbp->qb_first;
21467c478bd9Sstevel@tonic-gate 		if (mp == NULL) {
21477c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
21487c478bd9Sstevel@tonic-gate 			return;
21497c478bd9Sstevel@tonic-gate 		}
21507c478bd9Sstevel@tonic-gate 		last = qbp->qb_last->b_next;
21517c478bd9Sstevel@tonic-gate 		/*
21527c478bd9Sstevel@tonic-gate 		 * rmvq_noenab() and freemsg() are called for each mblk that
21537c478bd9Sstevel@tonic-gate 		 * meets the criteria.  The loop is executed until the last
21547c478bd9Sstevel@tonic-gate 		 * mblk has been processed.
21557c478bd9Sstevel@tonic-gate 		 */
21567c478bd9Sstevel@tonic-gate 		while (mp != last) {
21577c478bd9Sstevel@tonic-gate 			ASSERT(mp->b_band == pri);
21587c478bd9Sstevel@tonic-gate 			nmp = mp->b_next;
21597c478bd9Sstevel@tonic-gate 			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
21607c478bd9Sstevel@tonic-gate 				rmvq_noenab(q, mp);
21617c478bd9Sstevel@tonic-gate 				freemsg(mp);
21627c478bd9Sstevel@tonic-gate 				flushed = B_TRUE;
21637c478bd9Sstevel@tonic-gate 			}
21647c478bd9Sstevel@tonic-gate 			mp = nmp;
21657c478bd9Sstevel@tonic-gate 		}
21667c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
21677c478bd9Sstevel@tonic-gate 
21687c478bd9Sstevel@tonic-gate 		/*
21697c478bd9Sstevel@tonic-gate 		 * If any mblk(s) has been freed, we know that qbackenable()
21707c478bd9Sstevel@tonic-gate 		 * will need to be called.
21717c478bd9Sstevel@tonic-gate 		 */
21727c478bd9Sstevel@tonic-gate 		if (flushed)
2173*116094b2Smicheng 			qbackenable(q, pri);
21747c478bd9Sstevel@tonic-gate 	}
21757c478bd9Sstevel@tonic-gate }
21767c478bd9Sstevel@tonic-gate 
21777c478bd9Sstevel@tonic-gate /*
21787c478bd9Sstevel@tonic-gate  * Return 1 if the queue is not full.  If the queue is full, return
21797c478bd9Sstevel@tonic-gate  * 0 (may not put message) and set QWANTW flag (caller wants to write
21807c478bd9Sstevel@tonic-gate  * to the queue).
21817c478bd9Sstevel@tonic-gate  */
21827c478bd9Sstevel@tonic-gate int
21837c478bd9Sstevel@tonic-gate canput(queue_t *q)
21847c478bd9Sstevel@tonic-gate {
21857c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
21867c478bd9Sstevel@tonic-gate 
21877c478bd9Sstevel@tonic-gate 	/* this is for loopback transports, they should not do a canput */
21887c478bd9Sstevel@tonic-gate 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
21897c478bd9Sstevel@tonic-gate 
21907c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
21917c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
21927c478bd9Sstevel@tonic-gate 
21937c478bd9Sstevel@tonic-gate 	if (!(q->q_flag & QFULL)) {
21947c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
21957c478bd9Sstevel@tonic-gate 		return (1);
21967c478bd9Sstevel@tonic-gate 	}
21977c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
21987c478bd9Sstevel@tonic-gate 	if (q->q_flag & QFULL) {
21997c478bd9Sstevel@tonic-gate 		q->q_flag |= QWANTW;
22007c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
22017c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
22027c478bd9Sstevel@tonic-gate 		return (0);
22037c478bd9Sstevel@tonic-gate 	}
22047c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
22057c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
22067c478bd9Sstevel@tonic-gate 	return (1);
22077c478bd9Sstevel@tonic-gate }
22087c478bd9Sstevel@tonic-gate 
22097c478bd9Sstevel@tonic-gate /*
22107c478bd9Sstevel@tonic-gate  * This is the new canput for use with priority bands.  Return 1 if the
22117c478bd9Sstevel@tonic-gate  * band is not full.  If the band is full, return 0 (may not put message)
22127c478bd9Sstevel@tonic-gate  * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to
22137c478bd9Sstevel@tonic-gate  * write to the queue).
22147c478bd9Sstevel@tonic-gate  */
22157c478bd9Sstevel@tonic-gate int
22167c478bd9Sstevel@tonic-gate bcanput(queue_t *q, unsigned char pri)
22177c478bd9Sstevel@tonic-gate {
22187c478bd9Sstevel@tonic-gate 	qband_t *qbp;
22197c478bd9Sstevel@tonic-gate 
22207c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
22217c478bd9Sstevel@tonic-gate 	if (!q)
22227c478bd9Sstevel@tonic-gate 		return (0);
22237c478bd9Sstevel@tonic-gate 
22247c478bd9Sstevel@tonic-gate 	/* Find next forward module that has a service procedure */
22257c478bd9Sstevel@tonic-gate 	q = q->q_nfsrv;
22267c478bd9Sstevel@tonic-gate 
22277c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
22287c478bd9Sstevel@tonic-gate 	if (pri == 0) {
22297c478bd9Sstevel@tonic-gate 		if (q->q_flag & QFULL) {
22307c478bd9Sstevel@tonic-gate 			q->q_flag |= QWANTW;
22317c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22327c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22337c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
22347c478bd9Sstevel@tonic-gate 			return (0);
22357c478bd9Sstevel@tonic-gate 		}
22367c478bd9Sstevel@tonic-gate 	} else {	/* pri != 0 */
22377c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
22387c478bd9Sstevel@tonic-gate 			/*
22397c478bd9Sstevel@tonic-gate 			 * No band exists yet, so return success.
22407c478bd9Sstevel@tonic-gate 			 */
22417c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22427c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22437c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 1);
22447c478bd9Sstevel@tonic-gate 			return (1);
22457c478bd9Sstevel@tonic-gate 		}
22467c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
22477c478bd9Sstevel@tonic-gate 		while (--pri)
22487c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
22497c478bd9Sstevel@tonic-gate 		if (qbp->qb_flag & QB_FULL) {
22507c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_WANTW;
22517c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
22527c478bd9Sstevel@tonic-gate 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22537c478bd9Sstevel@tonic-gate 				"bcanput:%p %X %d", q, pri, 0);
22547c478bd9Sstevel@tonic-gate 			return (0);
22557c478bd9Sstevel@tonic-gate 		}
22567c478bd9Sstevel@tonic-gate 	}
22577c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
22587c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
22597c478bd9Sstevel@tonic-gate 		"bcanput:%p %X %d", q, pri, 1);
22607c478bd9Sstevel@tonic-gate 	return (1);
22617c478bd9Sstevel@tonic-gate }
22627c478bd9Sstevel@tonic-gate 
22637c478bd9Sstevel@tonic-gate /*
22647c478bd9Sstevel@tonic-gate  * Put a message on a queue.
22657c478bd9Sstevel@tonic-gate  *
22667c478bd9Sstevel@tonic-gate  * Messages are enqueued on a priority basis.  The priority classes
22677c478bd9Sstevel@tonic-gate  * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
22687c478bd9Sstevel@tonic-gate  * and B_NORMAL (type < QPCTL && band == 0).
22697c478bd9Sstevel@tonic-gate  *
22707c478bd9Sstevel@tonic-gate  * Add appropriate weighted data block sizes to queue count.
22717c478bd9Sstevel@tonic-gate  * If queue hits high water mark then set QFULL flag.
22727c478bd9Sstevel@tonic-gate  *
22737c478bd9Sstevel@tonic-gate  * If QNOENAB is not set (putq is allowed to enable the queue),
22747c478bd9Sstevel@tonic-gate  * enable the queue only if the message is PRIORITY,
22757c478bd9Sstevel@tonic-gate  * or the QWANTR flag is set (indicating that the service procedure
22767c478bd9Sstevel@tonic-gate  * is ready to read the queue.  This implies that a service
22777c478bd9Sstevel@tonic-gate  * procedure must NEVER put a high priority message back on its own
22787c478bd9Sstevel@tonic-gate  * queue, as this would result in an infinite loop (!).
22797c478bd9Sstevel@tonic-gate  */
22807c478bd9Sstevel@tonic-gate int
22817c478bd9Sstevel@tonic-gate putq(queue_t *q, mblk_t *bp)
22827c478bd9Sstevel@tonic-gate {
22837c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
22847c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
22857c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
22867c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
22877c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
22887c478bd9Sstevel@tonic-gate 
22897c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
22907c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
22917c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
22927c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
22937c478bd9Sstevel@tonic-gate 	} else
22947c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
22957c478bd9Sstevel@tonic-gate 
22967c478bd9Sstevel@tonic-gate 	/*
22977c478bd9Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
22987c478bd9Sstevel@tonic-gate 	 * allocated, do so.
22997c478bd9Sstevel@tonic-gate 	 */
23007c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
23017c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
23027c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
23037c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
23047c478bd9Sstevel@tonic-gate 		int i;
23057c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
23067c478bd9Sstevel@tonic-gate 
23077c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
23087c478bd9Sstevel@tonic-gate 
23097c478bd9Sstevel@tonic-gate 			/*
23107c478bd9Sstevel@tonic-gate 			 * The qband structure for this priority band is
23117c478bd9Sstevel@tonic-gate 			 * not on the queue yet, so we have to allocate
23127c478bd9Sstevel@tonic-gate 			 * one on the fly.  It would be wasteful to
23137c478bd9Sstevel@tonic-gate 			 * associate the qband structures with every
23147c478bd9Sstevel@tonic-gate 			 * queue when the queues are allocated.  This is
23157c478bd9Sstevel@tonic-gate 			 * because most queues will only need the normal
23167c478bd9Sstevel@tonic-gate 			 * band of flow which can be described entirely
23177c478bd9Sstevel@tonic-gate 			 * by the queue itself.
23187c478bd9Sstevel@tonic-gate 			 */
23197c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
23207c478bd9Sstevel@tonic-gate 			while (*qbpp)
23217c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
23227c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
23237c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
23247c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
23257c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
23267c478bd9Sstevel@tonic-gate 					return (0);
23277c478bd9Sstevel@tonic-gate 				}
23287c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
23297c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
23307c478bd9Sstevel@tonic-gate 				q->q_nband++;
23317c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
23327c478bd9Sstevel@tonic-gate 			}
23337c478bd9Sstevel@tonic-gate 		}
23347c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
23357c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
23367c478bd9Sstevel@tonic-gate 		i = bp->b_band;
23377c478bd9Sstevel@tonic-gate 		while (--i)
23387c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
23397c478bd9Sstevel@tonic-gate 	}
23407c478bd9Sstevel@tonic-gate 
23417c478bd9Sstevel@tonic-gate 	/*
23427c478bd9Sstevel@tonic-gate 	 * If queue is empty, add the message and initialize the pointers.
23437c478bd9Sstevel@tonic-gate 	 * Otherwise, adjust message pointers and queue pointers based on
23447c478bd9Sstevel@tonic-gate 	 * the type of the message and where it belongs on the queue.  Some
23457c478bd9Sstevel@tonic-gate 	 * code is duplicated to minimize the number of conditionals and
23467c478bd9Sstevel@tonic-gate 	 * hopefully minimize the amount of time this routine takes.
23477c478bd9Sstevel@tonic-gate 	 */
23487c478bd9Sstevel@tonic-gate 	if (!q->q_first) {
23497c478bd9Sstevel@tonic-gate 		bp->b_next = NULL;
23507c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
23517c478bd9Sstevel@tonic-gate 		q->q_first = bp;
23527c478bd9Sstevel@tonic-gate 		q->q_last = bp;
23537c478bd9Sstevel@tonic-gate 		if (qbp) {
23547c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
23557c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
23567c478bd9Sstevel@tonic-gate 		}
23577c478bd9Sstevel@tonic-gate 	} else if (!qbp) {	/* bp->b_band == 0 */
23587c478bd9Sstevel@tonic-gate 
23597c478bd9Sstevel@tonic-gate 		/*
23607c478bd9Sstevel@tonic-gate 		 * If queue class of message is less than or equal to
23617c478bd9Sstevel@tonic-gate 		 * that of the last one on the queue, tack on to the end.
23627c478bd9Sstevel@tonic-gate 		 */
23637c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
23647c478bd9Sstevel@tonic-gate 		if (mcls <= (int)queclass(tmp)) {
23657c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
23667c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
23677c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
23687c478bd9Sstevel@tonic-gate 			q->q_last = bp;
23697c478bd9Sstevel@tonic-gate 		} else {
23707c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
23717c478bd9Sstevel@tonic-gate 			while ((int)queclass(tmp) >= mcls)
23727c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
23737c478bd9Sstevel@tonic-gate 
23747c478bd9Sstevel@tonic-gate 			/*
23757c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
23767c478bd9Sstevel@tonic-gate 			 */
23777c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
23787c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
23797c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
23807c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
23817c478bd9Sstevel@tonic-gate 			else
23827c478bd9Sstevel@tonic-gate 				q->q_first = bp;
23837c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
23847c478bd9Sstevel@tonic-gate 		}
23857c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band != 0 */
23867c478bd9Sstevel@tonic-gate 		if (qbp->qb_first) {
23877c478bd9Sstevel@tonic-gate 			tmp = qbp->qb_last;
23887c478bd9Sstevel@tonic-gate 
23897c478bd9Sstevel@tonic-gate 			/*
23907c478bd9Sstevel@tonic-gate 			 * Insert bp after the last message in this band.
23917c478bd9Sstevel@tonic-gate 			 */
23927c478bd9Sstevel@tonic-gate 			bp->b_next = tmp->b_next;
23937c478bd9Sstevel@tonic-gate 			if (tmp->b_next)
23947c478bd9Sstevel@tonic-gate 				tmp->b_next->b_prev = bp;
23957c478bd9Sstevel@tonic-gate 			else
23967c478bd9Sstevel@tonic-gate 				q->q_last = bp;
23977c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
23987c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
23997c478bd9Sstevel@tonic-gate 		} else {
24007c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
24017c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
24027c478bd9Sstevel@tonic-gate 			    (bp->b_band <= tmp->b_band)) {
24037c478bd9Sstevel@tonic-gate 
24047c478bd9Sstevel@tonic-gate 				/*
24057c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
24067c478bd9Sstevel@tonic-gate 				 */
24077c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
24087c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
24097c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
24107c478bd9Sstevel@tonic-gate 				q->q_last = bp;
24117c478bd9Sstevel@tonic-gate 			} else {
24127c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
24137c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
24147c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
24157c478bd9Sstevel@tonic-gate 				while (tmp->b_band >= bp->b_band)
24167c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
24177c478bd9Sstevel@tonic-gate 
24187c478bd9Sstevel@tonic-gate 				/*
24197c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
24207c478bd9Sstevel@tonic-gate 				 */
24217c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
24227c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
24237c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
24247c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
24257c478bd9Sstevel@tonic-gate 				else
24267c478bd9Sstevel@tonic-gate 					q->q_first = bp;
24277c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
24287c478bd9Sstevel@tonic-gate 			}
24297c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
24307c478bd9Sstevel@tonic-gate 		}
24317c478bd9Sstevel@tonic-gate 		qbp->qb_last = bp;
24327c478bd9Sstevel@tonic-gate 	}
24337c478bd9Sstevel@tonic-gate 
24347c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
24357c478bd9Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
24367c478bd9Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
24377c478bd9Sstevel@tonic-gate 		mblkcnt++;
24387c478bd9Sstevel@tonic-gate 	}
24397c478bd9Sstevel@tonic-gate 	if (qbp) {
24407c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
24417c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
24427c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
24437c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
24447c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
24457c478bd9Sstevel@tonic-gate 		}
24467c478bd9Sstevel@tonic-gate 	} else {
24477c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
24487c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
24497c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
24507c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
24517c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
24527c478bd9Sstevel@tonic-gate 		}
24537c478bd9Sstevel@tonic-gate 	}
24547c478bd9Sstevel@tonic-gate 
24557c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) ||
24587c478bd9Sstevel@tonic-gate 	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
24597c478bd9Sstevel@tonic-gate 		qenable_locked(q);
24607c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
24617c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
24627c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
24637c478bd9Sstevel@tonic-gate 
24647c478bd9Sstevel@tonic-gate 	return (1);
24657c478bd9Sstevel@tonic-gate }
24667c478bd9Sstevel@tonic-gate 
24677c478bd9Sstevel@tonic-gate /*
24687c478bd9Sstevel@tonic-gate  * Put stuff back at beginning of Q according to priority order.
24697c478bd9Sstevel@tonic-gate  * See comment on putq above for details.
24707c478bd9Sstevel@tonic-gate  */
24717c478bd9Sstevel@tonic-gate int
24727c478bd9Sstevel@tonic-gate putbq(queue_t *q, mblk_t *bp)
24737c478bd9Sstevel@tonic-gate {
24747c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
24757c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
24767c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(bp);
24777c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
24787c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
24797c478bd9Sstevel@tonic-gate 
24807c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
24817c478bd9Sstevel@tonic-gate 	ASSERT(bp->b_next == NULL);
24827c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
24837c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
24847c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
24857c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
24867c478bd9Sstevel@tonic-gate 	} else
24877c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
24887c478bd9Sstevel@tonic-gate 
24897c478bd9Sstevel@tonic-gate 	/*
24907c478bd9Sstevel@tonic-gate 	 * Make sanity checks and if qband structure is not yet
24917c478bd9Sstevel@tonic-gate 	 * allocated, do so.
24927c478bd9Sstevel@tonic-gate 	 */
24937c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
24947c478bd9Sstevel@tonic-gate 		if (bp->b_band != 0)
24957c478bd9Sstevel@tonic-gate 			bp->b_band = 0;		/* force to be correct */
24967c478bd9Sstevel@tonic-gate 	} else if (bp->b_band != 0) {
24977c478bd9Sstevel@tonic-gate 		int i;
24987c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
24997c478bd9Sstevel@tonic-gate 
25007c478bd9Sstevel@tonic-gate 		if (bp->b_band > q->q_nband) {
25017c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
25027c478bd9Sstevel@tonic-gate 			while (*qbpp)
25037c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
25047c478bd9Sstevel@tonic-gate 			while (bp->b_band > q->q_nband) {
25057c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
25067c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
25077c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
25087c478bd9Sstevel@tonic-gate 					return (0);
25097c478bd9Sstevel@tonic-gate 				}
25107c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
25117c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
25127c478bd9Sstevel@tonic-gate 				q->q_nband++;
25137c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
25147c478bd9Sstevel@tonic-gate 			}
25157c478bd9Sstevel@tonic-gate 		}
25167c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
25177c478bd9Sstevel@tonic-gate 		i = bp->b_band;
25187c478bd9Sstevel@tonic-gate 		while (--i)
25197c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
25207c478bd9Sstevel@tonic-gate 	}
25217c478bd9Sstevel@tonic-gate 
25227c478bd9Sstevel@tonic-gate 	/*
25237c478bd9Sstevel@tonic-gate 	 * If queue is empty or if message is high priority,
25247c478bd9Sstevel@tonic-gate 	 * place on the front of the queue.
25257c478bd9Sstevel@tonic-gate 	 */
25267c478bd9Sstevel@tonic-gate 	tmp = q->q_first;
25277c478bd9Sstevel@tonic-gate 	if ((!tmp) || (mcls == QPCTL)) {
25287c478bd9Sstevel@tonic-gate 		bp->b_next = tmp;
25297c478bd9Sstevel@tonic-gate 		if (tmp)
25307c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
25317c478bd9Sstevel@tonic-gate 		else
25327c478bd9Sstevel@tonic-gate 			q->q_last = bp;
25337c478bd9Sstevel@tonic-gate 		q->q_first = bp;
25347c478bd9Sstevel@tonic-gate 		bp->b_prev = NULL;
25357c478bd9Sstevel@tonic-gate 		if (qbp) {
25367c478bd9Sstevel@tonic-gate 			qbp->qb_first = bp;
25377c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
25387c478bd9Sstevel@tonic-gate 		}
25397c478bd9Sstevel@tonic-gate 	} else if (qbp) {	/* bp->b_band != 0 */
25407c478bd9Sstevel@tonic-gate 		tmp = qbp->qb_first;
25417c478bd9Sstevel@tonic-gate 		if (tmp) {
25427c478bd9Sstevel@tonic-gate 
25437c478bd9Sstevel@tonic-gate 			/*
25447c478bd9Sstevel@tonic-gate 			 * Insert bp before the first message in this band.
25457c478bd9Sstevel@tonic-gate 			 */
25467c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
25477c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
25487c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
25497c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
25507c478bd9Sstevel@tonic-gate 			else
25517c478bd9Sstevel@tonic-gate 				q->q_first = bp;
25527c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
25537c478bd9Sstevel@tonic-gate 		} else {
25547c478bd9Sstevel@tonic-gate 			tmp = q->q_last;
25557c478bd9Sstevel@tonic-gate 			if ((mcls < (int)queclass(tmp)) ||
25567c478bd9Sstevel@tonic-gate 			    (bp->b_band < tmp->b_band)) {
25577c478bd9Sstevel@tonic-gate 
25587c478bd9Sstevel@tonic-gate 				/*
25597c478bd9Sstevel@tonic-gate 				 * Tack bp on end of queue.
25607c478bd9Sstevel@tonic-gate 				 */
25617c478bd9Sstevel@tonic-gate 				bp->b_next = NULL;
25627c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp;
25637c478bd9Sstevel@tonic-gate 				tmp->b_next = bp;
25647c478bd9Sstevel@tonic-gate 				q->q_last = bp;
25657c478bd9Sstevel@tonic-gate 			} else {
25667c478bd9Sstevel@tonic-gate 				tmp = q->q_first;
25677c478bd9Sstevel@tonic-gate 				while (tmp->b_datap->db_type >= QPCTL)
25687c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
25697c478bd9Sstevel@tonic-gate 				while (tmp->b_band > bp->b_band)
25707c478bd9Sstevel@tonic-gate 					tmp = tmp->b_next;
25717c478bd9Sstevel@tonic-gate 
25727c478bd9Sstevel@tonic-gate 				/*
25737c478bd9Sstevel@tonic-gate 				 * Insert bp before tmp.
25747c478bd9Sstevel@tonic-gate 				 */
25757c478bd9Sstevel@tonic-gate 				bp->b_next = tmp;
25767c478bd9Sstevel@tonic-gate 				bp->b_prev = tmp->b_prev;
25777c478bd9Sstevel@tonic-gate 				if (tmp->b_prev)
25787c478bd9Sstevel@tonic-gate 					tmp->b_prev->b_next = bp;
25797c478bd9Sstevel@tonic-gate 				else
25807c478bd9Sstevel@tonic-gate 					q->q_first = bp;
25817c478bd9Sstevel@tonic-gate 				tmp->b_prev = bp;
25827c478bd9Sstevel@tonic-gate 			}
25837c478bd9Sstevel@tonic-gate 			qbp->qb_last = bp;
25847c478bd9Sstevel@tonic-gate 		}
25857c478bd9Sstevel@tonic-gate 		qbp->qb_first = bp;
25867c478bd9Sstevel@tonic-gate 	} else {		/* bp->b_band == 0 && !QPCTL */
25877c478bd9Sstevel@tonic-gate 
25887c478bd9Sstevel@tonic-gate 		/*
25897c478bd9Sstevel@tonic-gate 		 * If the queue class or band is less than that of the last
25907c478bd9Sstevel@tonic-gate 		 * message on the queue, tack bp on the end of the queue.
25917c478bd9Sstevel@tonic-gate 		 */
25927c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
25937c478bd9Sstevel@tonic-gate 		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
25947c478bd9Sstevel@tonic-gate 			bp->b_next = NULL;
25957c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp;
25967c478bd9Sstevel@tonic-gate 			tmp->b_next = bp;
25977c478bd9Sstevel@tonic-gate 			q->q_last = bp;
25987c478bd9Sstevel@tonic-gate 		} else {
25997c478bd9Sstevel@tonic-gate 			tmp = q->q_first;
26007c478bd9Sstevel@tonic-gate 			while (tmp->b_datap->db_type >= QPCTL)
26017c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
26027c478bd9Sstevel@tonic-gate 			while (tmp->b_band > bp->b_band)
26037c478bd9Sstevel@tonic-gate 				tmp = tmp->b_next;
26047c478bd9Sstevel@tonic-gate 
26057c478bd9Sstevel@tonic-gate 			/*
26067c478bd9Sstevel@tonic-gate 			 * Insert bp before tmp.
26077c478bd9Sstevel@tonic-gate 			 */
26087c478bd9Sstevel@tonic-gate 			bp->b_next = tmp;
26097c478bd9Sstevel@tonic-gate 			bp->b_prev = tmp->b_prev;
26107c478bd9Sstevel@tonic-gate 			if (tmp->b_prev)
26117c478bd9Sstevel@tonic-gate 				tmp->b_prev->b_next = bp;
26127c478bd9Sstevel@tonic-gate 			else
26137c478bd9Sstevel@tonic-gate 				q->q_first = bp;
26147c478bd9Sstevel@tonic-gate 			tmp->b_prev = bp;
26157c478bd9Sstevel@tonic-gate 		}
26167c478bd9Sstevel@tonic-gate 	}
26177c478bd9Sstevel@tonic-gate 
26187c478bd9Sstevel@tonic-gate 	/* Get message byte count for q_count accounting */
26197c478bd9Sstevel@tonic-gate 	for (tmp = bp; tmp; tmp = tmp->b_cont) {
26207c478bd9Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
26217c478bd9Sstevel@tonic-gate 		mblkcnt++;
26227c478bd9Sstevel@tonic-gate 	}
26237c478bd9Sstevel@tonic-gate 	if (qbp) {
26247c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
26257c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
26267c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
26277c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
26287c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
26297c478bd9Sstevel@tonic-gate 		}
26307c478bd9Sstevel@tonic-gate 	} else {
26317c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
26327c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
26337c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
26347c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
26357c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
26367c478bd9Sstevel@tonic-gate 		}
26377c478bd9Sstevel@tonic-gate 	}
26387c478bd9Sstevel@tonic-gate 
26397c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);
26407c478bd9Sstevel@tonic-gate 
26417c478bd9Sstevel@tonic-gate 	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
26427c478bd9Sstevel@tonic-gate 		qenable_locked(q);
26437c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
26447c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
26457c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
26467c478bd9Sstevel@tonic-gate 
26477c478bd9Sstevel@tonic-gate 	return (1);
26487c478bd9Sstevel@tonic-gate }
26497c478bd9Sstevel@tonic-gate 
26507c478bd9Sstevel@tonic-gate /*
26517c478bd9Sstevel@tonic-gate  * Insert a message before an existing message on the queue.  If the
26527c478bd9Sstevel@tonic-gate  * existing message is NULL, the new messages is placed on the end of
26537c478bd9Sstevel@tonic-gate  * the queue.  The queue class of the new message is ignored.  However,
26547c478bd9Sstevel@tonic-gate  * the priority band of the new message must adhere to the following
26557c478bd9Sstevel@tonic-gate  * ordering:
26567c478bd9Sstevel@tonic-gate  *
26577c478bd9Sstevel@tonic-gate  *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
26587c478bd9Sstevel@tonic-gate  *
26597c478bd9Sstevel@tonic-gate  * All flow control parameters are updated.
26607c478bd9Sstevel@tonic-gate  *
26617c478bd9Sstevel@tonic-gate  * insq can be called with the stream frozen, but other utility functions
26627c478bd9Sstevel@tonic-gate  * holding QLOCK, and by streams modules without any locks/frozen.
26637c478bd9Sstevel@tonic-gate  */
26647c478bd9Sstevel@tonic-gate int
26657c478bd9Sstevel@tonic-gate insq(queue_t *q, mblk_t *emp, mblk_t *mp)
26667c478bd9Sstevel@tonic-gate {
26677c478bd9Sstevel@tonic-gate 	mblk_t *tmp;
26687c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
26697c478bd9Sstevel@tonic-gate 	int mcls = (int)queclass(mp);
26707c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
26717c478bd9Sstevel@tonic-gate 	int	bytecnt = 0, mblkcnt = 0;
26727c478bd9Sstevel@tonic-gate 
26737c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
26747c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
26757c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
26767c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
26777c478bd9Sstevel@tonic-gate 	} else if (MUTEX_HELD(QLOCK(q))) {
26787c478bd9Sstevel@tonic-gate 		/* Don't drop lock on exit */
26797c478bd9Sstevel@tonic-gate 		freezer = curthread;
26807c478bd9Sstevel@tonic-gate 	} else
26817c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
26827c478bd9Sstevel@tonic-gate 
26837c478bd9Sstevel@tonic-gate 	if (mcls == QPCTL) {
26847c478bd9Sstevel@tonic-gate 		if (mp->b_band != 0)
26857c478bd9Sstevel@tonic-gate 			mp->b_band = 0;		/* force to be correct */
26867c478bd9Sstevel@tonic-gate 		if (emp && emp->b_prev &&
26877c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_datap->db_type < QPCTL))
26887c478bd9Sstevel@tonic-gate 			goto badord;
26897c478bd9Sstevel@tonic-gate 	}
26907c478bd9Sstevel@tonic-gate 	if (emp) {
26917c478bd9Sstevel@tonic-gate 		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
26927c478bd9Sstevel@tonic-gate 		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
26937c478bd9Sstevel@tonic-gate 		    (emp->b_prev->b_band < mp->b_band))) {
26947c478bd9Sstevel@tonic-gate 			goto badord;
26957c478bd9Sstevel@tonic-gate 		}
26967c478bd9Sstevel@tonic-gate 	} else {
26977c478bd9Sstevel@tonic-gate 		tmp = q->q_last;
26987c478bd9Sstevel@tonic-gate 		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
26997c478bd9Sstevel@tonic-gate badord:
27007c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
27017c478bd9Sstevel@tonic-gate 			    "insq: attempt to insert message out of order "
27027c478bd9Sstevel@tonic-gate 			    "on q %p", (void *)q);
27037c478bd9Sstevel@tonic-gate 			if (freezer != curthread)
27047c478bd9Sstevel@tonic-gate 				mutex_exit(QLOCK(q));
27057c478bd9Sstevel@tonic-gate 			return (0);
27067c478bd9Sstevel@tonic-gate 		}
27077c478bd9Sstevel@tonic-gate 	}
27087c478bd9Sstevel@tonic-gate 
27097c478bd9Sstevel@tonic-gate 	if (mp->b_band != 0) {
27107c478bd9Sstevel@tonic-gate 		int i;
27117c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
27127c478bd9Sstevel@tonic-gate 
27137c478bd9Sstevel@tonic-gate 		if (mp->b_band > q->q_nband) {
27147c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
27157c478bd9Sstevel@tonic-gate 			while (*qbpp)
27167c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
27177c478bd9Sstevel@tonic-gate 			while (mp->b_band > q->q_nband) {
27187c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
27197c478bd9Sstevel@tonic-gate 					if (freezer != curthread)
27207c478bd9Sstevel@tonic-gate 						mutex_exit(QLOCK(q));
27217c478bd9Sstevel@tonic-gate 					return (0);
27227c478bd9Sstevel@tonic-gate 				}
27237c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
27247c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
27257c478bd9Sstevel@tonic-gate 				q->q_nband++;
27267c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
27277c478bd9Sstevel@tonic-gate 			}
27287c478bd9Sstevel@tonic-gate 		}
27297c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
27307c478bd9Sstevel@tonic-gate 		i = mp->b_band;
27317c478bd9Sstevel@tonic-gate 		while (--i)
27327c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
27337c478bd9Sstevel@tonic-gate 	}
27347c478bd9Sstevel@tonic-gate 
27357c478bd9Sstevel@tonic-gate 	if ((mp->b_next = emp) != NULL) {
27367c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = emp->b_prev) != NULL)
27377c478bd9Sstevel@tonic-gate 			emp->b_prev->b_next = mp;
27387c478bd9Sstevel@tonic-gate 		else
27397c478bd9Sstevel@tonic-gate 			q->q_first = mp;
27407c478bd9Sstevel@tonic-gate 		emp->b_prev = mp;
27417c478bd9Sstevel@tonic-gate 	} else {
27427c478bd9Sstevel@tonic-gate 		if ((mp->b_prev = q->q_last) != NULL)
27437c478bd9Sstevel@tonic-gate 			q->q_last->b_next = mp;
27447c478bd9Sstevel@tonic-gate 		else
27457c478bd9Sstevel@tonic-gate 			q->q_first = mp;
27467c478bd9Sstevel@tonic-gate 		q->q_last = mp;
27477c478bd9Sstevel@tonic-gate 	}
27487c478bd9Sstevel@tonic-gate 
27497c478bd9Sstevel@tonic-gate 	/* Get mblk and byte count for q_count accounting */
27507c478bd9Sstevel@tonic-gate 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
27517c478bd9Sstevel@tonic-gate 		bytecnt += (tmp->b_wptr - tmp->b_rptr);
27527c478bd9Sstevel@tonic-gate 		mblkcnt++;
27537c478bd9Sstevel@tonic-gate 	}
27547c478bd9Sstevel@tonic-gate 
27557c478bd9Sstevel@tonic-gate 	if (qbp) {	/* adjust qband pointers and count */
27567c478bd9Sstevel@tonic-gate 		if (!qbp->qb_first) {
27577c478bd9Sstevel@tonic-gate 			qbp->qb_first = mp;
27587c478bd9Sstevel@tonic-gate 			qbp->qb_last = mp;
27597c478bd9Sstevel@tonic-gate 		} else {
27607c478bd9Sstevel@tonic-gate 			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
27617c478bd9Sstevel@tonic-gate 			    (mp->b_prev->b_band != mp->b_band)))
27627c478bd9Sstevel@tonic-gate 				qbp->qb_first = mp;
27637c478bd9Sstevel@tonic-gate 			else if (mp->b_next == NULL || (mp->b_next != NULL &&
27647c478bd9Sstevel@tonic-gate 			    (mp->b_next->b_band != mp->b_band)))
27657c478bd9Sstevel@tonic-gate 				qbp->qb_last = mp;
27667c478bd9Sstevel@tonic-gate 		}
27677c478bd9Sstevel@tonic-gate 		qbp->qb_count += bytecnt;
27687c478bd9Sstevel@tonic-gate 		qbp->qb_mblkcnt += mblkcnt;
27697c478bd9Sstevel@tonic-gate 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
27707c478bd9Sstevel@tonic-gate 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
27717c478bd9Sstevel@tonic-gate 			qbp->qb_flag |= QB_FULL;
27727c478bd9Sstevel@tonic-gate 		}
27737c478bd9Sstevel@tonic-gate 	} else {
27747c478bd9Sstevel@tonic-gate 		q->q_count += bytecnt;
27757c478bd9Sstevel@tonic-gate 		q->q_mblkcnt += mblkcnt;
27767c478bd9Sstevel@tonic-gate 		if ((q->q_count >= q->q_hiwat) ||
27777c478bd9Sstevel@tonic-gate 		    (q->q_mblkcnt >= q->q_hiwat)) {
27787c478bd9Sstevel@tonic-gate 			q->q_flag |= QFULL;
27797c478bd9Sstevel@tonic-gate 		}
27807c478bd9Sstevel@tonic-gate 	}
27817c478bd9Sstevel@tonic-gate 
27827c478bd9Sstevel@tonic-gate 	STR_FTEVENT_MSG(mp, q, FTEV_INSQ, NULL);
27837c478bd9Sstevel@tonic-gate 
27847c478bd9Sstevel@tonic-gate 	if (canenable(q) && (q->q_flag & QWANTR))
27857c478bd9Sstevel@tonic-gate 		qenable_locked(q);
27867c478bd9Sstevel@tonic-gate 
27877c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(QLOCK(q)));
27887c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
27897c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
27907c478bd9Sstevel@tonic-gate 
27917c478bd9Sstevel@tonic-gate 	return (1);
27927c478bd9Sstevel@tonic-gate }
27937c478bd9Sstevel@tonic-gate 
27947c478bd9Sstevel@tonic-gate /*
27957c478bd9Sstevel@tonic-gate  * Create and put a control message on queue.
27967c478bd9Sstevel@tonic-gate  */
27977c478bd9Sstevel@tonic-gate int
27987c478bd9Sstevel@tonic-gate putctl(queue_t *q, int type)
27997c478bd9Sstevel@tonic-gate {
28007c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28017c478bd9Sstevel@tonic-gate 
28027c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28037c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(0)) == NULL)
28047c478bd9Sstevel@tonic-gate 		return (0);
28057c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char) type;
28067c478bd9Sstevel@tonic-gate 
28077c478bd9Sstevel@tonic-gate 	put(q, bp);
28087c478bd9Sstevel@tonic-gate 
28097c478bd9Sstevel@tonic-gate 	return (1);
28107c478bd9Sstevel@tonic-gate }
28117c478bd9Sstevel@tonic-gate 
28127c478bd9Sstevel@tonic-gate /*
28137c478bd9Sstevel@tonic-gate  * Control message with a single-byte parameter
28147c478bd9Sstevel@tonic-gate  */
28157c478bd9Sstevel@tonic-gate int
28167c478bd9Sstevel@tonic-gate putctl1(queue_t *q, int type, int param)
28177c478bd9Sstevel@tonic-gate {
28187c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28197c478bd9Sstevel@tonic-gate 
28207c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28217c478bd9Sstevel@tonic-gate 	    (bp = allocb_tryhard(1)) == NULL)
28227c478bd9Sstevel@tonic-gate 		return (0);
28237c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28247c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
28257c478bd9Sstevel@tonic-gate 
28267c478bd9Sstevel@tonic-gate 	put(q, bp);
28277c478bd9Sstevel@tonic-gate 
28287c478bd9Sstevel@tonic-gate 	return (1);
28297c478bd9Sstevel@tonic-gate }
28307c478bd9Sstevel@tonic-gate 
28317c478bd9Sstevel@tonic-gate int
28327c478bd9Sstevel@tonic-gate putnextctl1(queue_t *q, int type, int param)
28337c478bd9Sstevel@tonic-gate {
28347c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28357c478bd9Sstevel@tonic-gate 
28367c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28377c478bd9Sstevel@tonic-gate 		((bp = allocb_tryhard(1)) == NULL))
28387c478bd9Sstevel@tonic-gate 		return (0);
28397c478bd9Sstevel@tonic-gate 
28407c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28417c478bd9Sstevel@tonic-gate 	*bp->b_wptr++ = (unsigned char)param;
28427c478bd9Sstevel@tonic-gate 
28437c478bd9Sstevel@tonic-gate 	putnext(q, bp);
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate 	return (1);
28467c478bd9Sstevel@tonic-gate }
28477c478bd9Sstevel@tonic-gate 
28487c478bd9Sstevel@tonic-gate int
28497c478bd9Sstevel@tonic-gate putnextctl(queue_t *q, int type)
28507c478bd9Sstevel@tonic-gate {
28517c478bd9Sstevel@tonic-gate 	mblk_t *bp;
28527c478bd9Sstevel@tonic-gate 
28537c478bd9Sstevel@tonic-gate 	if ((datamsg(type) && (type != M_DELAY)) ||
28547c478bd9Sstevel@tonic-gate 		((bp = allocb_tryhard(0)) == NULL))
28557c478bd9Sstevel@tonic-gate 		return (0);
28567c478bd9Sstevel@tonic-gate 	bp->b_datap->db_type = (unsigned char)type;
28577c478bd9Sstevel@tonic-gate 
28587c478bd9Sstevel@tonic-gate 	putnext(q, bp);
28597c478bd9Sstevel@tonic-gate 
28607c478bd9Sstevel@tonic-gate 	return (1);
28617c478bd9Sstevel@tonic-gate }
28627c478bd9Sstevel@tonic-gate 
28637c478bd9Sstevel@tonic-gate /*
28647c478bd9Sstevel@tonic-gate  * Return the queue upstream from this one
28657c478bd9Sstevel@tonic-gate  */
28667c478bd9Sstevel@tonic-gate queue_t *
28677c478bd9Sstevel@tonic-gate backq(queue_t *q)
28687c478bd9Sstevel@tonic-gate {
28697c478bd9Sstevel@tonic-gate 	q = _OTHERQ(q);
28707c478bd9Sstevel@tonic-gate 	if (q->q_next) {
28717c478bd9Sstevel@tonic-gate 		q = q->q_next;
28727c478bd9Sstevel@tonic-gate 		return (_OTHERQ(q));
28737c478bd9Sstevel@tonic-gate 	}
28747c478bd9Sstevel@tonic-gate 	return (NULL);
28757c478bd9Sstevel@tonic-gate }
28767c478bd9Sstevel@tonic-gate 
28777c478bd9Sstevel@tonic-gate /*
28787c478bd9Sstevel@tonic-gate  * Send a block back up the queue in reverse from this
28797c478bd9Sstevel@tonic-gate  * one (e.g. to respond to ioctls)
28807c478bd9Sstevel@tonic-gate  */
28817c478bd9Sstevel@tonic-gate void
28827c478bd9Sstevel@tonic-gate qreply(queue_t *q, mblk_t *bp)
28837c478bd9Sstevel@tonic-gate {
28847c478bd9Sstevel@tonic-gate 	ASSERT(q && bp);
28857c478bd9Sstevel@tonic-gate 
28867c478bd9Sstevel@tonic-gate 	putnext(_OTHERQ(q), bp);
28877c478bd9Sstevel@tonic-gate }
28887c478bd9Sstevel@tonic-gate 
28897c478bd9Sstevel@tonic-gate /*
28907c478bd9Sstevel@tonic-gate  * Streams Queue Scheduling
28917c478bd9Sstevel@tonic-gate  *
28927c478bd9Sstevel@tonic-gate  * Queues are enabled through qenable() when they have messages to
28937c478bd9Sstevel@tonic-gate  * process.  They are serviced by queuerun(), which runs each enabled
28947c478bd9Sstevel@tonic-gate  * queue's service procedure.  The call to queuerun() is processor
28957c478bd9Sstevel@tonic-gate  * dependent - the general principle is that it be run whenever a queue
28967c478bd9Sstevel@tonic-gate  * is enabled but before returning to user level.  For system calls,
28977c478bd9Sstevel@tonic-gate  * the function runqueues() is called if their action causes a queue
28987c478bd9Sstevel@tonic-gate  * to be enabled.  For device interrupts, queuerun() should be
28997c478bd9Sstevel@tonic-gate  * called before returning from the last level of interrupt.  Beyond
29007c478bd9Sstevel@tonic-gate  * this, no timing assumptions should be made about queue scheduling.
29017c478bd9Sstevel@tonic-gate  */
29027c478bd9Sstevel@tonic-gate 
29037c478bd9Sstevel@tonic-gate /*
29047c478bd9Sstevel@tonic-gate  * Enable a queue: put it on list of those whose service procedures are
29057c478bd9Sstevel@tonic-gate  * ready to run and set up the scheduling mechanism.
29067c478bd9Sstevel@tonic-gate  * The broadcast is done outside the mutex -> to avoid the woken thread
29077c478bd9Sstevel@tonic-gate  * from contending with the mutex. This is OK 'cos the queue has been
29087c478bd9Sstevel@tonic-gate  * enqueued on the runlist and flagged safely at this point.
29097c478bd9Sstevel@tonic-gate  */
29107c478bd9Sstevel@tonic-gate void
29117c478bd9Sstevel@tonic-gate qenable(queue_t *q)
29127c478bd9Sstevel@tonic-gate {
29137c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29147c478bd9Sstevel@tonic-gate 	qenable_locked(q);
29157c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29167c478bd9Sstevel@tonic-gate }
29177c478bd9Sstevel@tonic-gate /*
29187c478bd9Sstevel@tonic-gate  * Return number of messages on queue
29197c478bd9Sstevel@tonic-gate  */
29207c478bd9Sstevel@tonic-gate int
29217c478bd9Sstevel@tonic-gate qsize(queue_t *qp)
29227c478bd9Sstevel@tonic-gate {
29237c478bd9Sstevel@tonic-gate 	int count = 0;
29247c478bd9Sstevel@tonic-gate 	mblk_t *mp;
29257c478bd9Sstevel@tonic-gate 
29267c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
29277c478bd9Sstevel@tonic-gate 	for (mp = qp->q_first; mp; mp = mp->b_next)
29287c478bd9Sstevel@tonic-gate 		count++;
29297c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
29307c478bd9Sstevel@tonic-gate 	return (count);
29317c478bd9Sstevel@tonic-gate }
29327c478bd9Sstevel@tonic-gate 
29337c478bd9Sstevel@tonic-gate /*
29347c478bd9Sstevel@tonic-gate  * noenable - set queue so that putq() will not enable it.
29357c478bd9Sstevel@tonic-gate  * enableok - set queue so that putq() can enable it.
29367c478bd9Sstevel@tonic-gate  */
29377c478bd9Sstevel@tonic-gate void
29387c478bd9Sstevel@tonic-gate noenable(queue_t *q)
29397c478bd9Sstevel@tonic-gate {
29407c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29417c478bd9Sstevel@tonic-gate 	q->q_flag |= QNOENB;
29427c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29437c478bd9Sstevel@tonic-gate }
29447c478bd9Sstevel@tonic-gate 
29457c478bd9Sstevel@tonic-gate void
29467c478bd9Sstevel@tonic-gate enableok(queue_t *q)
29477c478bd9Sstevel@tonic-gate {
29487c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(q));
29497c478bd9Sstevel@tonic-gate 	q->q_flag &= ~QNOENB;
29507c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(q));
29517c478bd9Sstevel@tonic-gate }
29527c478bd9Sstevel@tonic-gate 
29537c478bd9Sstevel@tonic-gate /*
29547c478bd9Sstevel@tonic-gate  * Set queue fields.
29557c478bd9Sstevel@tonic-gate  */
29567c478bd9Sstevel@tonic-gate int
29577c478bd9Sstevel@tonic-gate strqset(queue_t *q, qfields_t what, unsigned char pri, intptr_t val)
29587c478bd9Sstevel@tonic-gate {
29597c478bd9Sstevel@tonic-gate 	qband_t *qbp = NULL;
29607c478bd9Sstevel@tonic-gate 	queue_t	*wrq;
29617c478bd9Sstevel@tonic-gate 	int error = 0;
29627c478bd9Sstevel@tonic-gate 	kthread_id_t freezer;
29637c478bd9Sstevel@tonic-gate 
29647c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
29657c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
29667c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
29677c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
29687c478bd9Sstevel@tonic-gate 	} else
29697c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
29707c478bd9Sstevel@tonic-gate 
29717c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
29727c478bd9Sstevel@tonic-gate 		error = EINVAL;
29737c478bd9Sstevel@tonic-gate 		goto done;
29747c478bd9Sstevel@tonic-gate 	}
29757c478bd9Sstevel@tonic-gate 	if (pri != 0) {
29767c478bd9Sstevel@tonic-gate 		int i;
29777c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
29787c478bd9Sstevel@tonic-gate 
29797c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
29807c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
29817c478bd9Sstevel@tonic-gate 			while (*qbpp)
29827c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
29837c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
29847c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
29857c478bd9Sstevel@tonic-gate 					error = EAGAIN;
29867c478bd9Sstevel@tonic-gate 					goto done;
29877c478bd9Sstevel@tonic-gate 				}
29887c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
29897c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
29907c478bd9Sstevel@tonic-gate 				q->q_nband++;
29917c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
29927c478bd9Sstevel@tonic-gate 			}
29937c478bd9Sstevel@tonic-gate 		}
29947c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
29957c478bd9Sstevel@tonic-gate 		i = pri;
29967c478bd9Sstevel@tonic-gate 		while (--i)
29977c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
29987c478bd9Sstevel@tonic-gate 	}
29997c478bd9Sstevel@tonic-gate 	switch (what) {
30007c478bd9Sstevel@tonic-gate 
30017c478bd9Sstevel@tonic-gate 	case QHIWAT:
30027c478bd9Sstevel@tonic-gate 		if (qbp)
30037c478bd9Sstevel@tonic-gate 			qbp->qb_hiwat = (size_t)val;
30047c478bd9Sstevel@tonic-gate 		else
30057c478bd9Sstevel@tonic-gate 			q->q_hiwat = (size_t)val;
30067c478bd9Sstevel@tonic-gate 		break;
30077c478bd9Sstevel@tonic-gate 
30087c478bd9Sstevel@tonic-gate 	case QLOWAT:
30097c478bd9Sstevel@tonic-gate 		if (qbp)
30107c478bd9Sstevel@tonic-gate 			qbp->qb_lowat = (size_t)val;
30117c478bd9Sstevel@tonic-gate 		else
30127c478bd9Sstevel@tonic-gate 			q->q_lowat = (size_t)val;
30137c478bd9Sstevel@tonic-gate 		break;
30147c478bd9Sstevel@tonic-gate 
30157c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
30167c478bd9Sstevel@tonic-gate 		if (qbp)
30177c478bd9Sstevel@tonic-gate 			error = EINVAL;
30187c478bd9Sstevel@tonic-gate 		else
30197c478bd9Sstevel@tonic-gate 			q->q_maxpsz = (ssize_t)val;
30207c478bd9Sstevel@tonic-gate 
30217c478bd9Sstevel@tonic-gate 		/*
30227c478bd9Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
30237c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
30247c478bd9Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
30257c478bd9Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
30267c478bd9Sstevel@tonic-gate 		 */
30277c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
30287c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
30297c478bd9Sstevel@tonic-gate 			break;
30307c478bd9Sstevel@tonic-gate 
30317c478bd9Sstevel@tonic-gate 		/*
30327c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
30337c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
30347c478bd9Sstevel@tonic-gate 		 */
30357c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30367c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
30377c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
30387c478bd9Sstevel@tonic-gate 		}
30397c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(wrq)));
30407c478bd9Sstevel@tonic-gate 
30417c478bd9Sstevel@tonic-gate 		if (strmsgsz != 0) {
30427c478bd9Sstevel@tonic-gate 			if (val == INFPSZ)
30437c478bd9Sstevel@tonic-gate 				val = strmsgsz;
30447c478bd9Sstevel@tonic-gate 			else  {
30457c478bd9Sstevel@tonic-gate 				if (STREAM(q)->sd_vnode->v_type == VFIFO)
30467c478bd9Sstevel@tonic-gate 					val = MIN(PIPE_BUF, val);
30477c478bd9Sstevel@tonic-gate 				else
30487c478bd9Sstevel@tonic-gate 					val = MIN(strmsgsz, val);
30497c478bd9Sstevel@tonic-gate 			}
30507c478bd9Sstevel@tonic-gate 		}
30517c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_maxpsz = val;
30527c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30537c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
30547c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
30557c478bd9Sstevel@tonic-gate 		}
30567c478bd9Sstevel@tonic-gate 		break;
30577c478bd9Sstevel@tonic-gate 
30587c478bd9Sstevel@tonic-gate 	case QMINPSZ:
30597c478bd9Sstevel@tonic-gate 		if (qbp)
30607c478bd9Sstevel@tonic-gate 			error = EINVAL;
30617c478bd9Sstevel@tonic-gate 		else
30627c478bd9Sstevel@tonic-gate 			q->q_minpsz = (ssize_t)val;
30637c478bd9Sstevel@tonic-gate 
30647c478bd9Sstevel@tonic-gate 		/*
30657c478bd9Sstevel@tonic-gate 		 * Performance concern, strwrite looks at the module below
30667c478bd9Sstevel@tonic-gate 		 * the stream head for the maxpsz each time it does a write
30677c478bd9Sstevel@tonic-gate 		 * we now cache it at the stream head.  Check to see if this
30687c478bd9Sstevel@tonic-gate 		 * queue is sitting directly below the stream head.
30697c478bd9Sstevel@tonic-gate 		 */
30707c478bd9Sstevel@tonic-gate 		wrq = STREAM(q)->sd_wrq;
30717c478bd9Sstevel@tonic-gate 		if (q != wrq->q_next)
30727c478bd9Sstevel@tonic-gate 			break;
30737c478bd9Sstevel@tonic-gate 
30747c478bd9Sstevel@tonic-gate 		/*
30757c478bd9Sstevel@tonic-gate 		 * If the stream is not frozen drop the current QLOCK and
30767c478bd9Sstevel@tonic-gate 		 * acquire the sd_wrq QLOCK which protects sd_qn_*
30777c478bd9Sstevel@tonic-gate 		 */
30787c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30797c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(q));
30807c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(wrq));
30817c478bd9Sstevel@tonic-gate 		}
30827c478bd9Sstevel@tonic-gate 		STREAM(q)->sd_qn_minpsz = (ssize_t)val;
30837c478bd9Sstevel@tonic-gate 
30847c478bd9Sstevel@tonic-gate 		if (freezer != curthread) {
30857c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(wrq));
30867c478bd9Sstevel@tonic-gate 			mutex_enter(QLOCK(q));
30877c478bd9Sstevel@tonic-gate 		}
30887c478bd9Sstevel@tonic-gate 		break;
30897c478bd9Sstevel@tonic-gate 
30907c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
30917c478bd9Sstevel@tonic-gate 		if (qbp)
30927c478bd9Sstevel@tonic-gate 			error = EINVAL;
30937c478bd9Sstevel@tonic-gate 		else
30947c478bd9Sstevel@tonic-gate 			q->q_struiot = (ushort_t)val;
30957c478bd9Sstevel@tonic-gate 		break;
30967c478bd9Sstevel@tonic-gate 
30977c478bd9Sstevel@tonic-gate 	case QCOUNT:
30987c478bd9Sstevel@tonic-gate 	case QFIRST:
30997c478bd9Sstevel@tonic-gate 	case QLAST:
31007c478bd9Sstevel@tonic-gate 	case QFLAG:
31017c478bd9Sstevel@tonic-gate 		error = EPERM;
31027c478bd9Sstevel@tonic-gate 		break;
31037c478bd9Sstevel@tonic-gate 
31047c478bd9Sstevel@tonic-gate 	default:
31057c478bd9Sstevel@tonic-gate 		error = EINVAL;
31067c478bd9Sstevel@tonic-gate 		break;
31077c478bd9Sstevel@tonic-gate 	}
31087c478bd9Sstevel@tonic-gate done:
31097c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
31107c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
31117c478bd9Sstevel@tonic-gate 	return (error);
31127c478bd9Sstevel@tonic-gate }
31137c478bd9Sstevel@tonic-gate 
31147c478bd9Sstevel@tonic-gate /*
31157c478bd9Sstevel@tonic-gate  * Get queue fields.
31167c478bd9Sstevel@tonic-gate  */
31177c478bd9Sstevel@tonic-gate int
31187c478bd9Sstevel@tonic-gate strqget(queue_t *q, qfields_t what, unsigned char pri, void *valp)
31197c478bd9Sstevel@tonic-gate {
31207c478bd9Sstevel@tonic-gate 	qband_t 	*qbp = NULL;
31217c478bd9Sstevel@tonic-gate 	int 		error = 0;
31227c478bd9Sstevel@tonic-gate 	kthread_id_t 	freezer;
31237c478bd9Sstevel@tonic-gate 
31247c478bd9Sstevel@tonic-gate 	freezer = STREAM(q)->sd_freezer;
31257c478bd9Sstevel@tonic-gate 	if (freezer == curthread) {
31267c478bd9Sstevel@tonic-gate 		ASSERT(frozenstr(q));
31277c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_HELD(QLOCK(q)));
31287c478bd9Sstevel@tonic-gate 	} else
31297c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(q));
31307c478bd9Sstevel@tonic-gate 	if (what >= QBAD) {
31317c478bd9Sstevel@tonic-gate 		error = EINVAL;
31327c478bd9Sstevel@tonic-gate 		goto done;
31337c478bd9Sstevel@tonic-gate 	}
31347c478bd9Sstevel@tonic-gate 	if (pri != 0) {
31357c478bd9Sstevel@tonic-gate 		int i;
31367c478bd9Sstevel@tonic-gate 		qband_t **qbpp;
31377c478bd9Sstevel@tonic-gate 
31387c478bd9Sstevel@tonic-gate 		if (pri > q->q_nband) {
31397c478bd9Sstevel@tonic-gate 			qbpp = &q->q_bandp;
31407c478bd9Sstevel@tonic-gate 			while (*qbpp)
31417c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
31427c478bd9Sstevel@tonic-gate 			while (pri > q->q_nband) {
31437c478bd9Sstevel@tonic-gate 				if ((*qbpp = allocband()) == NULL) {
31447c478bd9Sstevel@tonic-gate 					error = EAGAIN;
31457c478bd9Sstevel@tonic-gate 					goto done;
31467c478bd9Sstevel@tonic-gate 				}
31477c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_hiwat = q->q_hiwat;
31487c478bd9Sstevel@tonic-gate 				(*qbpp)->qb_lowat = q->q_lowat;
31497c478bd9Sstevel@tonic-gate 				q->q_nband++;
31507c478bd9Sstevel@tonic-gate 				qbpp = &(*qbpp)->qb_next;
31517c478bd9Sstevel@tonic-gate 			}
31527c478bd9Sstevel@tonic-gate 		}
31537c478bd9Sstevel@tonic-gate 		qbp = q->q_bandp;
31547c478bd9Sstevel@tonic-gate 		i = pri;
31557c478bd9Sstevel@tonic-gate 		while (--i)
31567c478bd9Sstevel@tonic-gate 			qbp = qbp->qb_next;
31577c478bd9Sstevel@tonic-gate 	}
31587c478bd9Sstevel@tonic-gate 	switch (what) {
31597c478bd9Sstevel@tonic-gate 	case QHIWAT:
31607c478bd9Sstevel@tonic-gate 		if (qbp)
31617c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_hiwat;
31627c478bd9Sstevel@tonic-gate 		else
31637c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_hiwat;
31647c478bd9Sstevel@tonic-gate 		break;
31657c478bd9Sstevel@tonic-gate 
31667c478bd9Sstevel@tonic-gate 	case QLOWAT:
31677c478bd9Sstevel@tonic-gate 		if (qbp)
31687c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_lowat;
31697c478bd9Sstevel@tonic-gate 		else
31707c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_lowat;
31717c478bd9Sstevel@tonic-gate 		break;
31727c478bd9Sstevel@tonic-gate 
31737c478bd9Sstevel@tonic-gate 	case QMAXPSZ:
31747c478bd9Sstevel@tonic-gate 		if (qbp)
31757c478bd9Sstevel@tonic-gate 			error = EINVAL;
31767c478bd9Sstevel@tonic-gate 		else
31777c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_maxpsz;
31787c478bd9Sstevel@tonic-gate 		break;
31797c478bd9Sstevel@tonic-gate 
31807c478bd9Sstevel@tonic-gate 	case QMINPSZ:
31817c478bd9Sstevel@tonic-gate 		if (qbp)
31827c478bd9Sstevel@tonic-gate 			error = EINVAL;
31837c478bd9Sstevel@tonic-gate 		else
31847c478bd9Sstevel@tonic-gate 			*(ssize_t *)valp = q->q_minpsz;
31857c478bd9Sstevel@tonic-gate 		break;
31867c478bd9Sstevel@tonic-gate 
31877c478bd9Sstevel@tonic-gate 	case QCOUNT:
31887c478bd9Sstevel@tonic-gate 		if (qbp)
31897c478bd9Sstevel@tonic-gate 			*(size_t *)valp = qbp->qb_count;
31907c478bd9Sstevel@tonic-gate 		else
31917c478bd9Sstevel@tonic-gate 			*(size_t *)valp = q->q_count;
31927c478bd9Sstevel@tonic-gate 		break;
31937c478bd9Sstevel@tonic-gate 
31947c478bd9Sstevel@tonic-gate 	case QFIRST:
31957c478bd9Sstevel@tonic-gate 		if (qbp)
31967c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_first;
31977c478bd9Sstevel@tonic-gate 		else
31987c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_first;
31997c478bd9Sstevel@tonic-gate 		break;
32007c478bd9Sstevel@tonic-gate 
32017c478bd9Sstevel@tonic-gate 	case QLAST:
32027c478bd9Sstevel@tonic-gate 		if (qbp)
32037c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = qbp->qb_last;
32047c478bd9Sstevel@tonic-gate 		else
32057c478bd9Sstevel@tonic-gate 			*(mblk_t **)valp = q->q_last;
32067c478bd9Sstevel@tonic-gate 		break;
32077c478bd9Sstevel@tonic-gate 
32087c478bd9Sstevel@tonic-gate 	case QFLAG:
32097c478bd9Sstevel@tonic-gate 		if (qbp)
32107c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = qbp->qb_flag;
32117c478bd9Sstevel@tonic-gate 		else
32127c478bd9Sstevel@tonic-gate 			*(uint_t *)valp = q->q_flag;
32137c478bd9Sstevel@tonic-gate 		break;
32147c478bd9Sstevel@tonic-gate 
32157c478bd9Sstevel@tonic-gate 	case QSTRUIOT:
32167c478bd9Sstevel@tonic-gate 		if (qbp)
32177c478bd9Sstevel@tonic-gate 			error = EINVAL;
32187c478bd9Sstevel@tonic-gate 		else
32197c478bd9Sstevel@tonic-gate 			*(short *)valp = q->q_struiot;
32207c478bd9Sstevel@tonic-gate 		break;
32217c478bd9Sstevel@tonic-gate 
32227c478bd9Sstevel@tonic-gate 	default:
32237c478bd9Sstevel@tonic-gate 		error = EINVAL;
32247c478bd9Sstevel@tonic-gate 		break;
32257c478bd9Sstevel@tonic-gate 	}
32267c478bd9Sstevel@tonic-gate done:
32277c478bd9Sstevel@tonic-gate 	if (freezer != curthread)
32287c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(q));
32297c478bd9Sstevel@tonic-gate 	return (error);
32307c478bd9Sstevel@tonic-gate }
32317c478bd9Sstevel@tonic-gate 
32327c478bd9Sstevel@tonic-gate /*
32337c478bd9Sstevel@tonic-gate  * Function awakes all in cvwait/sigwait/pollwait, on one of:
32347c478bd9Sstevel@tonic-gate  *	QWANTWSYNC or QWANTR or QWANTW,
32357c478bd9Sstevel@tonic-gate  *
32367c478bd9Sstevel@tonic-gate  * Note: for QWANTWSYNC/QWANTW and QWANTR, if no WSLEEPer or RSLEEPer then a
32377c478bd9Sstevel@tonic-gate  *	 deferred wakeup will be done. Also if strpoll() in progress then a
32387c478bd9Sstevel@tonic-gate  *	 deferred pollwakeup will be done.
32397c478bd9Sstevel@tonic-gate  */
32407c478bd9Sstevel@tonic-gate void
32417c478bd9Sstevel@tonic-gate strwakeq(queue_t *q, int flag)
32427c478bd9Sstevel@tonic-gate {
32437c478bd9Sstevel@tonic-gate 	stdata_t 	*stp = STREAM(q);
32447c478bd9Sstevel@tonic-gate 	pollhead_t 	*pl;
32457c478bd9Sstevel@tonic-gate 
32467c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
32477c478bd9Sstevel@tonic-gate 	pl = &stp->sd_pollist;
32487c478bd9Sstevel@tonic-gate 	if (flag & QWANTWSYNC) {
32497c478bd9Sstevel@tonic-gate 		ASSERT(!(q->q_flag & QREADR));
32507c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
32517c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
32527c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
32537c478bd9Sstevel@tonic-gate 		} else {
32547c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= WSLEEP;
32557c478bd9Sstevel@tonic-gate 		}
32567c478bd9Sstevel@tonic-gate 
32577c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
32587c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
32597c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
32607c478bd9Sstevel@tonic-gate 
32617c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
32627c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
32637c478bd9Sstevel@tonic-gate 	} else if (flag & QWANTR) {
32647c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & RSLEEP) {
32657c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~RSLEEP;
32667c478bd9Sstevel@tonic-gate 			cv_broadcast(&_RD(stp->sd_wrq)->q_wait);
32677c478bd9Sstevel@tonic-gate 		} else {
32687c478bd9Sstevel@tonic-gate 			stp->sd_wakeq |= RSLEEP;
32697c478bd9Sstevel@tonic-gate 		}
32707c478bd9Sstevel@tonic-gate 
32717c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
32727c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLIN | POLLRDNORM);
32737c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
32747c478bd9Sstevel@tonic-gate 
32757c478bd9Sstevel@tonic-gate 		{
32767c478bd9Sstevel@tonic-gate 			int events = stp->sd_sigflags & (S_INPUT | S_RDNORM);
32777c478bd9Sstevel@tonic-gate 
32787c478bd9Sstevel@tonic-gate 			if (events)
32797c478bd9Sstevel@tonic-gate 				strsendsig(stp->sd_siglist, events, 0, 0);
32807c478bd9Sstevel@tonic-gate 		}
32817c478bd9Sstevel@tonic-gate 	} else {
32827c478bd9Sstevel@tonic-gate 		if (stp->sd_flag & WSLEEP) {
32837c478bd9Sstevel@tonic-gate 			stp->sd_flag &= ~WSLEEP;
32847c478bd9Sstevel@tonic-gate 			cv_broadcast(&stp->sd_wrq->q_wait);
32857c478bd9Sstevel@tonic-gate 		}
32867c478bd9Sstevel@tonic-gate 
32877c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
32887c478bd9Sstevel@tonic-gate 		pollwakeup(pl, POLLWRNORM);
32897c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
32907c478bd9Sstevel@tonic-gate 
32917c478bd9Sstevel@tonic-gate 		if (stp->sd_sigflags & S_WRNORM)
32927c478bd9Sstevel@tonic-gate 			strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
32937c478bd9Sstevel@tonic-gate 	}
32947c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
32957c478bd9Sstevel@tonic-gate }
32967c478bd9Sstevel@tonic-gate 
32977c478bd9Sstevel@tonic-gate int
32987c478bd9Sstevel@tonic-gate struioget(queue_t *q, mblk_t *mp, struiod_t *dp, int noblock)
32997c478bd9Sstevel@tonic-gate {
33007c478bd9Sstevel@tonic-gate 	stdata_t *stp = STREAM(q);
33017c478bd9Sstevel@tonic-gate 	int typ  = STRUIOT_STANDARD;
33027c478bd9Sstevel@tonic-gate 	uio_t	 *uiop = &dp->d_uio;
33037c478bd9Sstevel@tonic-gate 	dblk_t	 *dbp;
33047c478bd9Sstevel@tonic-gate 	ssize_t	 uiocnt;
33057c478bd9Sstevel@tonic-gate 	ssize_t	 cnt;
33067c478bd9Sstevel@tonic-gate 	unsigned char *ptr;
33077c478bd9Sstevel@tonic-gate 	ssize_t	 resid;
33087c478bd9Sstevel@tonic-gate 	int	 error = 0;
33097c478bd9Sstevel@tonic-gate 	on_trap_data_t otd;
33107c478bd9Sstevel@tonic-gate 	queue_t	*stwrq;
33117c478bd9Sstevel@tonic-gate 
33127c478bd9Sstevel@tonic-gate 	/*
33137c478bd9Sstevel@tonic-gate 	 * Plumbing may change while taking the type so store the
33147c478bd9Sstevel@tonic-gate 	 * queue in a temporary variable. It doesn't matter even
33157c478bd9Sstevel@tonic-gate 	 * if the we take the type from the previous plumbing,
33167c478bd9Sstevel@tonic-gate 	 * that's because if the plumbing has changed when we were
33177c478bd9Sstevel@tonic-gate 	 * holding the queue in a temporary variable, we can continue
33187c478bd9Sstevel@tonic-gate 	 * processing the message the way it would have been processed
33197c478bd9Sstevel@tonic-gate 	 * in the old plumbing, without any side effects but a bit
33207c478bd9Sstevel@tonic-gate 	 * extra processing for partial ip header checksum.
33217c478bd9Sstevel@tonic-gate 	 *
33227c478bd9Sstevel@tonic-gate 	 * This has been done to avoid holding the sd_lock which is
33237c478bd9Sstevel@tonic-gate 	 * very hot.
33247c478bd9Sstevel@tonic-gate 	 */
33257c478bd9Sstevel@tonic-gate 
33267c478bd9Sstevel@tonic-gate 	stwrq = stp->sd_struiowrq;
33277c478bd9Sstevel@tonic-gate 	if (stwrq)
33287c478bd9Sstevel@tonic-gate 		typ = stwrq->q_struiot;
33297c478bd9Sstevel@tonic-gate 
33307c478bd9Sstevel@tonic-gate 	for (; (resid = uiop->uio_resid) > 0 && mp; mp = mp->b_cont) {
33317c478bd9Sstevel@tonic-gate 		dbp = mp->b_datap;
33327c478bd9Sstevel@tonic-gate 		ptr = (uchar_t *)(mp->b_rptr + dbp->db_cksumstuff);
33337c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
33347c478bd9Sstevel@tonic-gate 		cnt = MIN(uiocnt, uiop->uio_resid);
33357c478bd9Sstevel@tonic-gate 		if (!(dbp->db_struioflag & STRUIO_SPEC) ||
33367c478bd9Sstevel@tonic-gate 		    (dbp->db_struioflag & STRUIO_DONE) || cnt == 0) {
33377c478bd9Sstevel@tonic-gate 			/*
33387c478bd9Sstevel@tonic-gate 			 * Either this mblk has already been processed
33397c478bd9Sstevel@tonic-gate 			 * or there is no more room in this mblk (?).
33407c478bd9Sstevel@tonic-gate 			 */
33417c478bd9Sstevel@tonic-gate 			continue;
33427c478bd9Sstevel@tonic-gate 		}
33437c478bd9Sstevel@tonic-gate 		switch (typ) {
33447c478bd9Sstevel@tonic-gate 		case STRUIOT_STANDARD:
33457c478bd9Sstevel@tonic-gate 			if (noblock) {
33467c478bd9Sstevel@tonic-gate 				if (on_trap(&otd, OT_DATA_ACCESS)) {
33477c478bd9Sstevel@tonic-gate 					no_trap();
33487c478bd9Sstevel@tonic-gate 					error = EWOULDBLOCK;
33497c478bd9Sstevel@tonic-gate 					goto out;
33507c478bd9Sstevel@tonic-gate 				}
33517c478bd9Sstevel@tonic-gate 			}
33527c478bd9Sstevel@tonic-gate 			if (error = uiomove(ptr, cnt, UIO_WRITE, uiop)) {
33537c478bd9Sstevel@tonic-gate 				if (noblock)
33547c478bd9Sstevel@tonic-gate 					no_trap();
33557c478bd9Sstevel@tonic-gate 				goto out;
33567c478bd9Sstevel@tonic-gate 			}
33577c478bd9Sstevel@tonic-gate 			if (noblock)
33587c478bd9Sstevel@tonic-gate 				no_trap();
33597c478bd9Sstevel@tonic-gate 			break;
33607c478bd9Sstevel@tonic-gate 
33617c478bd9Sstevel@tonic-gate 		default:
33627c478bd9Sstevel@tonic-gate 			error = EIO;
33637c478bd9Sstevel@tonic-gate 			goto out;
33647c478bd9Sstevel@tonic-gate 		}
33657c478bd9Sstevel@tonic-gate 		dbp->db_struioflag |= STRUIO_DONE;
33667c478bd9Sstevel@tonic-gate 		dbp->db_cksumstuff += cnt;
33677c478bd9Sstevel@tonic-gate 	}
33687c478bd9Sstevel@tonic-gate out:
33697c478bd9Sstevel@tonic-gate 	if (error == EWOULDBLOCK && (resid -= uiop->uio_resid) > 0) {
33707c478bd9Sstevel@tonic-gate 		/*
33717c478bd9Sstevel@tonic-gate 		 * A fault has occured and some bytes were moved to the
33727c478bd9Sstevel@tonic-gate 		 * current mblk, the uio_t has already been updated by
33737c478bd9Sstevel@tonic-gate 		 * the appropriate uio routine, so also update the mblk
33747c478bd9Sstevel@tonic-gate 		 * to reflect this in case this same mblk chain is used
33757c478bd9Sstevel@tonic-gate 		 * again (after the fault has been handled).
33767c478bd9Sstevel@tonic-gate 		 */
33777c478bd9Sstevel@tonic-gate 		uiocnt = dbp->db_cksumend - dbp->db_cksumstuff;
33787c478bd9Sstevel@tonic-gate 		if (uiocnt >= resid)
33797c478bd9Sstevel@tonic-gate 			dbp->db_cksumstuff += resid;
33807c478bd9Sstevel@tonic-gate 	}
33817c478bd9Sstevel@tonic-gate 	return (error);
33827c478bd9Sstevel@tonic-gate }
33837c478bd9Sstevel@tonic-gate 
33847c478bd9Sstevel@tonic-gate /*
33857c478bd9Sstevel@tonic-gate  * Try to enter queue synchronously. Any attempt to enter a closing queue will
33867c478bd9Sstevel@tonic-gate  * fails. The qp->q_rwcnt keeps track of the number of successful entries so
33877c478bd9Sstevel@tonic-gate  * that removeq() will not try to close the queue while a thread is inside the
33887c478bd9Sstevel@tonic-gate  * queue.
33897c478bd9Sstevel@tonic-gate  */
33907c478bd9Sstevel@tonic-gate static boolean_t
33917c478bd9Sstevel@tonic-gate rwnext_enter(queue_t *qp)
33927c478bd9Sstevel@tonic-gate {
33937c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
33947c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWCLOSE) {
33957c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
33967c478bd9Sstevel@tonic-gate 		return (B_FALSE);
33977c478bd9Sstevel@tonic-gate 	}
33987c478bd9Sstevel@tonic-gate 	qp->q_rwcnt++;
33997c478bd9Sstevel@tonic-gate 	ASSERT(qp->q_rwcnt != 0);
34007c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
34017c478bd9Sstevel@tonic-gate 	return (B_TRUE);
34027c478bd9Sstevel@tonic-gate }
34037c478bd9Sstevel@tonic-gate 
34047c478bd9Sstevel@tonic-gate /*
34057c478bd9Sstevel@tonic-gate  * Decrease the count of threads running in sync stream queue and wake up any
34067c478bd9Sstevel@tonic-gate  * threads blocked in removeq().
34077c478bd9Sstevel@tonic-gate  */
34087c478bd9Sstevel@tonic-gate static void
34097c478bd9Sstevel@tonic-gate rwnext_exit(queue_t *qp)
34107c478bd9Sstevel@tonic-gate {
34117c478bd9Sstevel@tonic-gate 	mutex_enter(QLOCK(qp));
34127c478bd9Sstevel@tonic-gate 	qp->q_rwcnt--;
34137c478bd9Sstevel@tonic-gate 	if (qp->q_flag & QWANTRMQSYNC) {
34147c478bd9Sstevel@tonic-gate 		qp->q_flag &= ~QWANTRMQSYNC;
34157c478bd9Sstevel@tonic-gate 		cv_broadcast(&qp->q_wait);
34167c478bd9Sstevel@tonic-gate 	}
34177c478bd9Sstevel@tonic-gate 	mutex_exit(QLOCK(qp));
34187c478bd9Sstevel@tonic-gate }
34197c478bd9Sstevel@tonic-gate 
/*
 * The purpose of rwnext() is to call the rw procedure of the next
 * (downstream) module's queue.
 *
 * It is treated as a put entrypoint for perimeter synchronization.
 *
 * There's no need to grab sq_putlocks here (which only exist for CIPUT
 * sync queues). If it is CIPUT sync queue sq_count is incremented and it does
 * not matter if any regular put entrypoints have been already entered. We
 * can't increment one of the sq_putcounts (instead of sq_count) because
 * qwait_rw won't know which counter to decrement.
 *
 * It would be reasonable to add the lockless FASTPUT logic.
 */
34347c478bd9Sstevel@tonic-gate int
34357c478bd9Sstevel@tonic-gate rwnext(queue_t *qp, struiod_t *dp)
34367c478bd9Sstevel@tonic-gate {
34377c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
34387c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
34397c478bd9Sstevel@tonic-gate 	uint16_t	count;
34407c478bd9Sstevel@tonic-gate 	uint16_t	flags;
34417c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
34427c478bd9Sstevel@tonic-gate 	int		(*proc)();
34437c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
34447c478bd9Sstevel@tonic-gate 	int		isread;
34457c478bd9Sstevel@tonic-gate 	int		rval;
34467c478bd9Sstevel@tonic-gate 
34477c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
	/*
	 * Prevent q_next from changing by holding sd_lock until acquiring
	 * SQLOCK. Note that a read-side rwnext from the streamhead will
	 * already have sd_lock acquired. In either case sd_lock is always
	 * released after acquiring SQLOCK.
	 *
	 * The streamhead read-side holding sd_lock when calling rwnext is
	 * required to prevent a race condition where M_DATA mblks flowing
	 * up the read-side of the stream could be bypassed by a rwnext()
	 * down-call. In this case sd_lock acts as the streamhead perimeter.
	 */
34597c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
34607c478bd9Sstevel@tonic-gate 		isread = 0;
34617c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
34627c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
34637c478bd9Sstevel@tonic-gate 	} else {
34647c478bd9Sstevel@tonic-gate 		isread = 1;
34657c478bd9Sstevel@tonic-gate 		if (nqp != stp->sd_wrq)
34667c478bd9Sstevel@tonic-gate 			/* Not streamhead */
34677c478bd9Sstevel@tonic-gate 			mutex_enter(&stp->sd_lock);
34687c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
34697c478bd9Sstevel@tonic-gate 	}
34707c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
34717c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_rwp)) {
34727c478bd9Sstevel@tonic-gate 		/*
34737c478bd9Sstevel@tonic-gate 		 * Not a synchronous module or no r/w procedure for this
34747c478bd9Sstevel@tonic-gate 		 * queue, so just return EINVAL and let the caller handle it.
34757c478bd9Sstevel@tonic-gate 		 */
34767c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
34777c478bd9Sstevel@tonic-gate 		return (EINVAL);
34787c478bd9Sstevel@tonic-gate 	}
34797c478bd9Sstevel@tonic-gate 
34807c478bd9Sstevel@tonic-gate 	if (rwnext_enter(qp) == B_FALSE) {
34817c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
34827c478bd9Sstevel@tonic-gate 		return (EINVAL);
34837c478bd9Sstevel@tonic-gate 	}
34847c478bd9Sstevel@tonic-gate 
34857c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
34867c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
34877c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
34887c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
34897c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
34907c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
34917c478bd9Sstevel@tonic-gate 
34927c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
34937c478bd9Sstevel@tonic-gate 		/*
34947c478bd9Sstevel@tonic-gate 		 * if this queue is being closed, return.
34957c478bd9Sstevel@tonic-gate 		 */
34967c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QWCLOSE) {
34977c478bd9Sstevel@tonic-gate 			mutex_exit(SQLOCK(sq));
34987c478bd9Sstevel@tonic-gate 			rwnext_exit(qp);
34997c478bd9Sstevel@tonic-gate 			return (EINVAL);
35007c478bd9Sstevel@tonic-gate 		}
35017c478bd9Sstevel@tonic-gate 
35027c478bd9Sstevel@tonic-gate 		/*
35037c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
35047c478bd9Sstevel@tonic-gate 		 */
35057c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
35067c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
35077c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
35087c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
35097c478bd9Sstevel@tonic-gate 	}
35107c478bd9Sstevel@tonic-gate 
35117c478bd9Sstevel@tonic-gate 	if (isread == 0 && stp->sd_struiowrq == NULL ||
35127c478bd9Sstevel@tonic-gate 	    isread == 1 && stp->sd_struiordq == NULL) {
35137c478bd9Sstevel@tonic-gate 		/*
35147c478bd9Sstevel@tonic-gate 		 * Stream plumbing changed while waiting for inner perimeter
35157c478bd9Sstevel@tonic-gate 		 * so just return EINVAL and let the caller handle it.
35167c478bd9Sstevel@tonic-gate 		 */
35177c478bd9Sstevel@tonic-gate 		mutex_exit(SQLOCK(sq));
35187c478bd9Sstevel@tonic-gate 		rwnext_exit(qp);
35197c478bd9Sstevel@tonic-gate 		return (EINVAL);
35207c478bd9Sstevel@tonic-gate 	}
35217c478bd9Sstevel@tonic-gate 	if (!(flags & SQ_CIPUT))
35227c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
35237c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
35247c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
	/*
	 * Note: The only message ordering guarantee that rwnext() makes is
	 *	 for the write queue flow-control case. All others (r/w queue
	 *	 with q_count > 0 (or q_first != 0)) are the responsibility of
	 *	 the queue's rw procedure. This could be generalized here by
	 *	 running the queue's service procedure, but that wouldn't be
	 *	 the most efficient for all cases.
	 */
35337c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
35347c478bd9Sstevel@tonic-gate 	if (! isread && (qp->q_flag & QFULL)) {
35357c478bd9Sstevel@tonic-gate 		/*
35367c478bd9Sstevel@tonic-gate 		 * Write queue may be flow controlled. If so,
35377c478bd9Sstevel@tonic-gate 		 * mark the queue for wakeup when it's not.
35387c478bd9Sstevel@tonic-gate 		 */
35397c478bd9Sstevel@tonic-gate 		mutex_enter(QLOCK(qp));
35407c478bd9Sstevel@tonic-gate 		if (qp->q_flag & QFULL) {
35417c478bd9Sstevel@tonic-gate 			qp->q_flag |= QWANTWSYNC;
35427c478bd9Sstevel@tonic-gate 			mutex_exit(QLOCK(qp));
35437c478bd9Sstevel@tonic-gate 			rval = EWOULDBLOCK;
35447c478bd9Sstevel@tonic-gate 			goto out;
35457c478bd9Sstevel@tonic-gate 		}
35467c478bd9Sstevel@tonic-gate 		mutex_exit(QLOCK(qp));
35477c478bd9Sstevel@tonic-gate 	}
35487c478bd9Sstevel@tonic-gate 
35497c478bd9Sstevel@tonic-gate 	if (! isread && dp->d_mp)
35507c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, nqp, FTEV_RWNEXT, dp->d_mp->b_rptr -
35517c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_datap->db_base);
35527c478bd9Sstevel@tonic-gate 
35537c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, dp);
35547c478bd9Sstevel@tonic-gate 
35557c478bd9Sstevel@tonic-gate 	if (isread && dp->d_mp)
35567c478bd9Sstevel@tonic-gate 		STR_FTEVENT_MSG(dp->d_mp, _RD(nqp), FTEV_RWNEXT,
35577c478bd9Sstevel@tonic-gate 		    dp->d_mp->b_rptr - dp->d_mp->b_datap->db_base);
35587c478bd9Sstevel@tonic-gate out:
35597c478bd9Sstevel@tonic-gate 	/*
35607c478bd9Sstevel@tonic-gate 	 * The queue is protected from being freed by sq_count, so it is
35617c478bd9Sstevel@tonic-gate 	 * safe to call rwnext_exit and reacquire SQLOCK(sq).
35627c478bd9Sstevel@tonic-gate 	 */
35637c478bd9Sstevel@tonic-gate 	rwnext_exit(qp);
35647c478bd9Sstevel@tonic-gate 
35657c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
35667c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
35677c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
35687c478bd9Sstevel@tonic-gate 	sq->sq_count--;
35697c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
35707c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
35717c478bd9Sstevel@tonic-gate 		/*
35727c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
35737c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
35747c478bd9Sstevel@tonic-gate 		 */
35757c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
35767c478bd9Sstevel@tonic-gate 		return (rval);
35777c478bd9Sstevel@tonic-gate 	}
35787c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
35797c478bd9Sstevel@tonic-gate 	/*
35807c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
35817c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
35827c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
35837c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
35847c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
35857c478bd9Sstevel@tonic-gate 	 *
35867c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
35877c478bd9Sstevel@tonic-gate 	 *
35887c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
35897c478bd9Sstevel@tonic-gate 	 * 	sq->sq_count == 0);
35907c478bd9Sstevel@tonic-gate 	 *
35917c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
35927c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
35937c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
35947c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
35957c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
35967c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
35977c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
35987c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
35997c478bd9Sstevel@tonic-gate 	 * test invalid.
36007c478bd9Sstevel@tonic-gate 	 */
36017c478bd9Sstevel@tonic-gate 
36027c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
36037c478bd9Sstevel@tonic-gate 	if (sq->sq_flags & SQ_WANTWAKEUP) {
36047c478bd9Sstevel@tonic-gate 		sq->sq_flags &= ~SQ_WANTWAKEUP;
36057c478bd9Sstevel@tonic-gate 		cv_broadcast(&sq->sq_wait);
36067c478bd9Sstevel@tonic-gate 	}
36077c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
36087c478bd9Sstevel@tonic-gate 	return (rval);
36097c478bd9Sstevel@tonic-gate }
36107c478bd9Sstevel@tonic-gate 
36117c478bd9Sstevel@tonic-gate /*
36127c478bd9Sstevel@tonic-gate  * The purpose of infonext() is to call the info procedure of the next
36137c478bd9Sstevel@tonic-gate  * (downstream) modules queue.
36147c478bd9Sstevel@tonic-gate  *
36157c478bd9Sstevel@tonic-gate  * treated as put entrypoint for perimeter syncronization.
36167c478bd9Sstevel@tonic-gate  *
36177c478bd9Sstevel@tonic-gate  * There's no need to grab sq_putlocks here (which only exist for CIPUT
36187c478bd9Sstevel@tonic-gate  * sync queues). If it is CIPUT sync queue regular sq_count is incremented and
36197c478bd9Sstevel@tonic-gate  * it does not matter if any regular put entrypoints have been already
36207c478bd9Sstevel@tonic-gate  * entered.
36217c478bd9Sstevel@tonic-gate  */
36227c478bd9Sstevel@tonic-gate int
36237c478bd9Sstevel@tonic-gate infonext(queue_t *qp, infod_t *idp)
36247c478bd9Sstevel@tonic-gate {
36257c478bd9Sstevel@tonic-gate 	queue_t		*nqp;
36267c478bd9Sstevel@tonic-gate 	syncq_t		*sq;
36277c478bd9Sstevel@tonic-gate 	uint16_t	count;
36287c478bd9Sstevel@tonic-gate 	uint16_t 	flags;
36297c478bd9Sstevel@tonic-gate 	struct qinit	*qi;
36307c478bd9Sstevel@tonic-gate 	int		(*proc)();
36317c478bd9Sstevel@tonic-gate 	struct stdata	*stp;
36327c478bd9Sstevel@tonic-gate 	int		rval;
36337c478bd9Sstevel@tonic-gate 
36347c478bd9Sstevel@tonic-gate 	stp = STREAM(qp);
36357c478bd9Sstevel@tonic-gate 	/*
36367c478bd9Sstevel@tonic-gate 	 * Prevent q_next from changing by holding sd_lock until
36377c478bd9Sstevel@tonic-gate 	 * acquiring SQLOCK.
36387c478bd9Sstevel@tonic-gate 	 */
36397c478bd9Sstevel@tonic-gate 	mutex_enter(&stp->sd_lock);
36407c478bd9Sstevel@tonic-gate 	if ((nqp = _WR(qp)) == qp) {
36417c478bd9Sstevel@tonic-gate 		qp = nqp->q_next;
36427c478bd9Sstevel@tonic-gate 	} else {
36437c478bd9Sstevel@tonic-gate 		qp = _RD(nqp->q_next);
36447c478bd9Sstevel@tonic-gate 	}
36457c478bd9Sstevel@tonic-gate 	qi = qp->q_qinfo;
36467c478bd9Sstevel@tonic-gate 	if (qp->q_struiot == STRUIOT_NONE || ! (proc = qi->qi_infop)) {
36477c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
36487c478bd9Sstevel@tonic-gate 		return (EINVAL);
36497c478bd9Sstevel@tonic-gate 	}
36507c478bd9Sstevel@tonic-gate 	sq = qp->q_syncq;
36517c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
36527c478bd9Sstevel@tonic-gate 	mutex_exit(&stp->sd_lock);
36537c478bd9Sstevel@tonic-gate 	count = sq->sq_count;
36547c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
36557c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_ciputctrl == NULL || (flags & SQ_CIPUT));
36567c478bd9Sstevel@tonic-gate 
36577c478bd9Sstevel@tonic-gate 	while ((flags & SQ_GOAWAY) || (!(flags & SQ_CIPUT) && count != 0)) {
36587c478bd9Sstevel@tonic-gate 		/*
36597c478bd9Sstevel@tonic-gate 		 * Wait until we can enter the inner perimeter.
36607c478bd9Sstevel@tonic-gate 		 */
36617c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_WANTWAKEUP;
36627c478bd9Sstevel@tonic-gate 		cv_wait(&sq->sq_wait, SQLOCK(sq));
36637c478bd9Sstevel@tonic-gate 		count = sq->sq_count;
36647c478bd9Sstevel@tonic-gate 		flags = sq->sq_flags;
36657c478bd9Sstevel@tonic-gate 	}
36667c478bd9Sstevel@tonic-gate 
36677c478bd9Sstevel@tonic-gate 	if (! (flags & SQ_CIPUT))
36687c478bd9Sstevel@tonic-gate 		sq->sq_flags = flags | SQ_EXCL;
36697c478bd9Sstevel@tonic-gate 	sq->sq_count = count + 1;
36707c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);		/* Wraparound */
36717c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
36727c478bd9Sstevel@tonic-gate 
36737c478bd9Sstevel@tonic-gate 	rval = (*proc)(qp, idp);
36747c478bd9Sstevel@tonic-gate 
36757c478bd9Sstevel@tonic-gate 	mutex_enter(SQLOCK(sq));
36767c478bd9Sstevel@tonic-gate 	flags = sq->sq_flags;
36777c478bd9Sstevel@tonic-gate 	ASSERT(sq->sq_count != 0);
36787c478bd9Sstevel@tonic-gate 	sq->sq_count--;
36797c478bd9Sstevel@tonic-gate 	if (flags & SQ_TAIL) {
36807c478bd9Sstevel@tonic-gate 		putnext_tail(sq, qp, flags);
36817c478bd9Sstevel@tonic-gate 		/*
36827c478bd9Sstevel@tonic-gate 		 * The only purpose of this ASSERT is to preserve calling stack
36837c478bd9Sstevel@tonic-gate 		 * in DEBUG kernel.
36847c478bd9Sstevel@tonic-gate 		 */
36857c478bd9Sstevel@tonic-gate 		ASSERT(flags & SQ_TAIL);
36867c478bd9Sstevel@tonic-gate 		return (rval);
36877c478bd9Sstevel@tonic-gate 	}
36887c478bd9Sstevel@tonic-gate 	ASSERT(flags & (SQ_EXCL|SQ_CIPUT));
36897c478bd9Sstevel@tonic-gate /*
36907c478bd9Sstevel@tonic-gate  * XXXX
36917c478bd9Sstevel@tonic-gate  * I am not certain the next comment is correct here.  I need to consider
36927c478bd9Sstevel@tonic-gate  * why the infonext is called, and if dropping SQ_EXCL unless non-CIPUT
36937c478bd9Sstevel@tonic-gate  * might cause other problems.  It just might be safer to drop it if
36947c478bd9Sstevel@tonic-gate  * !SQ_CIPUT because that is when we set it.
36957c478bd9Sstevel@tonic-gate  */
36967c478bd9Sstevel@tonic-gate 	/*
36977c478bd9Sstevel@tonic-gate 	 * Safe to always drop SQ_EXCL:
36987c478bd9Sstevel@tonic-gate 	 *	Not SQ_CIPUT means we set SQ_EXCL above
36997c478bd9Sstevel@tonic-gate 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put procedure
37007c478bd9Sstevel@tonic-gate 	 *	did a qwriter(INNER) in which case nobody else
37017c478bd9Sstevel@tonic-gate 	 *	is in the inner perimeter and we are exiting.
37027c478bd9Sstevel@tonic-gate 	 *
37037c478bd9Sstevel@tonic-gate 	 * I would like to make the following assertion:
37047c478bd9Sstevel@tonic-gate 	 *
37057c478bd9Sstevel@tonic-gate 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
37067c478bd9Sstevel@tonic-gate 	 *	sq->sq_count == 0);
37077c478bd9Sstevel@tonic-gate 	 *
37087c478bd9Sstevel@tonic-gate 	 * which indicates that if we are both putshared and exclusive,
37097c478bd9Sstevel@tonic-gate 	 * we became exclusive while executing the putproc, and the only
37107c478bd9Sstevel@tonic-gate 	 * claim on the syncq was the one we dropped a few lines above.
37117c478bd9Sstevel@tonic-gate 	 * But other threads that enter putnext while the syncq is exclusive
37127c478bd9Sstevel@tonic-gate 	 * need to make a claim as they may need to drop SQLOCK in the
37137c478bd9Sstevel@tonic-gate 	 * has_writers case to avoid deadlocks.  If these threads are
37147c478bd9Sstevel@tonic-gate 	 * delayed or preempted, it is possible that the writer thread can
37157c478bd9Sstevel@tonic-gate 	 * find out that there are other claims making the (sq_count == 0)
37167c478bd9Sstevel@tonic-gate 	 * test invalid.
37177c478bd9Sstevel@tonic-gate 	 */
37187c478bd9Sstevel@tonic-gate 
37197c478bd9Sstevel@tonic-gate 	sq->sq_flags = flags & ~SQ_EXCL;
37207c478bd9Sstevel@tonic-gate 	mutex_exit(SQLOCK(sq));
37217c478bd9Sstevel@tonic-gate 	return (rval);
37227c478bd9Sstevel@tonic-gate }
37237c478bd9Sstevel@tonic-gate 
37247c478bd9Sstevel@tonic-gate /*
37257c478bd9Sstevel@tonic-gate  * Return nonzero if the queue is responsible for struio(), else return 0.
37267c478bd9Sstevel@tonic-gate  */
37277c478bd9Sstevel@tonic-gate int
37287c478bd9Sstevel@tonic-gate isuioq(queue_t *q)
37297c478bd9Sstevel@tonic-gate {
37307c478bd9Sstevel@tonic-gate 	if (q->q_flag & QREADR)
37317c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiordq == q);
37327c478bd9Sstevel@tonic-gate 	else
37337c478bd9Sstevel@tonic-gate 		return (STREAM(q)->sd_struiowrq == q);
37347c478bd9Sstevel@tonic-gate }
37357c478bd9Sstevel@tonic-gate 
/*
 * When nonzero, create_putlocks() returns without creating any per cpu
 * putlocks.  Putlocks are enabled by default only on sparc.
 */
#if !defined(__sparc)
int disable_putlocks = 1;
#else
int disable_putlocks = 0;
#endif
37417c478bd9Sstevel@tonic-gate 
37427c478bd9Sstevel@tonic-gate /*
37437c478bd9Sstevel@tonic-gate  * called by create_putlock.
37447c478bd9Sstevel@tonic-gate  */
37457c478bd9Sstevel@tonic-gate static void
37467c478bd9Sstevel@tonic-gate create_syncq_putlocks(queue_t *q)
37477c478bd9Sstevel@tonic-gate {
37487c478bd9Sstevel@tonic-gate 	syncq_t	*sq = q->q_syncq;
37497c478bd9Sstevel@tonic-gate 	ciputctrl_t *cip;
37507c478bd9Sstevel@tonic-gate 	int i;
37517c478bd9Sstevel@tonic-gate 
37527c478bd9Sstevel@tonic-gate 	ASSERT(sq != NULL);
37537c478bd9Sstevel@tonic-gate 
37547c478bd9Sstevel@tonic-gate 	ASSERT(disable_putlocks == 0);
37557c478bd9Sstevel@tonic-gate 	ASSERT(n_ciputctrl >= min_n_ciputctrl);
37567c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
37577c478bd9Sstevel@tonic-gate 
37587c478bd9Sstevel@tonic-gate 	if (!(sq->sq_type & SQ_CIPUT))
37597c478bd9Sstevel@tonic-gate 		return;
37607c478bd9Sstevel@tonic-gate 
37617c478bd9Sstevel@tonic-gate 	for (i = 0; i <= 1; i++) {
37627c478bd9Sstevel@tonic-gate 		if (sq->sq_ciputctrl == NULL) {
37637c478bd9Sstevel@tonic-gate 			cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
37647c478bd9Sstevel@tonic-gate 			SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
37657c478bd9Sstevel@tonic-gate 			mutex_enter(SQLOCK(sq));
37667c478bd9Sstevel@tonic-gate 			if (sq->sq_ciputctrl != NULL) {
37677c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
37687c478bd9Sstevel@tonic-gate 				kmem_cache_free(ciputctrl_cache, cip);
37697c478bd9Sstevel@tonic-gate 			} else {
37707c478bd9Sstevel@tonic-gate 				ASSERT(sq->sq_nciputctrl == 0);
37717c478bd9Sstevel@tonic-gate 				sq->sq_nciputctrl = n_ciputctrl - 1;
37727c478bd9Sstevel@tonic-gate 				/*
37737c478bd9Sstevel@tonic-gate 				 * putnext checks sq_ciputctrl without holding
37747c478bd9Sstevel@tonic-gate 				 * SQLOCK. if it is not NULL putnext assumes
37757c478bd9Sstevel@tonic-gate 				 * sq_nciputctrl is initialized. membar below
37767c478bd9Sstevel@tonic-gate 				 * insures that.
37777c478bd9Sstevel@tonic-gate 				 */
37787c478bd9Sstevel@tonic-gate 				membar_producer();
37797c478bd9Sstevel@tonic-gate 				sq->sq_ciputctrl = cip;
37807c478bd9Sstevel@tonic-gate 				mutex_exit(SQLOCK(sq));
37817c478bd9Sstevel@tonic-gate 			}
37827c478bd9Sstevel@tonic-gate 		}
37837c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_nciputctrl == n_ciputctrl - 1);
37847c478bd9Sstevel@tonic-gate 		if (i == 1)
37857c478bd9Sstevel@tonic-gate 			break;
37867c478bd9Sstevel@tonic-gate 		q = _OTHERQ(q);
37877c478bd9Sstevel@tonic-gate 		if (!(q->q_flag & QPERQ)) {
37887c478bd9Sstevel@tonic-gate 			ASSERT(sq == q->q_syncq);
37897c478bd9Sstevel@tonic-gate 			break;
37907c478bd9Sstevel@tonic-gate 		}
37917c478bd9Sstevel@tonic-gate 		ASSERT(q->q_syncq != NULL);
37927c478bd9Sstevel@tonic-gate 		ASSERT(sq != q->q_syncq);
37937c478bd9Sstevel@tonic-gate 		sq = q->q_syncq;
37947c478bd9Sstevel@tonic-gate 		ASSERT(sq->sq_type & SQ_CIPUT);
37957c478bd9Sstevel@tonic-gate 	}
37967c478bd9Sstevel@tonic-gate }
37977c478bd9Sstevel@tonic-gate 
37987c478bd9Sstevel@tonic-gate /*
37997c478bd9Sstevel@tonic-gate  * If stream argument is 0 only create per cpu sq_putlocks/sq_putcounts for
38007c478bd9Sstevel@tonic-gate  * syncq of q. If stream argument is not 0 create per cpu stream_putlocks for
38017c478bd9Sstevel@tonic-gate  * the stream of q and per cpu sq_putlocks/sq_putcounts for all syncq's
38027c478bd9Sstevel@tonic-gate  * starting from q and down to the driver.
38037c478bd9Sstevel@tonic-gate  *
38047c478bd9Sstevel@tonic-gate  * This should be called after the affected queues are part of stream
38057c478bd9Sstevel@tonic-gate  * geometry. It should be called from driver/module open routine after
38067c478bd9Sstevel@tonic-gate  * qprocson() call. It is also called from nfs syscall where it is known that
38077c478bd9Sstevel@tonic-gate  * stream is configured and won't change its geometry during create_putlock
38087c478bd9Sstevel@tonic-gate  * call.
38097c478bd9Sstevel@tonic-gate  *
38107c478bd9Sstevel@tonic-gate  * caller normally uses 0 value for the stream argument to speed up MT putnext
38117c478bd9Sstevel@tonic-gate  * into the perimeter of q for example because its perimeter is per module
38127c478bd9Sstevel@tonic-gate  * (e.g. IP).
38137c478bd9Sstevel@tonic-gate  *
38147c478bd9Sstevel@tonic-gate  * caller normally uses non 0 value for the stream argument to hint the system
38157c478bd9Sstevel@tonic-gate  * that the stream of q is a very contended global system stream
38167c478bd9Sstevel@tonic-gate  * (e.g. NFS/UDP) and the part of the stream from q to the driver is
38177c478bd9Sstevel@tonic-gate  * particularly MT hot.
38187c478bd9Sstevel@tonic-gate  *
38197c478bd9Sstevel@tonic-gate  * Caller insures stream plumbing won't happen while we are here and therefore
38207c478bd9Sstevel@tonic-gate  * q_next can be safely used.
38217c478bd9Sstevel@tonic-gate  */
38227c478bd9Sstevel@tonic-gate 
38237c478bd9Sstevel@tonic-gate void
38247c478bd9Sstevel@tonic-gate create_putlocks(queue_t *q, int stream)
38257c478bd9Sstevel@tonic-gate {
38267c478bd9Sstevel@tonic-gate 	ciputctrl_t	*cip;
38277c478bd9Sstevel@tonic-gate 	struct stdata	*stp = STREAM(q);
38287c478bd9Sstevel@tonic-gate 
38297c478bd9Sstevel@tonic-gate 	q = _WR(q);
38307c478bd9Sstevel@tonic-gate 	ASSERT(stp != NULL);
38317c478bd9Sstevel@tonic-gate 
38327c478bd9Sstevel@tonic-gate 	if (disable_putlocks != 0)
38337c478bd9Sstevel@tonic-gate 		return;
38347c478bd9Sstevel@tonic-gate 
38357c478bd9Sstevel@tonic-gate 	if (n_ciputctrl < min_n_ciputctrl)
38367c478bd9Sstevel@tonic-gate 		return;
38377c478bd9Sstevel@tonic-gate 
38387c478bd9Sstevel@tonic-gate 	ASSERT(ciputctrl_cache != NULL);
38397c478bd9Sstevel@tonic-gate 
38407c478bd9Sstevel@tonic-gate 	if (stream != 0 && stp->sd_ciputctrl == NULL) {
38417c478bd9Sstevel@tonic-gate 		cip = kmem_cache_alloc(ciputctrl_cache, KM_SLEEP);
38427c478bd9Sstevel@tonic-gate 		SUMCHECK_CIPUTCTRL_COUNTS(cip, n_ciputctrl - 1, 0);
38437c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
38447c478bd9Sstevel@tonic-gate 		if (stp->sd_ciputctrl != NULL) {
38457c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
38467c478bd9Sstevel@tonic-gate 			kmem_cache_free(ciputctrl_cache, cip);
38477c478bd9Sstevel@tonic-gate 		} else {
38487c478bd9Sstevel@tonic-gate 			ASSERT(stp->sd_nciputctrl == 0);
38497c478bd9Sstevel@tonic-gate 			stp->sd_nciputctrl = n_ciputctrl - 1;
38507c478bd9Sstevel@tonic-gate 			/*
38517c478bd9Sstevel@tonic-gate 			 * putnext checks sd_ciputctrl without holding
38527c478bd9Sstevel@tonic-gate 			 * sd_lock. if it is not NULL putnext assumes
38537c478bd9Sstevel@tonic-gate 			 * sd_nciputctrl is initialized. membar below
38547c478bd9Sstevel@tonic-gate 			 * insures that.
38557c478bd9Sstevel@tonic-gate 			 */
38567c478bd9Sstevel@tonic-gate 			membar_producer();
38577c478bd9Sstevel@tonic-gate 			stp->sd_ciputctrl = cip;
38587c478bd9Sstevel@tonic-gate 			mutex_exit(&stp->sd_lock);
38597c478bd9Sstevel@tonic-gate 		}
38607c478bd9Sstevel@tonic-gate 	}
38617c478bd9Sstevel@tonic-gate 
38627c478bd9Sstevel@tonic-gate 	ASSERT(stream == 0 || stp->sd_nciputctrl == n_ciputctrl - 1);
38637c478bd9Sstevel@tonic-gate 
38647c478bd9Sstevel@tonic-gate 	while (_SAMESTR(q)) {
38657c478bd9Sstevel@tonic-gate 		create_syncq_putlocks(q);
38667c478bd9Sstevel@tonic-gate 		if (stream == 0)
38677c478bd9Sstevel@tonic-gate 			return;
38687c478bd9Sstevel@tonic-gate 		q = q->q_next;
38697c478bd9Sstevel@tonic-gate 	}
38707c478bd9Sstevel@tonic-gate 	ASSERT(q != NULL);
38717c478bd9Sstevel@tonic-gate 	create_syncq_putlocks(q);
38727c478bd9Sstevel@tonic-gate }
38737c478bd9Sstevel@tonic-gate 
/*
 * STREAMS Flow Trace - record STREAMS Flow Trace events as an mblk flows
 * through a stream.
 *
 * The data currently recorded per event is an hrtime stamp, a queue
 * address, an event type, and a per-type datum.  Much of the STREAMS
 * framework is instrumented for automatic flow tracing (when enabled).
 * Events can also be defined and used by STREAMS modules and drivers.
 *
 * Global objects:
 *
 *	str_ftevent() - Add a flow-trace event to a dblk.
 *	str_ftfree() - Free flow-trace data.
 *
 * Local objects:
 *
 *	fthdr_cache - pointer to the kmem cache for trace headers.
 *	ftblk_cache - pointer to the kmem cache for trace data blocks.
 */

/* Nonzero means: don't do STREAMS flow tracing. */
int str_ftnever = 1;
38957c478bd9Sstevel@tonic-gate 
38967c478bd9Sstevel@tonic-gate void
38977c478bd9Sstevel@tonic-gate str_ftevent(fthdr_t *hp, void *p, ushort_t evnt, ushort_t data)
38987c478bd9Sstevel@tonic-gate {
38997c478bd9Sstevel@tonic-gate 	ftblk_t *bp = hp->tail;
39007c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
39017c478bd9Sstevel@tonic-gate 	ftevnt_t *ep;
39027c478bd9Sstevel@tonic-gate 	int ix, nix;
39037c478bd9Sstevel@tonic-gate 
39047c478bd9Sstevel@tonic-gate 	ASSERT(hp != NULL);
39057c478bd9Sstevel@tonic-gate 
39067c478bd9Sstevel@tonic-gate 	for (;;) {
39077c478bd9Sstevel@tonic-gate 		if ((ix = bp->ix) == FTBLK_EVNTS) {
39087c478bd9Sstevel@tonic-gate 			/*
39097c478bd9Sstevel@tonic-gate 			 * Tail doesn't have room, so need a new tail.
39107c478bd9Sstevel@tonic-gate 			 *
39117c478bd9Sstevel@tonic-gate 			 * To make this MT safe, first, allocate a new
39127c478bd9Sstevel@tonic-gate 			 * ftblk, and initialize it.  To make life a
39137c478bd9Sstevel@tonic-gate 			 * little easier, reserve the first slot (mostly
39147c478bd9Sstevel@tonic-gate 			 * by making ix = 1).  When we are finished with
39157c478bd9Sstevel@tonic-gate 			 * the initialization, CAS this pointer to the
39167c478bd9Sstevel@tonic-gate 			 * tail.  If this succeeds, this is the new
39177c478bd9Sstevel@tonic-gate 			 * "next" block.  Otherwise, another thread
39187c478bd9Sstevel@tonic-gate 			 * got here first, so free the block and start
39197c478bd9Sstevel@tonic-gate 			 * again.
39207c478bd9Sstevel@tonic-gate 			 */
39217c478bd9Sstevel@tonic-gate 			if (!(nbp = kmem_cache_alloc(ftblk_cache,
39227c478bd9Sstevel@tonic-gate 			    KM_NOSLEEP))) {
39237c478bd9Sstevel@tonic-gate 				/* no mem, so punt */
39247c478bd9Sstevel@tonic-gate 				str_ftnever++;
39257c478bd9Sstevel@tonic-gate 				/* free up all flow data? */
39267c478bd9Sstevel@tonic-gate 				return;
39277c478bd9Sstevel@tonic-gate 			}
39287c478bd9Sstevel@tonic-gate 			nbp->nxt = NULL;
39297c478bd9Sstevel@tonic-gate 			nbp->ix = 1;
39307c478bd9Sstevel@tonic-gate 			/*
39317c478bd9Sstevel@tonic-gate 			 * Just in case there is another thread about
39327c478bd9Sstevel@tonic-gate 			 * to get the next index, we need to make sure
39337c478bd9Sstevel@tonic-gate 			 * the value is there for it.
39347c478bd9Sstevel@tonic-gate 			 */
39357c478bd9Sstevel@tonic-gate 			membar_producer();
39367c478bd9Sstevel@tonic-gate 			if (casptr(&hp->tail, bp, nbp) == bp) {
39377c478bd9Sstevel@tonic-gate 				/* CAS was successful */
39387c478bd9Sstevel@tonic-gate 				bp->nxt = nbp;
39397c478bd9Sstevel@tonic-gate 				membar_producer();
39407c478bd9Sstevel@tonic-gate 				bp = nbp;
39417c478bd9Sstevel@tonic-gate 				ix = 0;
39427c478bd9Sstevel@tonic-gate 				goto cas_good;
39437c478bd9Sstevel@tonic-gate 			} else {
39447c478bd9Sstevel@tonic-gate 				kmem_cache_free(ftblk_cache, nbp);
39457c478bd9Sstevel@tonic-gate 				bp = hp->tail;
39467c478bd9Sstevel@tonic-gate 				continue;
39477c478bd9Sstevel@tonic-gate 			}
39487c478bd9Sstevel@tonic-gate 		}
39497c478bd9Sstevel@tonic-gate 		nix = ix + 1;
39507c478bd9Sstevel@tonic-gate 		if (cas32((uint32_t *)&bp->ix, ix, nix) == ix) {
39517c478bd9Sstevel@tonic-gate 		cas_good:
39527c478bd9Sstevel@tonic-gate 			if (curthread != hp->thread) {
39537c478bd9Sstevel@tonic-gate 				hp->thread = curthread;
39547c478bd9Sstevel@tonic-gate 				evnt |= FTEV_CS;
39557c478bd9Sstevel@tonic-gate 			}
39567c478bd9Sstevel@tonic-gate 			if (CPU->cpu_seqid != hp->cpu_seqid) {
39577c478bd9Sstevel@tonic-gate 				hp->cpu_seqid = CPU->cpu_seqid;
39587c478bd9Sstevel@tonic-gate 				evnt |= FTEV_PS;
39597c478bd9Sstevel@tonic-gate 			}
39607c478bd9Sstevel@tonic-gate 			ep = &bp->ev[ix];
39617c478bd9Sstevel@tonic-gate 			break;
39627c478bd9Sstevel@tonic-gate 		}
39637c478bd9Sstevel@tonic-gate 	}
39647c478bd9Sstevel@tonic-gate 
39657c478bd9Sstevel@tonic-gate 	if (evnt & FTEV_QMASK) {
39667c478bd9Sstevel@tonic-gate 		queue_t *qp = p;
39677c478bd9Sstevel@tonic-gate 
39687c478bd9Sstevel@tonic-gate 		/*
39697c478bd9Sstevel@tonic-gate 		 * It is possible that the module info is broke
39707c478bd9Sstevel@tonic-gate 		 * (as is logsubr.c at this comment writing).
39717c478bd9Sstevel@tonic-gate 		 * Instead of panicing or doing other unmentionables,
39727c478bd9Sstevel@tonic-gate 		 * we shall put a dummy name as the mid, and continue.
39737c478bd9Sstevel@tonic-gate 		 */
39747c478bd9Sstevel@tonic-gate 		if (qp->q_qinfo == NULL)
39757c478bd9Sstevel@tonic-gate 			ep->mid = "NONAME";
39767c478bd9Sstevel@tonic-gate 		else
39777c478bd9Sstevel@tonic-gate 			ep->mid = qp->q_qinfo->qi_minfo->mi_idname;
39787c478bd9Sstevel@tonic-gate 
39797c478bd9Sstevel@tonic-gate 		if (!(qp->q_flag & QREADR))
39807c478bd9Sstevel@tonic-gate 			evnt |= FTEV_ISWR;
39817c478bd9Sstevel@tonic-gate 	} else {
39827c478bd9Sstevel@tonic-gate 		ep->mid = (char *)p;
39837c478bd9Sstevel@tonic-gate 	}
39847c478bd9Sstevel@tonic-gate 
39857c478bd9Sstevel@tonic-gate 	ep->ts = gethrtime();
39867c478bd9Sstevel@tonic-gate 	ep->evnt = evnt;
39877c478bd9Sstevel@tonic-gate 	ep->data = data;
39887c478bd9Sstevel@tonic-gate 	hp->hash = (hp->hash << 9) + hp->hash;
39897c478bd9Sstevel@tonic-gate 	hp->hash += (evnt << 16) | data;
39907c478bd9Sstevel@tonic-gate 	hp->hash += (uintptr_t)ep->mid;
39917c478bd9Sstevel@tonic-gate }
39927c478bd9Sstevel@tonic-gate 
39937c478bd9Sstevel@tonic-gate /*
39947c478bd9Sstevel@tonic-gate  * Free flow-trace data.
39957c478bd9Sstevel@tonic-gate  */
39967c478bd9Sstevel@tonic-gate void
39977c478bd9Sstevel@tonic-gate str_ftfree(dblk_t *dbp)
39987c478bd9Sstevel@tonic-gate {
39997c478bd9Sstevel@tonic-gate 	fthdr_t *hp = dbp->db_fthdr;
40007c478bd9Sstevel@tonic-gate 	ftblk_t *bp = &hp->first;
40017c478bd9Sstevel@tonic-gate 	ftblk_t *nbp;
40027c478bd9Sstevel@tonic-gate 
40037c478bd9Sstevel@tonic-gate 	if (bp != hp->tail || bp->ix != 0) {
40047c478bd9Sstevel@tonic-gate 		/*
40057c478bd9Sstevel@tonic-gate 		 * Clear out the hash, have the tail point to itself, and free
40067c478bd9Sstevel@tonic-gate 		 * any continuation blocks.
40077c478bd9Sstevel@tonic-gate 		 */
40087c478bd9Sstevel@tonic-gate 		bp = hp->first.nxt;
40097c478bd9Sstevel@tonic-gate 		hp->tail = &hp->first;
40107c478bd9Sstevel@tonic-gate 		hp->hash = 0;
40117c478bd9Sstevel@tonic-gate 		hp->first.nxt = NULL;
40127c478bd9Sstevel@tonic-gate 		hp->first.ix = 0;
40137c478bd9Sstevel@tonic-gate 		while (bp != NULL) {
40147c478bd9Sstevel@tonic-gate 			nbp = bp->nxt;
40157c478bd9Sstevel@tonic-gate 			kmem_cache_free(ftblk_cache, bp);
40167c478bd9Sstevel@tonic-gate 			bp = nbp;
40177c478bd9Sstevel@tonic-gate 		}
40187c478bd9Sstevel@tonic-gate 	}
40197c478bd9Sstevel@tonic-gate 	kmem_cache_free(fthdr_cache, hp);
40207c478bd9Sstevel@tonic-gate 	dbp->db_fthdr = NULL;
40217c478bd9Sstevel@tonic-gate }
4022