xref: /illumos-gate/usr/src/uts/common/sys/strsubr.h (revision 45ede40b2394db7967e59f19288fae9b62efd4aa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved	*/
23 
24 
25 /*
26  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  * Copyright 2018 Joyent, Inc.
29  */
30 
31 /*
32  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
33  */
34 
35 #ifndef _SYS_STRSUBR_H
36 #define	_SYS_STRSUBR_H
37 
38 /*
39  * WARNING:
40  * Everything in this file is private, belonging to the
41  * STREAMS subsystem.  The only guarantee made about the
42  * contents of this file is that if you include it, your
43  * code will not port to the next release.
44  */
45 #include <sys/stream.h>
46 #include <sys/stropts.h>
47 #include <sys/vnode.h>
48 #include <sys/kstat.h>
49 #include <sys/uio.h>
50 #include <sys/proc.h>
51 #include <sys/netstack.h>
52 #include <sys/modhash.h>
53 
54 #ifdef	__cplusplus
55 extern "C" {
56 #endif
57 
58 /*
59  * In general, the STREAMS locks are disjoint; they are only held
60  * locally, and not simultaneously by a thread.  However, module
61  * code, including at the stream head, requires some locks to be
62  * acquired in order for its safety.
63  *	1. Stream level claim.  This prevents the value of q_next
64  *		from changing while module code is executing.
65  *	2. Queue level claim.  This prevents the value of q_ptr
66  *		from changing while put or service code is executing.
67  *		In addition, it provides for queue single-threading
68  *		for QPAIR and PERQ MT-safe modules.
69  *	3. Stream head lock.  May be held by the stream head module
70  *		to implement a read/write/open/close monitor.
71  *	   Note that the only types of twisted stream supported are
72  *	   the pipe and transports which have read and write service
73  *	   procedures on both sides of the twist.
74  *	4. Queue lock.  May be acquired by utility routines on
75  *		behalf of a module.
76  */
77 
78 /*
79  * In general, sd_lock protects the consistency of the stdata
80  * structure.  Additionally, it is used with sd_monitor
81  * to implement an open/close monitor.  In particular, it protects
82  * the following fields:
83  *	sd_iocblk
84  *	sd_flag
85  *	sd_copyflag
86  *	sd_iocid
87  *	sd_iocwait
88  *	sd_sidp
89  *	sd_pgidp
90  *	sd_wroff
91  *	sd_tail
92  *	sd_rerror
93  *	sd_werror
94  *	sd_pushcnt
95  *	sd_sigflags
96  *	sd_siglist
97  *	sd_pollist
98  *	sd_mark
99  *	sd_closetime
100  *	sd_wakeq
101  *	sd_maxblk
102  *
103  * The following fields are modified only by the allocator, which
104  * has exclusive access to them at that time:
105  *	sd_wrq
106  *	sd_strtab
107  *
108  * The following field is protected by the overlying file system
109  * code, guaranteeing single-threading of opens:
110  *	sd_vnode
111  *
112  * Stream-level locks should be acquired before any queue-level locks
113  *	are acquired.
114  *
115  * The stream head write queue lock (that of sd_wrq) is used to protect the
116  * fields sd_qn_maxpsz and sd_qn_minpsz because freezestr() which is
117  * necessary for strqset() only gets the queue lock.
118  */
119 
120 /*
121  * Function types for the parameterized stream head.
122  * The msgfunc_t takes the parameters:
123  *	msgfunc(vnode_t *vp, mblk_t *mp, strwakeup_t *wakeups,
124  *		strsigset_t *firstmsgsigs, strsigset_t *allmsgsigs,
125  *		strpollset_t *pollwakeups);
126  * It returns an optional message to be processed by the stream head.
127  *
128  * The parameters for errfunc_t are:
129  *	errfunc(vnode_t *vp, int ispeek, int *clearerr);
130  * It returns an errno, or zero if there was no pending error.
131  */
132 typedef uint_t	strwakeup_t;	/* type of a msgfunc_t's "wakeups" argument */
133 typedef uint_t	strsigset_t;	/* type of a msgfunc_t's signal-set arguments */
134 typedef short	strpollset_t;	/* type of a msgfunc_t's "pollwakeups" arg */
135 typedef uintptr_t callbparams_id_t;	/* id of a callbparams_t (cbp_id) */
136 typedef	mblk_t	*(*msgfunc_t)(vnode_t *, mblk_t *, strwakeup_t *,
137 			strsigset_t *, strsigset_t *, strpollset_t *);
138 typedef int	(*errfunc_t)(vnode_t *, int, int *);	/* errno, or 0 if none */
139 
140 /*
141  * Per stream sd_lock in putnext may be replaced by per cpu stream_putlocks
142  * each living in a separate cache line. putnext/canputnext grabs only one of
143  * stream_putlocks while strlock() (called on behalf of insertq()/removeq())
144  * acquires all stream_putlocks. Normally stream_putlocks are only employed
145  * for highly contended streams that have SQ_CIPUT queues in the critical path
146  * (e.g. NFS/UDP stream).
147  *
148  * stream_putlocks are dynamically assigned to stdata structure through
149  * sd_ciputctrl pointer possibly when a stream is already in use. Since
150  * strlock() uses stream_putlocks only under sd_lock acquiring sd_lock when
151  * assigning stream_putlocks to the stream ensures synchronization with
152  * strlock().
153  *
154  * For lock ordering purposes stream_putlocks are treated as the extension of
155  * sd_lock and are always grabbed right after grabbing sd_lock and released
156  * right before releasing sd_lock except putnext/canputnext where only one of
157  * stream_putlocks locks is used and where it is the first lock to grab.
158  */
159 
160 typedef struct ciputctrl_str {	/* one putlock entry (lock + count) */
161 	union _ciput_un {
162 		uchar_t	pad[64];	/* forces entry size to >= 64 bytes */
163 		struct _ciput_str {
164 			kmutex_t	ciput_lck;	/* the putlock itself */
165 			ushort_t	ciput_cnt;	/* count paired with lock */
166 		} ciput_str;
167 	} ciput_un;
168 } ciputctrl_t;
169 
170 #define	ciputctrl_lock	ciput_un.ciput_str.ciput_lck	/* member shorthand */
171 #define	ciputctrl_count	ciput_un.ciput_str.ciput_cnt	/* member shorthand */
172 
173 /*
174  * Header for a stream: interface to rest of system.
175  *
176  * NOTE: While this is a consolidation-private structure, some unbundled and
177  *       third-party products inappropriately make use of some of the fields.
178  *       As such, please take care to not gratuitously change any offsets of
179  *       existing members.
180  */
181 typedef struct stdata {
182 	struct queue	*sd_wrq;	/* write queue */
183 	struct msgb	*sd_iocblk;	/* return block for ioctl */
184 	struct vnode	*sd_vnode;	/* pointer to associated vnode */
185 	struct streamtab *sd_strtab;	/* pointer to streamtab for stream */
186 	uint_t		sd_flag;	/* state/flags */
187 	uint_t		sd_iocid;	/* ioctl id */
188 	struct pid	*sd_sidp;	/* controlling session info */
189 	struct pid	*sd_pgidp;	/* controlling process group info */
190 	ushort_t	sd_tail;	/* reserved space in written mblks */
191 	ushort_t	sd_wroff;	/* write offset */
192 	int		sd_rerror;	/* error to return on read ops */
193 	int		sd_werror;	/* error to return on write ops */
194 	int		sd_pushcnt;	/* number of pushes done on stream */
195 	int		sd_sigflags;	/* logical OR of all siglist events */
196 	struct strsig	*sd_siglist;	/* pid linked list to rcv SIGPOLL sig */
197 	struct pollhead sd_pollist;	/* list of all pollers to wake up */
198 	struct msgb	*sd_mark;	/* "marked" message on read queue */
199 	clock_t		sd_closetime;	/* time to wait to drain q in close */
200 	kmutex_t	sd_lock;	/* protect head consistency */
201 	kcondvar_t	sd_monitor;	/* open/close/push/pop monitor */
202 	kcondvar_t	sd_iocmonitor;	/* ioctl single-threading */
203 	kcondvar_t	sd_refmonitor;	/* sd_refcnt monitor */
204 	ssize_t		sd_qn_minpsz;	/* These two fields are a performance */
205 	ssize_t		sd_qn_maxpsz;	/* enhancement: they cache the values */
206 					/* in the stream head so we don't have */
207 					/* to ask the module below the stream */
208 					/* head to get this information. */
209 	struct stdata	*sd_mate;	/* pointer to twisted stream mate */
210 	kthread_id_t	sd_freezer;	/* thread that froze stream */
211 	kmutex_t	sd_reflock;	/* Protects sd_refcnt */
212 	int		sd_refcnt;	/* number of claimstr */
213 	uint_t		sd_wakeq;	/* strwakeq()'s copy of sd_flag */
214 	struct queue	*sd_struiordq;	/* sync barrier struio() read queue */
215 	struct queue	*sd_struiowrq;	/* sync barrier struio() write queue */
216 	char		sd_struiodnak;	/* defer NAK of M_IOCTL by rput() */
217 	struct msgb	*sd_struionak;	/* pointer M_IOCTL mblk(s) to NAK */
218 	caddr_t		sd_t_audit_data; /* For audit purposes only */
219 	ssize_t		sd_maxblk;	/* maximum message block size */
220 	uint_t		sd_rput_opt;	/* options/flags for strrput */
221 	uint_t		sd_wput_opt;	/* options/flags for write/putmsg */
222 	uint_t		sd_read_opt;	/* options/flags for strread */
223 	msgfunc_t	sd_rprotofunc;	/* rput M_*PROTO routine */
224 	msgfunc_t	sd_rputdatafunc; /* read M_DATA routine */
225 	msgfunc_t	sd_rmiscfunc;	/* rput routine (non-data/proto) */
226 	msgfunc_t	sd_wputdatafunc; /* wput M_DATA routine */
227 	errfunc_t	sd_rderrfunc;	/* read side error callback */
228 	errfunc_t	sd_wrerrfunc;	/* write side error callback */
229 	/*
230 	 * support for low contention concurrent putnext.
231 	 */
232 	ciputctrl_t	*sd_ciputctrl;	/* stream_putlocks array, or NULL */
233 	uint_t		sd_nciputctrl;	/* used as last index of sd_ciputctrl */
234 
235 	int		sd_anchor;	/* position of anchor in stream */
236 	/*
237 	 * Service scheduling at the stream head.
238 	 */
239 	kmutex_t	sd_qlock;	/* presumably guards the list below */
240 	struct queue	*sd_qhead;	/* Head of queues to be serviced. */
241 	struct queue	*sd_qtail;	/* Tail of queues to be serviced. */
242 	void		*sd_servid;	/* Service ID for bckgrnd schedule */
243 	ushort_t	sd_svcflags;	/* Servicing flags */
244 	short		sd_nqueues;	/* Number of queues in the list */
245 	kcondvar_t	sd_qcv;		/* Waiters for qhead to become empty */
246 	kcondvar_t	sd_zcopy_wait;	/* zero-copy wait; cf. STZCNOTIFY (?) */
247 	uint_t		sd_copyflag;	/* copy-related flags */
248 	zoneid_t	sd_anchorzone;	/* Allow removal from same zone only */
249 	struct msgb	*sd_cmdblk;	/* reply from _I_CMD */
250 
251 	/*
252 	 * When a STREAMS device is cloned, the sd_vnode element of this
253 	 * structure is replaced by a pointer to a common vnode shared across
254 	 * all streams that are using the device. In this case, it is no longer
255 	 * possible to get from the stream head back to the original vnode via
256 	 * sd_vnode. Therefore, when such a device is cloned, the parent vnode -
257 	 * i.e. that which was created during the device clone in spec_clone()
258 	 * - is kept in sd_pvnode.
259 	 */
260 	struct vnode	*sd_pvnode;
261 } stdata_t;
262 
263 /*
264  * stdata servicing flags (bits of sd_svcflags).
265  */
266 #define	STRS_WILLSERVICE	0x01
267 #define	STRS_SCHEDULED		0x02
268 
269 #define	STREAM_NEEDSERVICE(stp)	((stp)->sd_qhead != NULL) /* list non-empty */
270 
271 /*
272  * stdata flag field defines (bits of the uint_t sd_flag)
273  */
274 #define	IOCWAIT		0x00000001	/* Someone is doing an ioctl */
275 #define	RSLEEP		0x00000002	/* Someone wants to read/recv msg */
276 #define	WSLEEP		0x00000004	/* Someone wants to write */
277 #define	STRPRI		0x00000008	/* An M_PCPROTO is at stream head */
278 #define	STRHUP		0x00000010	/* Device has vanished */
279 #define	STWOPEN		0x00000020	/* waiting for 1st open */
280 #define	STPLEX		0x00000040	/* stream is being multiplexed */
281 #define	STRISTTY	0x00000080	/* stream is a terminal */
282 #define	STRGETINPROG	0x00000100	/* (k)strgetmsg is running */
283 #define	IOCWAITNE	0x00000200	/* STR_NOERROR ioctl running */
284 #define	STRDERR		0x00000400	/* fatal read error from M_ERROR */
285 #define	STWRERR		0x00000800	/* fatal write error from M_ERROR */
286 #define	STRDERRNONPERSIST 0x00001000	/* nonpersistent read errors */
287 #define	STWRERRNONPERSIST 0x00002000	/* nonpersistent write errors */
288 #define	STRCLOSE	0x00004000	/* wait for a close to complete */
289 #define	SNDMREAD	0x00008000	/* used for read notification */
290 #define	OLDNDELAY	0x00010000	/* use old TTY semantics for */
291 					/* NDELAY reads and writes */
292 #define	STRXPG4TTY	0x00020000	/* Use XPG4 TTY semantics */
293 	/*		0x00040000	   unused */
294 #define	STRTOSTOP	0x00080000	/* block background writes */
295 #define	STRCMDWAIT	0x00100000	/* someone is doing an _I_CMD */
296 	/*		0x00200000	   unused */
297 #define	STRMOUNT	0x00400000	/* stream is mounted */
298 #define	STRNOTATMARK	0x00800000	/* Not at mark (when empty read q) */
299 #define	STRDELIM	0x01000000	/* generate delimited messages */
300 #define	STRATMARK	0x02000000	/* At mark (due to MSGMARKNEXT) */
301 #define	STZCNOTIFY	0x04000000	/* wait for zerocopy mblk to be acked */
302 #define	STRPLUMB	0x08000000	/* push/pop pending */
303 #define	STREOF		0x10000000	/* End-of-file indication */
304 #define	STREOPENFAIL	0x20000000	/* indicates if re-open has failed */
305 #define	STRMATE		0x40000000	/* this stream is a mate */
306 #define	STRHASLINKS	0x80000000	/* I_LINKs under this stream */
307 
308 /*
309  * Copy-related flags (bits of sd_copyflag), set by SO_COPYOPT.
310  */
311 #define	STZCVMSAFE	0x00000001	/* safe to borrow file (segmapped) */
312 					/* pages instead of bcopy */
313 #define	STZCVMUNSAFE	0x00000002	/* unsafe to borrow file pages */
314 #define	STRCOPYCACHED	0x00000004	/* copy should NOT bypass cache */
315 
316 /*
317  * Options and flags for strrput (bits of sd_rput_opt)
318  */
319 #define	SR_POLLIN	0x00000001	/* pollwakeup needed for band0 data */
320 #define	SR_SIGALLDATA	0x00000002	/* Send SIGPOLL for all M_DATA */
321 #define	SR_CONSOL_DATA	0x00000004	/* Consolidate M_DATA onto q_last */
322 #define	SR_IGN_ZEROLEN	0x00000008	/* Ignore zero-length M_DATA */
323 
324 /*
325  * Options and flags for strwrite/strputmsg (bits of sd_wput_opt)
326  */
327 #define	SW_SIGPIPE	0x00000001	/* Send SIGPIPE for write error */
328 #define	SW_RECHECK_ERR	0x00000002	/* Recheck errors in strwrite loop */
329 #define	SW_SNDZERO	0x00000004	/* send 0-length msg down pipe/FIFO */
330 
331 /*
332  * Options and flags for strread (bits of sd_read_opt)
333  */
334 #define	RD_MSGDIS	0x00000001	/* read msg discard */
335 #define	RD_MSGNODIS	0x00000002	/* read msg no discard */
336 #define	RD_PROTDAT	0x00000004	/* read M_[PC]PROTO contents as data */
337 #define	RD_PROTDIS	0x00000008	/* discard M_[PC]PROTO blocks and */
338 					/* retain data blocks */
339 /*
340  * Flags parameter for strsetrputhooks() and strsetwputhooks().
341  * These flags define the interface for setting the above internal
342  * flags in sd_rput_opt and sd_wput_opt (not the same bit values as SR_/SW_).
343  */
344 #define	SH_CONSOL_DATA	0x00000001	/* Consolidate M_DATA onto q_last */
345 #define	SH_SIGALLDATA	0x00000002	/* Send SIGPOLL for all M_DATA */
346 #define	SH_IGN_ZEROLEN	0x00000004	/* Drop zero-length M_DATA */
347 
348 #define	SH_SIGPIPE	0x00000100	/* Send SIGPIPE for write error */
349 #define	SH_RECHECK_ERR	0x00000200	/* Recheck errors in strwrite loop */
350 
351 /*
352  * Each queue points to a sync queue (the inner perimeter) which keeps
353  * track of the number of threads that are inside a given queue (sq_count)
354  * and also is used to implement the asynchronous putnext
355  * (by queuing messages if the queue can not be entered.)
356  *
357  * Messages are queued on sq_head/sq_tail including deferred qwriter(INNER)
358  * messages. The sq_head/sq_tail list is a singly-linked list with
359  * b_queue recording the queue and b_prev recording the function to
360  * be called (either the put procedure or a qwriter callback function.)
361  *
362  * The sq_count counter tracks the number of threads that are
363  * executing inside the perimeter or (in the case of outer perimeters)
364  * have some work queued for them relating to the perimeter. The sq_rmqcount
365  * counter tracks the subset which are in removeq() (usually invoked from
366  * qprocsoff(9F)).
367  *
368  * In addition a module writer can declare that the module has an outer
369  * perimeter (by setting D_MTOUTPERIM) in which case all inner perimeter
370  * syncq's for the module point (through sq_outer) to an outer perimeter
371  * syncq. The outer perimeter consists of the doubly linked list (sq_onext and
372  * sq_oprev) linking all the inner perimeter syncq's with the outer perimeter
373  * syncq. This is used to implement qwriter(OUTER) (an asynchronous way of
374  * getting exclusive access at the outer perimeter) and outer_enter/exit
375  * which are used by the framework to acquire exclusive access to the outer
376  * perimeter during open and close of modules that have set D_MTOUTPERIM.
377  *
378  * In the inner perimeter case sq_save is available for use by machine
379  * dependent code. sq_head/sq_tail are used to queue deferred messages on
380  * the inner perimeter syncqs and to queue become_writer requests on the
381  * outer perimeter syncqs.
382  *
383  * Note: machine dependent optimized versions of putnext may depend
384  * on the order of sq_flags and sq_count (so that they can e.g.
385  * read these two fields in a single load instruction.)
386  *
387  * Per perimeter SQLOCK/sq_count in putnext/put may be replaced by per cpu
388  * sq_putlocks/sq_putcounts each living in a separate cache line. Obviously
389  * sq_putlock[x] protects sq_putcount[x]. putnext/put routine will grab only 1
390  * of sq_putlocks and update only 1 of sq_putcounts. strlock() and many
391  * other routines in strsubr.c and ddi.c will grab all sq_putlocks (as well as
392  * SQLOCK) and figure out the count value as the sum of sq_count and all of
393  * sq_putcounts. The idea is to make critical fast path -- putnext -- much
394  * faster at the expense of much less often used slower path like
395  * strlock(). One known case where entersq/strlock is executed pretty often is
396  * SpecWeb but since IP is SQ_CIOC and socket TCP/IP stream is nextless
397  * there's no need to grab multiple sq_putlocks and look at sq_putcounts. See
398  * strsubr.c for more comments.
399  *
400  * Note regular SQLOCK and sq_count are still used in many routines
401  * (e.g. entersq(), rwnext()) in the same way as before sq_putlocks were
402  * introduced.
403  *
404  * To understand when all sq_putlocks need to be held and all sq_putcounts
405  * need to be added up one needs to look closely at putnext code. Basically if
406  * a routine like e.g. wait_syncq() needs to be sure that perimeter is empty
407  * all sq_putlocks/sq_putcounts need to be held/added up. On the other hand
408  * there's no need to hold all sq_putlocks and count all sq_putcounts in
409  * routines like leavesq()/dropsq() and etc. since they are usually exit
410  * counterparts of entersq/outer_enter() and etc. which have already either
411  * prevented put entry points from executing or did not care about put
412  * entrypoints. entersq() doesn't need to care about sq_putlocks/sq_putcounts
413  * if the entry point has a shared access since put has the highest degree of
414  * concurrency and such entersq() does not intend to block out put
415  * entrypoints.
416  *
417  * Before sq_putcounts were introduced the standard way to wait for perimeter
418  * to become empty was:
419  *
420  *	mutex_enter(SQLOCK(sq));
421  *	while (sq->sq_count > 0) {
422  *		sq->sq_flags |= SQ_WANTWAKEUP;
423  *		cv_wait(&sq->sq_wait, SQLOCK(sq));
424  *	}
425  *	mutex_exit(SQLOCK(sq));
426  *
427  * The new way is:
428  *
429  *	mutex_enter(SQLOCK(sq));
430  *	count = sq->sq_count;
431  *	SQ_PUTLOCKS_ENTER(sq);
432  *	SUM_SQ_PUTCOUNTS(sq, count);
433  *	while (count != 0) {
434  *		sq->sq_flags |= SQ_WANTWAKEUP;
435  *		SQ_PUTLOCKS_EXIT(sq);
436  *		cv_wait(&sq->sq_wait, SQLOCK(sq));
437  *		count = sq->sq_count;
438  *		SQ_PUTLOCKS_ENTER(sq);
439  *		SUM_SQ_PUTCOUNTS(sq, count);
440  *	}
441  *	SQ_PUTLOCKS_EXIT(sq);
442  *	mutex_exit(SQLOCK(sq));
443  *
444  * Note that SQ_WANTWAKEUP is set before dropping SQ_PUTLOCKS. This makes sure
445  * putnext won't skip a wakeup.
446  *
447  * sq_putlocks are treated as the extension of SQLOCK for lock ordering
448  * purposes and are always grabbed right after grabbing SQLOCK and released
449  * right before releasing SQLOCK. This also allows dynamic creation of
450  * sq_putlocks while holding SQLOCK (by making sq_ciputctrl non null even when
451  * the stream is already in use). Only in putnext one of sq_putlocks
452  * is grabbed instead of SQLOCK. putnext return path remembers what counter it
453  * incremented and decrements the right counter on its way out.
454  */
455 
456 struct syncq {
457 	kmutex_t	sq_lock;	/* atomic access to syncq */
458 	uint16_t	sq_count;	/* # threads inside */
459 	uint16_t	sq_flags;	/* state and some type info */
460 	/*
461 	 * Distributed syncq scheduling
462 	 *  The list of queues is handled by the sq_head and
463 	 *  sq_tail fields.
464 	 *
465 	 *  The list of events is handled by the sq_evhead and sq_evtail
466 	 *  fields.
467 	 */
468 	queue_t		*sq_head;	/* queue of deferred messages */
469 	queue_t		*sq_tail;	/* queue of deferred messages */
470 	mblk_t		*sq_evhead;	/* Event message on the syncq */
471 	mblk_t		*sq_evtail;	/* tail of the event list */
472 	uint_t		sq_nqueues;	/* # of queues on this sq */
473 	/*
474 	 * Concurrency and condition variables
475 	 */
476 	uint16_t	sq_type;	/* type (concurrency) of syncq */
477 	uint16_t	sq_rmqcount;	/* # threads inside removeq() */
478 	kcondvar_t	sq_wait;	/* block on this sync queue */
479 	kcondvar_t	sq_exitwait;	/* waiting for thread to leave the */
480 					/* inner perimeter */
481 	/*
482 	 * Handling synchronous callbacks such as qtimeout and qbufcall
483 	 */
484 	ushort_t	sq_callbflags;	/* flags for callback synchronization */
485 	callbparams_id_t sq_cancelid;	/* id of callback being cancelled */
486 	struct callbparams *sq_callbpend;	/* Pending callbacks */
487 
488 	/*
489 	 * Links forming an outer perimeter from one outer syncq and
490 	 * a set of inner sync queues.
491 	 */
492 	struct syncq	*sq_outer;	/* Pointer to outer perimeter */
493 	struct syncq	*sq_onext;	/* Linked list of syncq's making */
494 	struct syncq	*sq_oprev;	/* up the outer perimeter. */
495 	/*
496 	 * support for low contention concurrent putnext.
497 	 */
498 	ciputctrl_t	*sq_ciputctrl;	/* sq_putlocks/sq_putcounts array */
499 	uint_t		sq_nciputctrl;	/* sizing of sq_ciputctrl (cf. sd_) */
500 	/*
501 	 * Counter for the number of threads wanting to become exclusive.
502 	 */
503 	uint_t		sq_needexcl;
504 	/*
505 	 * These fields are used for scheduling a syncq for
506 	 * background processing. The sq_svcflags field is protected by
507 	 * SQLOCK lock.
508 	 */
509 	struct syncq	*sq_next;	/* for syncq scheduling */
510 	void *		sq_servid;	/* service id (cf. sd_servid) */
511 	uint_t		sq_servcount;	/* # pending background threads */
512 	uint_t		sq_svcflags;	/* Scheduling flags	*/
513 	clock_t		sq_tstamp;	/* Time when was enabled */
514 	/*
515 	 * Maximum priority of the queues on this syncq.
516 	 */
517 	pri_t		sq_pri;
518 };
519 typedef struct syncq syncq_t;
520 
521 /*
522  * sync queue scheduling flags (bits of sq_svcflags, protected by SQLOCK).
523  */
524 #define	SQ_SERVICE	0x1		/* being serviced */
525 #define	SQ_BGTHREAD	0x2		/* awaiting service by bg thread */
526 #define	SQ_DISABLED	0x4		/* don't put syncq in service list */
527 
528 /*
529  * FASTPUT bit in sq_count/putcount.
530  */
531 #define	SQ_FASTPUT	0x8000		/* top bit of a 16-bit count */
532 #define	SQ_FASTMASK	0x7FFF		/* the other 15 bits of the count */
533 
534 /*
535  * sync queue state flags (low byte of sq_flags)
536  */
537 #define	SQ_EXCL		0x0001		/* exclusive access to inner */
538 					/*	perimeter */
539 #define	SQ_BLOCKED	0x0002		/* qprocsoff */
540 #define	SQ_FROZEN	0x0004		/* freezestr */
541 #define	SQ_WRITER	0x0008		/* qwriter(OUTER) pending or running */
542 #define	SQ_MESSAGES	0x0010		/* messages on syncq */
543 #define	SQ_WANTWAKEUP	0x0020		/* do cv_broadcast on sq_wait */
544 #define	SQ_WANTEXWAKEUP	0x0040		/* do cv_broadcast on sq_exitwait */
545 #define	SQ_EVENTS	0x0080		/* Events pending */
546 #define	SQ_QUEUED	(SQ_MESSAGES | SQ_EVENTS)	/* anything queued */
547 #define	SQ_FLAGMASK	0x00FF		/* union of the eight state bits */
548 
549 /*
550  * Test a queue to see if inner perimeter is exclusive.
551  * Yields SQ_EXCL (not 1) when set and 0 otherwise; reads q_syncq unlocked.
552  */
553 #define	PERIM_EXCL(q)	((q)->q_syncq->sq_flags & SQ_EXCL)
553 
554 /*
555  * If any of these flags are set it is not possible for a thread to
556  * enter a put or service procedure. Instead it must either block
557  * or put the message on the syncq.
558  */
559 #define	SQ_GOAWAY	(SQ_EXCL|SQ_BLOCKED|SQ_FROZEN|SQ_WRITER|\
560 			SQ_QUEUED)
561 /*
562  * If any of these flags are set it is not possible to drain the syncq
563  */
564 #define	SQ_STAYAWAY	(SQ_BLOCKED|SQ_FROZEN|SQ_WRITER)
565 
566 /*
567  * Flags to trigger syncq tail processing.
568  */
569 #define	SQ_TAIL		(SQ_QUEUED|SQ_WANTWAKEUP|SQ_WANTEXWAKEUP)
570 
571 /*
572  * Syncq types (stored in sq_type)
573  * The SQ_TYPES_IN_FLAGS (ciput) are also stored in sq_flags
574  * for performance reasons. Thus these type values have to be in the low
575  * 16 bits and not conflict with the sq_flags values above.
576  *
577  * Notes:
578  *  - putnext() and put() assume that the put procedures have the highest
579  *    degree of concurrency. Thus if any of the SQ_CI* are set then SQ_CIPUT
580  *    has to be set. This restriction can be lifted by adding code to putnext
581  *    and put that check that sq_count == 0 like entersq does.
582  *  - putnext() and put() do not currently handle !SQ_COPUT
583  *  - In order to implement !SQ_COCB outer_enter has to be fixed so that
584  *    the callback can be cancelled while cv_waiting in outer_enter.
585  *  - If SQ_CISVC needs to be implemented, qprocsoff() needs to wait
586  *    for the currently running services to stop (wait for QINSERVICE
587  *    to go off). disable_svc called from qprocsoff disables only
588  *    services that will be run in future.
589  *
590  * All the SQ_CO flags are set when there is no outer perimeter.
591  */
592 #define	SQ_CIPUT	0x0100		/* Concurrent inner put proc */
593 #define	SQ_CISVC	0x0200		/* Concurrent inner svc proc */
594 #define	SQ_CIOC		0x0400		/* Concurrent inner open/close */
595 #define	SQ_CICB		0x0800		/* Concurrent inner callback */
596 #define	SQ_COPUT	0x1000		/* Concurrent outer put proc */
597 #define	SQ_COSVC	0x2000		/* Concurrent outer svc proc */
598 #define	SQ_COOC		0x4000		/* Concurrent outer open/close */
599 #define	SQ_COCB		0x8000		/* Concurrent outer callback */
600 
601 /* Types also kept in sq_flags for performance */
602 #define	SQ_TYPES_IN_FLAGS	(SQ_CIPUT)
603 
604 #define	SQ_CI		(SQ_CIPUT|SQ_CISVC|SQ_CIOC|SQ_CICB) /* all inner bits */
605 #define	SQ_CO		(SQ_COPUT|SQ_COSVC|SQ_COOC|SQ_COCB) /* all outer bits */
606 #define	SQ_TYPEMASK	(SQ_CI|SQ_CO)	/* every concurrency type bit */
607 
608 /*
609  * Flag combinations passed to entersq and leavesq to specify the type
610  * of entry point.  Each pairs the inner and outer bit for that entry point.
611  */
612 #define	SQ_PUT		(SQ_CIPUT|SQ_COPUT)
613 #define	SQ_SVC		(SQ_CISVC|SQ_COSVC)
614 #define	SQ_OPENCLOSE	(SQ_CIOC|SQ_COOC)
615 #define	SQ_CALLBACK	(SQ_CICB|SQ_COCB)
616 
617 /*
618  * Other syncq types which are not copied into flags.
619  * (The value coincides with SQ_EXCL, so it must not be tested in sq_flags.)
620  */
621 #define	SQ_PERMOD	0x01		/* Syncq is PERMOD */
621 
622 /*
623  * Asynchronous callback qun*** flag.
624  * The mechanism these flags are used in is one where callbacks enter
625  * the perimeter thanks to framework support. To use this mechanism
626  * the q* and qun* flavors of the callback routines must be used.
627  * e.g. qtimeout and quntimeout. The synchronization provided by the flags
628  * avoids deadlocks between blocking qun* routines and the perimeter
629  * lock.
630  */
631 #define	SQ_CALLB_BYPASSED	0x01		/* bypassed callback fn */
632 
633 /*
634  * Cancel callback mask.
635  * The mask expands as the number of cancelable callback types grows.
636  * Note - separate callback flag because different callbacks have
637  * overlapping id space.
638  */
639 #define	SQ_CALLB_CANCEL_MASK	(SQ_CANCEL_TOUT|SQ_CANCEL_BUFCALL)
640 
641 #define	SQ_CANCEL_TOUT		0x02		/* cancel timeout request */
642 #define	SQ_CANCEL_BUFCALL	0x04		/* cancel bufcall request */
643 
644 typedef struct callbparams {	/* entry on a syncq's sq_callbpend list */
645 	syncq_t		*cbp_sq;	/* syncq associated with the callback */
646 	void		(*cbp_func)(void *);	/* callback function */
647 	void		*cbp_arg;	/* argument handed to cbp_func */
648 	callbparams_id_t cbp_id;	/* callback id (cf. sq_cancelid) */
649 	uint_t		cbp_flags;	/* presumably SQ_CANCEL_* bits */
650 	struct callbparams *cbp_next;	/* next entry in the list */
651 	size_t		cbp_size;	/* presumably size of this allocation */
652 } callbparams_t;
653 
654 typedef struct strbufcall {	/* one entry on a bufcall list (bclist) */
655 	void		(*bc_func)(void *);	/* function to call back */
656 	void		*bc_arg;	/* argument handed to bc_func */
657 	size_t		bc_size;	/* presumably buffer size waited for */
658 	bufcall_id_t	bc_id;		/* id of this bufcall request */
659 	struct strbufcall *bc_next;	/* next entry in the list */
660 	kthread_id_t	bc_executor;	/* presumably thread running bc_func */
661 } strbufcall_t;
662 
663 /*
664  * Structure of list of processes to be sent SIGPOLL/SIGURG signal
665  * on request.  The valid S_* events are defined in stropts.h.
666  * A stream's list hangs off sd_siglist and is linked through ss_next.
667  */
667 typedef struct strsig {
668 	struct pid	*ss_pidp;	/* pid/pgrp pointer */
669 	pid_t		ss_pid;		/* positive pid, negative pgrp */
670 	int		ss_events;	/* S_* events */
671 	struct strsig	*ss_next;	/* next registration in the list */
672 } strsig_t;
673 
674 /*
675  * bufcall list: head and tail of a chain of strbufcall_t (via bc_next)
676  */
677 struct bclist {
678 	strbufcall_t	*bc_head;
679 	strbufcall_t	*bc_tail;
680 };
681 
682 /*
683  * Structure used to track mux links and unlinks (one graph node).
684  */
685 struct mux_node {
686 	major_t		 mn_imaj;	/* internal major device number */
687 	uint16_t	 mn_indegree;	/* number of incoming edges */
688 	struct mux_node *mn_originp;	/* where we came from during search */
689 	struct mux_edge *mn_startp;	/* where search left off in mn_outp */
690 	struct mux_edge *mn_outp;	/* list of outgoing edges */
691 	uint_t		 mn_flags;	/* see below */
692 };
693 
694 /*
695  * Flags for mux_nodes (mn_flags).
696  */
697 #define	VISITED	1
698 
699 /*
700  * Edge structure - a list of these (linked by me_nextp) is hung off the
701  * mux_node's mn_outp to represent the outgoing edges.
702  */
703 struct mux_edge {
704 	struct mux_node	*me_nodep;	/* edge leads to this node */
705 	struct mux_edge	*me_nextp;	/* next edge */
706 	int		 me_muxid;	/* id of link */
707 	dev_t		 me_dev;	/* dev_t - used for kernel PUNLINK */
708 };
709 
710 /*
711  * Queue info
712  *
713  * The syncq is included here to reduce memory fragmentation
714  * for kernel memory allocators that only allocate in sizes that are
715  * powers of two. If the kernel memory allocator changes this should
716  * be revisited.  The member order is relied upon by the SQ() macro.
717  */
718 typedef struct queinfo {
719 	struct queue	qu_rqueue;	/* read queue - must be first */
720 	struct queue	qu_wqueue;	/* write queue - must be second */
721 	struct syncq	qu_syncq;	/* syncq - must be third */
722 } queinfo_t;
723 
724 /*
725  * Multiplexed streams info (entries form a doubly linked list)
726  */
727 typedef struct linkinfo {
728 	struct linkblk	li_lblk;	/* must be first */
729 	struct file	*li_fpdown;	/* file pointer for lower stream */
730 	struct linkinfo	*li_next;	/* next in list */
731 	struct linkinfo *li_prev;	/* previous in list */
732 } linkinfo_t;
733 
734 /*
735  * List of syncq's used by freezestr/unfreezestr
736  */
737 typedef struct syncql {
738 	struct syncql	*sql_next;	/* next list element */
739 	syncq_t		*sql_sq;	/* the syncq this element refers to */
740 } syncql_t;
741 
742 typedef struct sqlist {	/* syncql_t list with its element storage */
743 	syncql_t	*sqlist_head;	/* first element of the list */
744 	size_t		sqlist_size;		/* structure size in bytes */
745 	size_t		sqlist_index;		/* next free entry in array */
746 	syncql_t	sqlist_array[4];	/* 4 or more entries */
747 } sqlist_t;
748 
749 typedef struct perdm {	/* shared syncq record; see f_dmp and d_dmp */
750 	struct perdm		*dm_next;	/* next perdm_t in list */
751 	syncq_t			*dm_sq;		/* syncq held by this entry */
752 	struct streamtab	*dm_str;	/* streamtab it belongs to */
753 	uint_t			dm_ref;		/* presumably a ref count */
754 } perdm_t;
755 
/*
 * NEED_DM(dmp, qflag) is true when no perdm_t has been supplied yet
 * (dmp == NULL) and qflag has QPERMOD or QMTOUTPERIM set.
 *
 * Both arguments are parenthesized so that expression arguments such as
 * "a | b" or "c ? p : q" are evaluated as a unit instead of being re-parsed
 * against the == and & operators of the expansion.
 */
#define	NEED_DM(dmp, qflag) \
	((dmp) == NULL && ((qflag) & (QPERMOD | QMTOUTPERIM)))
758 
759 /*
760  * fmodsw_impl_t is used within the kernel. fmodsw is used by
761  * the modules/drivers. The information is copied from fmodsw
762  * defined in the module/driver into the fmodsw_impl_t structure
763  * during the module/driver initialization.
764  */
765 typedef struct fmodsw_impl	fmodsw_impl_t;
766 
767 struct fmodsw_impl {
768 	fmodsw_impl_t		*f_next;	/* next entry in the list */
769 	char			f_name[FMNAMESZ + 1];	/* name; +1 for NUL (?) */
770 	struct streamtab	*f_str;		/* module's streamtab */
771 	uint32_t		f_qflag;	/* queue flags; cf. NEED_DM */
772 	uint32_t		f_sqtype;	/* syncq type; presumably SQ_* */
773 	perdm_t			*f_dmp;		/* perdm_t, or NULL */
774 	uint32_t		f_ref;		/* presumably a ref count */
775 	uint32_t		f_hits;		/* presumably a lookup counter */
776 };
777 
778 typedef enum {	/* distinct bits, so the two values can be combined */
779 	FMODSW_HOLD =	0x00000001,	/* presumably: hold the entry found */
780 	FMODSW_LOAD =	0x00000002	/* presumably: load module if needed */
781 } fmodsw_flags_t;
782 
783 typedef struct cdevsw_impl {	/* element type of devimpl */
784 	struct streamtab	*d_str;		/* driver's streamtab */
785 	uint32_t		d_qflag;	/* queue flags, cf. f_qflag */
786 	uint32_t		d_sqtype;	/* syncq type, cf. f_sqtype */
787 	perdm_t			*d_dmp;		/* perdm_t, cf. f_dmp */
788 } cdevsw_impl_t;
789 
790 /*
791  * Enumeration of the types of access that can be requested for a
792  * controlling terminal under job control.
793  */
794 enum jcaccess {
795 	JCREAD,			/* read data on a ctty */
796 	JCWRITE,		/* write data to a ctty */
797 	JCSETP,			/* set ctty parameters */
798 	JCGETP			/* get ctty parameters */
799 };	/* access type handed to straccess() */
800 
801 struct str_stack {
802 	netstack_t	*ss_netstack;	/* Common netstack */
803 
804 	kmutex_t	ss_sad_lock;	/* autopush lock */
805 	mod_hash_t	*ss_sad_hash;	/* presumably autopush hash - TODO confirm */
806 	size_t		ss_sad_hash_nchains;	/* chains in ss_sad_hash */
807 	struct saddev	*ss_saddev;	/* sad device array */
808 	int		ss_sadcnt;	/* number of sad devices */
809 
810 	int		ss_devcnt;	/* number of mux_nodes */
811 	struct mux_node	*ss_mux_nodes;	/* mux info for cycle checking */
812 };
813 typedef struct str_stack str_stack_t;
814 
815 /*
816  * Finding related queues
817  */
818 #define	STREAM(q)	((q)->q_stream)	/* q's q_stream member */
819 #define	SQ(rq)		((syncq_t *)((rq) + 2))	/* rq + 2: queinfo_t layout */
820 
/*
 * Get the module/driver name for a queue.  Since some queues don't have
 * q_qinfo structures (e.g., see log_makeq()), fall back to "?".  The same
 * fallback is used when any later link of the
 * q_qinfo -> qi_minfo -> mi_idname chain is NULL, so the macro never
 * dereferences a NULL pointer on the way to the name.
 *
 * Note that (q) is evaluated several times; pass an expression without
 * side effects.
 */
#define	Q2NAME(q) \
	(((q)->q_qinfo != NULL && (q)->q_qinfo->qi_minfo != NULL && \
	(q)->q_qinfo->qi_minfo->mi_idname != NULL) ? \
	(q)->q_qinfo->qi_minfo->mi_idname : "?")
828 
829 /*
830  * Locking macros
831  */
832 #define	QLOCK(q)	(&(q)->q_lock)	/* address of the queue's q_lock */
833 #define	SQLOCK(sq)	(&(sq)->sq_lock)	/* address of the syncq's sq_lock */
834 
/*
 * Acquire every ciputctrl_lock in the stream head's sd_ciputctrl array,
 * walking indices 0 through sd_nciputctrl inclusive in ascending order.
 * The caller must already hold sd_lock (asserted).  Nothing is done when
 * sd_ciputctrl is NULL.  The expansion is a bare { } block declaring
 * locals named i, nlocks and cip.
 */
835 #define	STREAM_PUTLOCKS_ENTER(stp) {					       \
836 		ASSERT(MUTEX_HELD(&(stp)->sd_lock));			       \
837 		if ((stp)->sd_ciputctrl != NULL) {			       \
838 			int i;						       \
839 			int nlocks = (stp)->sd_nciputctrl;		       \
840 			ciputctrl_t *cip = (stp)->sd_ciputctrl;		       \
841 			for (i = 0; i <= nlocks; i++) {			       \
842 				mutex_enter(&cip[i].ciputctrl_lock);	       \
843 			}						       \
844 		}							       \
845 	}
846 
/*
 * Release every ciputctrl_lock in the stream head's sd_ciputctrl array,
 * walking indices 0 through sd_nciputctrl inclusive (the same ascending
 * order used by STREAM_PUTLOCKS_ENTER).  The caller must hold sd_lock
 * (asserted).  Nothing is done when sd_ciputctrl is NULL.
 */
847 #define	STREAM_PUTLOCKS_EXIT(stp) {					       \
848 		ASSERT(MUTEX_HELD(&(stp)->sd_lock));			       \
849 		if ((stp)->sd_ciputctrl != NULL) {			       \
850 			int i;						       \
851 			int nlocks = (stp)->sd_nciputctrl;		       \
852 			ciputctrl_t *cip = (stp)->sd_ciputctrl;		       \
853 			for (i = 0; i <= nlocks; i++) {			       \
854 				mutex_exit(&cip[i].ciputctrl_lock);	       \
855 			}						       \
856 		}							       \
857 	}
858 
/*
 * Acquire every ciputctrl_lock in the syncq's sq_ciputctrl array, indices
 * 0 through sq_nciputctrl inclusive, in ascending order.  The caller must
 * hold SQLOCK(sq) (asserted).  A syncq that has a ciputctrl array is
 * asserted to have SQ_CIPUT set in sq_type.  Nothing is done when
 * sq_ciputctrl is NULL.
 */
859 #define	SQ_PUTLOCKS_ENTER(sq) {						       \
860 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				       \
861 		if ((sq)->sq_ciputctrl != NULL) {			       \
862 			int i;						       \
863 			int nlocks = (sq)->sq_nciputctrl;		       \
864 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		       \
865 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
866 			for (i = 0; i <= nlocks; i++) {			       \
867 				mutex_enter(&cip[i].ciputctrl_lock);	       \
868 			}						       \
869 		}							       \
870 	}
871 
/*
 * Release every ciputctrl_lock in the syncq's sq_ciputctrl array, indices
 * 0 through sq_nciputctrl inclusive.  Counterpart of SQ_PUTLOCKS_ENTER,
 * with the same assertions: SQLOCK(sq) held, and SQ_CIPUT set whenever
 * the array exists.
 */
872 #define	SQ_PUTLOCKS_EXIT(sq) {						       \
873 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				       \
874 		if ((sq)->sq_ciputctrl != NULL) {			       \
875 			int i;						       \
876 			int nlocks = (sq)->sq_nciputctrl;		       \
877 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		       \
878 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
879 			for (i = 0; i <= nlocks; i++) {			       \
880 				mutex_exit(&cip[i].ciputctrl_lock);	       \
881 			}						       \
882 		}							       \
883 	}
884 
/*
 * Set SQ_FASTPUT in the ciputctrl_count of every entry of the syncq's
 * sq_ciputctrl array (indices 0 through sq_nciputctrl inclusive).  Each
 * entry is updated under its own ciputctrl_lock, which is taken and
 * dropped one entry at a time.  The caller must hold SQLOCK(sq)
 * (asserted).  Nothing is done when sq_ciputctrl is NULL.
 */
885 #define	SQ_PUTCOUNT_SETFAST(sq) {					\
886 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				\
887 		if ((sq)->sq_ciputctrl != NULL) {			\
888 			int i;						\
889 			int nlocks = (sq)->sq_nciputctrl;		\
890 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		\
891 			ASSERT((sq)->sq_type & SQ_CIPUT);		\
892 			for (i = 0; i <= nlocks; i++) {			\
893 				mutex_enter(&cip[i].ciputctrl_lock);	\
894 				cip[i].ciputctrl_count |= SQ_FASTPUT;	\
895 				mutex_exit(&cip[i].ciputctrl_lock);	\
896 			}						\
897 		}							\
898 	}
899 
/*
 * Clear SQ_FASTPUT in the ciputctrl_count of every entry of the syncq's
 * sq_ciputctrl array (indices 0 through sq_nciputctrl inclusive).  Each
 * entry is updated under its own ciputctrl_lock, one entry at a time.
 * Counterpart of SQ_PUTCOUNT_SETFAST, with the same assertions.
 */
900 #define	SQ_PUTCOUNT_CLRFAST(sq) {					\
901 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				\
902 		if ((sq)->sq_ciputctrl != NULL) {			\
903 			int i;						\
904 			int nlocks = (sq)->sq_nciputctrl;		\
905 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		\
906 			ASSERT((sq)->sq_type & SQ_CIPUT);		\
907 			for (i = 0; i <= nlocks; i++) {			\
908 				mutex_enter(&cip[i].ciputctrl_lock);	\
909 				cip[i].ciputctrl_count &= ~SQ_FASTPUT;	\
910 				mutex_exit(&cip[i].ciputctrl_lock);	\
911 			}						\
912 		}							\
913 	}
914 
915 
916 #ifdef	DEBUG
917 
/*
 * DEBUG-only check that the caller holds SQLOCK(sq) and, when the syncq
 * has a ciputctrl array, every ciputctrl_lock in it (indices 0 through
 * sq_nciputctrl inclusive).  Expands to nothing in non-DEBUG builds.
 */
918 #define	SQ_PUTLOCKS_HELD(sq) {						       \
919 		ASSERT(MUTEX_HELD(SQLOCK(sq)));				       \
920 		if ((sq)->sq_ciputctrl != NULL) {			       \
921 			int i;						       \
922 			int nlocks = (sq)->sq_nciputctrl;		       \
923 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		       \
924 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
925 			for (i = 0; i <= nlocks; i++) {			       \
926 				ASSERT(MUTEX_HELD(&cip[i].ciputctrl_lock));    \
927 			}						       \
928 		}							       \
929 	}
930 
/*
 * DEBUG-only consistency check: add up the SQ_FASTMASK portion of every
 * ciputctrl_count in the syncq's sq_ciputctrl array (indices 0 through
 * sq_nciputctrl inclusive) and ASSERT that the total equals countcheck.
 * Nothing is done when sq_ciputctrl is NULL.
 *
 * The macro's locals carry a "sumchk_" prefix so that an argument which
 * mentions a caller variable named "count" or "i" is not captured by the
 * macro's own declarations; with a local called "count" the final ASSERT
 * would silently compare the sum with itself.
 */
#define	SUMCHECK_SQ_PUTCOUNTS(sq, countcheck) {				\
		if ((sq)->sq_ciputctrl != NULL) {			\
			int sumchk_i;					\
			uint_t sumchk_count = 0;			\
			int sumchk_n = (sq)->sq_nciputctrl;		\
			ciputctrl_t *sumchk_cip = (sq)->sq_ciputctrl;	\
			ASSERT((sq)->sq_type & SQ_CIPUT);		\
			for (sumchk_i = 0; sumchk_i <= sumchk_n;	\
			    sumchk_i++) {				\
				sumchk_count +=				\
				    (sumchk_cip[sumchk_i].ciputctrl_count & \
				    SQ_FASTMASK);			\
			}						\
			ASSERT(sumchk_count == (countcheck));		\
		}							\
	}
945 
/*
 * DEBUG-only consistency check on a bare ciputctrl array: ASSERT that
 * ciput is not NULL, add up the SQ_FASTMASK portion of ciputctrl_count
 * for indices 0 through nciput inclusive, and ASSERT that the total
 * equals countcheck.
 *
 * The locals carry a "sumchk_" prefix so that arguments mentioning a
 * caller variable named "count" or "i" are not captured by the macro's
 * own declarations.
 */
#define	SUMCHECK_CIPUTCTRL_COUNTS(ciput, nciput, countcheck) {		\
		int sumchk_i;						\
		uint_t sumchk_count = 0;				\
		ASSERT((ciput) != NULL);				\
		for (sumchk_i = 0; sumchk_i <= (nciput); sumchk_i++) {	\
			sumchk_count +=					\
			    (((ciput)[sumchk_i].ciputctrl_count) &	\
			    SQ_FASTMASK);				\
		}							\
		ASSERT(sumchk_count == (countcheck));			\
	}
956 
957 #else	/* DEBUG */
958 
/*
 * Non-DEBUG stubs: the checks expand to nothing and their arguments are
 * not evaluated.  Parameter names match the DEBUG definitions.
 */
#define	SQ_PUTLOCKS_HELD(sq)
#define	SUMCHECK_SQ_PUTCOUNTS(sq, countcheck)
#define	SUMCHECK_CIPUTCTRL_COUNTS(ciput, nciput, countcheck)
962 
963 #endif	/* DEBUG */
964 
/*
 * Add to count the SQ_FASTMASK portion of every ciputctrl_count in the
 * syncq's sq_ciputctrl array (indices 0 through sq_nciputctrl inclusive).
 * count is accumulated with +=, never reset, so the caller supplies the
 * starting value.  The macro takes no locks itself, and does nothing
 * when sq_ciputctrl is NULL.
 */
965 #define	SUM_SQ_PUTCOUNTS(sq, count) {					       \
966 		if ((sq)->sq_ciputctrl != NULL) {			       \
967 			int i;						       \
968 			int ncounts = (sq)->sq_nciputctrl;		       \
969 			ciputctrl_t *cip = (sq)->sq_ciputctrl;		       \
970 			ASSERT((sq)->sq_type & SQ_CIPUT);		       \
971 			for (i = 0; i <= ncounts; i++) {		       \
972 				(count) += ((cip[i].ciputctrl_count) &	       \
973 				    SQ_FASTMASK);			       \
974 			}						       \
975 		}							       \
976 	}
977 
/* Both macros operate on the stream head's sd_lock. */
978 #define	CLAIM_QNEXT_LOCK(stp)	mutex_enter(&(stp)->sd_lock)
979 #define	RELEASE_QNEXT_LOCK(stp)	mutex_exit(&(stp)->sd_lock)
980 
981 /*
982  * syncq message manipulation macros.
983  */
/*
 * Put a message on the queue syncq.
 * Assumes QLOCK held.
 *
 * Bumps q_syncqmsgs, appends mp at q_sqtail (or makes it the only entry
 * when q_sqhead is NULL) and then calls set_qfull() on the queue.
 * Both arguments are parenthesized so that expressions such as
 * "qs + 1" can be passed; they are evaluated more than once, so they
 * must be free of side effects.
 */
#define	SQPUT_MP(qp, mp)						\
	{								\
		(qp)->q_syncqmsgs++;					\
		if ((qp)->q_sqhead == NULL) {				\
			(qp)->q_sqhead = (qp)->q_sqtail = (mp);		\
		} else {						\
			(qp)->q_sqtail->b_next = (mp);			\
			(qp)->q_sqtail = (mp);				\
		}							\
		set_qfull(qp);						\
	}
999 
1000 /*
1001  * Miscellaneous parameters and flags.
1002  */
1003 
1004 /*
1005  * Default timeout in milliseconds for ioctls and close
1006  */
1007 #define	STRTIMOUT 15000
1008 
1009 /*
1010  * Flag values for stream io
1011  */
1012 #define	WRITEWAIT	0x1	/* waiting for write event */
1013 #define	READWAIT	0x2	/* waiting for read event */
1014 #define	NOINTR		0x4	/* error is not to be set for signal */
1015 #define	GETWAIT		0x8	/* waiting for getmsg event */
1016 
1017 /*
1018  * These flags need to be unique for stream io name space
1019  * and copy modes name space.  These flags allow strwaitq
1020  * and strdoioctl to proceed as if signals or errors on the stream
1021  * head have not occurred; i.e. they will be detected by some other
1022  * means.
1023  * STR_NOSIG does not allow signals to interrupt the call
1024  * STR_NOERROR does not allow stream head read, write or hup errors to
1025  * affect the call.  When used with strdoioctl(), if a previous ioctl
1026  * is pending and times out, STR_NOERROR will cause strdoioctl() to not
1027  * return ETIME. If, however, the requested ioctl times out, ETIME
1028  * will be returned (use ic_timout instead)
1029  * STR_PEEK is used to inform strwaitq that the reader is peeking at data
1030  * and that a non-persistent error should not be cleared.
1031  * STR_DELAYERR is used to inform strwaitq that it should not check errors
1032  * after being awoken since, in addition to an error, there might also be
1033  * data queued on the stream head read queue.
1034  */
1035 #define	STR_NOSIG	0x10	/* Ignore signals during strdoioctl/strwaitq */
1036 #define	STR_NOERROR	0x20	/* Ignore errors during strdoioctl/strwaitq */
1037 #define	STR_PEEK	0x40	/* Peeking behavior on non-persistent errors */
1038 #define	STR_DELAYERR	0x80	/* Do not check errors on return */
1039 
1040 /*
1041  * Copy modes for tty and I_STR ioctls
1042  */
1043 #define	U_TO_K	01			/* User to Kernel */
1044 #define	K_TO_K  02			/* Kernel to Kernel */
1045 
1046 /*
1047  * Mux defines.
1048  */
1049 #define	LINKNORMAL	0x01		/* normal mux link */
1050 #define	LINKPERSIST	0x02		/* persistent mux link */
1051 #define	LINKTYPEMASK	0x03		/* bitmask of all link types */
1052 #define	LINKCLOSE	0x04		/* unlink from strclose */
1053 
1054 /*
1055  * Definitions of Streams macros and function interfaces.
1056  */
1057 
1058 /*
1059  * Obsolete queue scheduling macros. They are no longer used, but are kept
1060  * here for third-party modules and drivers that might still use them.
1061  */
1062 #define	setqsched()
1063 #define	qready()	1
1064 
1065 #ifdef _KERNEL
1066 #define	runqueues()
1067 #define	queuerun()
1068 #endif
1069 
1070 /* compatibility module for style 2 drivers with DR race condition */
1071 #define	DRMODNAME	"drcompat"
1072 
/*
 * Macros dealing with mux_nodes.
 *
 * MUX_VISIT	sets VISITED in the node's mn_flags
 * MUX_CLEAR	clears VISITED and resets the node's mn_originp to NULL
 * MUX_DIDVISIT	non-zero when VISITED is set in mn_flags
 *
 * MUX_CLEAR is wrapped in do { } while (0) so that both assignments stay
 * together when the macro is the body of an unbraced if/else or loop.
 */
#define	MUX_VISIT(X)	((X)->mn_flags |= VISITED)
#define	MUX_CLEAR(X)	do { \
				(X)->mn_flags &= ~VISITED; \
				(X)->mn_originp = NULL; \
			} while (0)
#define	MUX_DIDVISIT(X)	((X)->mn_flags & VISITED)
1080 
1081 
/*
 * Twisted stream macros
 *
 * STRMATED		non-zero when STRMATE is set in the stream's sd_flag
 * STRLOCKMATES		take the sd_lock of both X and X->sd_mate; the lock
 *			at the higher address is always taken first, so the
 *			order is the same whichever mate is passed in
 * STRUNLOCKMATES	drop X's sd_lock, then its mate's
 *
 * The two locking macros are wrapped in do { } while (0) so that each
 * behaves as a single statement; invoke them with a trailing semicolon.
 * X is evaluated several times and must be free of side effects.
 */
#define	STRMATED(X)	((X)->sd_flag & STRMATE)
#define	STRLOCKMATES(X)	do { \
	if (&((X)->sd_lock) > &(((X)->sd_mate)->sd_lock)) { \
		mutex_enter(&((X)->sd_lock)); \
		mutex_enter(&(((X)->sd_mate)->sd_lock)); \
	} else { \
		mutex_enter(&(((X)->sd_mate)->sd_lock)); \
		mutex_enter(&((X)->sd_lock)); \
	} \
} while (0)
#define	STRUNLOCKMATES(X)	do { \
	mutex_exit(&((X)->sd_lock)); \
	mutex_exit(&(((X)->sd_mate)->sd_lock)); \
} while (0)
1095 
1096 #if defined(_KERNEL) || defined(_FAKE_KERNEL)
1097 
1098 extern void strinit(void);
1099 extern int strdoioctl(struct stdata *, struct strioctl *, int, int,
1100     cred_t *, int *);
1101 extern void strsendsig(struct strsig *, int, uchar_t, int);
1102 extern void str_sendsig(vnode_t *, int, uchar_t, int);
1103 extern void strhup(struct stdata *);
1104 extern int qattach(queue_t *, dev_t *, int, cred_t *, fmodsw_impl_t *,
1105     boolean_t);
1106 extern int qreopen(queue_t *, dev_t *, int, cred_t *);
1107 extern void qdetach(queue_t *, int, int, cred_t *, boolean_t);
1108 extern void enterq(queue_t *);
1109 extern void leaveq(queue_t *);
1110 extern int putiocd(mblk_t *, caddr_t, int, cred_t *);
1111 extern int getiocd(mblk_t *, caddr_t, int);
1112 extern struct linkinfo *alloclink(queue_t *, queue_t *, struct file *);
1113 extern void lbfree(struct linkinfo *);
1114 extern int linkcycle(stdata_t *, stdata_t *, str_stack_t *);
1115 extern struct linkinfo *findlinks(stdata_t *, int, int, str_stack_t *);
1116 extern queue_t *getendq(queue_t *);
1117 extern int mlink(vnode_t *, int, int, cred_t *, int *, int);
1118 extern int mlink_file(vnode_t *, int, struct file *, cred_t *, int *, int);
1119 extern int munlink(struct stdata *, struct linkinfo *, int, cred_t *, int *,
1120     str_stack_t *);
1121 extern int munlinkall(struct stdata *, int, cred_t *, int *, str_stack_t *);
1122 extern void mux_addedge(stdata_t *, stdata_t *, int, str_stack_t *);
1123 extern void mux_rmvedge(stdata_t *, int, str_stack_t *);
1124 extern int devflg_to_qflag(struct streamtab *, uint32_t, uint32_t *,
1125     uint32_t *);
1126 extern void setq(queue_t *, struct qinit *, struct qinit *, perdm_t *,
1127     uint32_t, uint32_t, boolean_t);
1128 extern perdm_t *hold_dm(struct streamtab *, uint32_t, uint32_t);
1129 extern void rele_dm(perdm_t *);
1130 extern int strmakectl(struct strbuf *, int32_t, int32_t, mblk_t **);
1131 extern int strmakedata(ssize_t *, struct uio *, stdata_t *, int32_t, mblk_t **);
1132 extern int strmakemsg(struct strbuf *, ssize_t *, struct uio *,
1133     struct stdata *, int32_t, mblk_t **);
1134 extern int strgetmsg(vnode_t *, struct strbuf *, struct strbuf *, uchar_t *,
1135     int *, int, rval_t *);
1136 extern int strputmsg(vnode_t *, struct strbuf *, struct strbuf *, uchar_t,
1137     int flag, int fmode);
1138 extern int strstartplumb(struct stdata *, int, int);
1139 extern void strendplumb(struct stdata *);
1140 extern int stropen(struct vnode *, dev_t *, int, cred_t *);
1141 extern int strclose(struct vnode *, int, cred_t *);
1142 extern int strpoll(register struct stdata *, short, int, short *,
1143     struct pollhead **);
1144 extern void strclean(struct vnode *);
1145 extern void str_cn_clean();	/* XXX hook for consoles signal cleanup */
1146 extern int strwrite(struct vnode *, struct uio *, cred_t *);
1147 extern int strwrite_common(struct vnode *, struct uio *, cred_t *, int);
1148 extern int strread(struct vnode *, struct uio *, cred_t *);
1149 extern int strioctl(struct vnode *, int, intptr_t, int, int, cred_t *, int *);
1150 extern int strrput(queue_t *, mblk_t *);
1151 extern int strrput_nondata(queue_t *, mblk_t *);
1152 extern mblk_t *strrput_proto(vnode_t *, mblk_t *,
1153     strwakeup_t *, strsigset_t *, strsigset_t *, strpollset_t *);
1154 extern mblk_t *strrput_misc(vnode_t *, mblk_t *,
1155     strwakeup_t *, strsigset_t *, strsigset_t *, strpollset_t *);
1156 extern int getiocseqno(void);
1157 extern int strwaitbuf(size_t, int);
1158 extern int strwaitq(stdata_t *, int, ssize_t, int, clock_t, int *);
1159 extern struct stdata *shalloc(queue_t *);
1160 extern void shfree(struct stdata *s);
1161 extern queue_t *allocq(void);
1162 extern void freeq(queue_t *);
1163 extern qband_t *allocband(void);
1164 extern void freeband(qband_t *);
1165 extern void freebs_enqueue(mblk_t *, dblk_t *);
1166 extern void setqback(queue_t *, unsigned char);
1167 extern int strcopyin(void *, void *, size_t, int);
1168 extern int strcopyout(void *, void *, size_t, int);
1169 extern void strsignal(struct stdata *, int, int32_t);
1170 extern clock_t str_cv_wait(kcondvar_t *, kmutex_t *, clock_t, int);
1171 extern void disable_svc(queue_t *);
1172 extern void enable_svc(queue_t *);
1173 extern void remove_runlist(queue_t *);
1174 extern void wait_svc(queue_t *);
1175 extern void backenable(queue_t *, uchar_t);
1176 extern void set_qend(queue_t *);
1177 extern int strgeterr(stdata_t *, int32_t, int);
1178 extern void qenable_locked(queue_t *);
1179 extern mblk_t *getq_noenab(queue_t *, ssize_t);
1180 extern void rmvq_noenab(queue_t *, mblk_t *);
1181 extern void qbackenable(queue_t *, uchar_t);
1182 extern void set_qfull(queue_t *);
1183 
1184 extern void strblock(queue_t *);
1185 extern void strunblock(queue_t *);
1186 extern int qclaimed(queue_t *);
1187 extern int straccess(struct stdata *, enum jcaccess);
1188 
1189 extern void entersq(syncq_t *, int);
1190 extern void leavesq(syncq_t *, int);
1191 extern void claimq(queue_t *);
1192 extern void releaseq(queue_t *);
1193 extern void claimstr(queue_t *);
1194 extern void releasestr(queue_t *);
1195 extern void removeq(queue_t *);
1196 extern void insertq(struct stdata *, queue_t *);
1197 extern void drain_syncq(syncq_t *);
1198 extern void qfill_syncq(syncq_t *, queue_t *, mblk_t *);
1199 extern void qdrain_syncq(syncq_t *, queue_t *);
1200 extern int flush_syncq(syncq_t *, queue_t *);
1201 extern void wait_sq_svc(syncq_t *);
1202 
1203 extern void outer_enter(syncq_t *, uint16_t);
1204 extern void outer_exit(syncq_t *);
1205 extern void qwriter_inner(queue_t *, mblk_t *, void (*)());
1206 extern void qwriter_outer(queue_t *, mblk_t *, void (*)());
1207 
1208 extern callbparams_t *callbparams_alloc(syncq_t *, void (*)(void *),
1209     void *, int);
1210 extern void callbparams_free(syncq_t *, callbparams_t *);
1211 extern void callbparams_free_id(syncq_t *, callbparams_id_t, int32_t);
1212 extern void qcallbwrapper(void *);
1213 
1214 extern mblk_t *esballoc_wait(unsigned char *, size_t, uint_t, frtn_t *);
1215 extern mblk_t *esballoca(unsigned char *, size_t, uint_t, frtn_t *);
1216 extern mblk_t *desballoca(unsigned char *, size_t, uint_t, frtn_t *);
1217 extern int do_sendfp(struct stdata *, struct file *, struct cred *);
1218 extern int frozenstr(queue_t *);
1219 extern size_t xmsgsize(mblk_t *);
1220 
1221 extern void putnext_tail(syncq_t *, queue_t *, uint32_t);
1222 extern void stream_willservice(stdata_t *);
1223 extern void stream_runservice(stdata_t *);
1224 
1225 extern void strmate(vnode_t *, vnode_t *);
1226 extern queue_t *strvp2wq(vnode_t *);
1227 extern vnode_t *strq2vp(queue_t *);
1228 extern mblk_t *allocb_wait(size_t, uint_t, uint_t, int *);
1229 extern mblk_t *allocb_cred(size_t, cred_t *, pid_t);
1230 extern mblk_t *allocb_cred_wait(size_t, uint_t, int *, cred_t *, pid_t);
1231 extern mblk_t *allocb_tmpl(size_t, const mblk_t *);
1232 extern mblk_t *allocb_tryhard(size_t);
1233 extern void mblk_copycred(mblk_t *, const mblk_t *);
1234 extern void mblk_setcred(mblk_t *, cred_t *, pid_t);
1235 extern cred_t *msg_getcred(const mblk_t *, pid_t *);
1236 extern struct ts_label_s *msg_getlabel(const mblk_t *);
1237 extern cred_t *msg_extractcred(mblk_t *, pid_t *);
1238 extern void strpollwakeup(vnode_t *, short);
1239 extern int putnextctl_wait(queue_t *, int);
1240 
1241 extern int kstrputmsg(struct vnode *, mblk_t *, struct uio *, ssize_t,
1242     unsigned char, int, int);
1243 extern int kstrgetmsg(struct vnode *, mblk_t **, struct uio *,
1244     unsigned char *, int *, clock_t, rval_t *);
1245 
1246 extern void strsetrerror(vnode_t *, int, int, errfunc_t);
1247 extern void strsetwerror(vnode_t *, int, int, errfunc_t);
1248 extern void strseteof(vnode_t *, int);
1249 extern void strflushrq(vnode_t *, int);
1250 extern void strsetrputhooks(vnode_t *, uint_t, msgfunc_t, msgfunc_t);
1251 extern void strsetwputhooks(vnode_t *, uint_t, clock_t);
1252 extern void strsetrwputdatahooks(vnode_t *, msgfunc_t, msgfunc_t);
1253 extern int strwaitmark(vnode_t *);
1254 extern void strsignal_nolock(stdata_t *, int, uchar_t);
1255 
1256 struct multidata_s;
1257 struct pdesc_s;
1258 
1259 /*
1260  * Now that NIC drivers are expected to deal only with M_DATA mblks, the
1261  * hcksum_assoc and hcksum_retrieve functions are deprecated in favor of their
1262  * respective mac_hcksum_set and mac_hcksum_get counterparts.
1263  */
1264 extern int hcksum_assoc(mblk_t *, struct multidata_s *, struct pdesc_s  *,
1265     uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, int);
1266 extern void hcksum_retrieve(mblk_t *, struct multidata_s *, struct pdesc_s *,
1267     uint32_t *, uint32_t *, uint32_t *, uint32_t *, uint32_t *);
1268 
1269 extern void lso_info_set(mblk_t *, uint32_t, uint32_t);
1270 extern void lso_info_cleanup(mblk_t *);
1271 extern unsigned int bcksum(uchar_t *, int, unsigned int);
1272 extern boolean_t is_vmloaned_mblk(mblk_t *, struct multidata_s *,
1273     struct pdesc_s *);
1274 
1275 extern int fmodsw_register(const char *, struct streamtab *, int);
1276 extern int fmodsw_unregister(const char *);
1277 extern fmodsw_impl_t *fmodsw_find(const char *, fmodsw_flags_t);
1278 extern void fmodsw_rele(fmodsw_impl_t *);
1279 
1280 extern void freemsgchain(mblk_t *);
1281 extern mblk_t *copymsgchain(mblk_t *);
1282 
1283 extern mblk_t *mcopyinuio(struct stdata *, uio_t *, ssize_t, ssize_t, int *);
1284 
1285 /*
1286  * shared or externally configured data structures
1287  */
1288 extern ssize_t strmsgsz;		/* maximum stream message size */
1289 extern ssize_t strctlsz;		/* maximum size of ctl message */
1290 extern int nstrpush;			/* maximum number of pushes allowed */
1291 
1292 /*
1293  * Bufcalls related variables.
1294  */
1295 extern struct bclist strbcalls;		/* List of bufcalls */
1296 extern kmutex_t	strbcall_lock;		/* Protects the list of bufcalls */
1297 extern kcondvar_t strbcall_cv;		/* Signaling when a bufcall is added */
1298 extern kcondvar_t bcall_cv;	/* wait for executing bufcall to complete */
1299 
1300 extern frtn_t frnop;
1301 
1302 extern struct kmem_cache *ciputctrl_cache;
1303 extern int n_ciputctrl;
1304 extern int max_n_ciputctrl;
1305 extern int min_n_ciputctrl;
1306 
1307 extern cdevsw_impl_t *devimpl;
1308 
1309 /*
1310  * esballoc queue for throttling
1311  */
1312 typedef struct esb_queue {
1313 	kmutex_t	eq_lock;	/* presumably guards the fields below - TODO confirm */
1314 	uint_t		eq_len;		/* number of queued messages */
1315 	mblk_t		*eq_head;	/* head of queue */
1316 	mblk_t		*eq_tail;	/* tail of queue */
1317 	uint_t		eq_flags;	/* esballoc queue flags */
1318 } esb_queue_t;
1319 
1320 /*
1321  * esballoc flags for queue processing.
1322  */
1323 #define	ESBQ_PROCESSING	0x01	/* queue is being processed */
1324 #define	ESBQ_TIMER	0x02	/* timer is active */
1325 
1326 extern void esballoc_queue_init(void);
1327 
1328 #endif	/* _KERNEL || _FAKE_KERNEL */
1329 
1330 /*
1331  * Note: Use of these macros is restricted to kernel/unix and
1332  * intended for the STREAMS framework.
1333  * All modules/drivers should include sys/ddi.h.
1334  *
1335  * Finding related queues
1336  */
/*
 * A read queue (QREADR set) is immediately followed in memory by its
 * write queue, so the partner is reached with +1 from the read side and
 * -1 from the write side.  _SAMESTR() is true while QEND is not set.
 */
1337 #define		_OTHERQ(q)	((q)->q_flag&QREADR? (q)+1: (q)-1)
1338 #define		_WR(q)		((q)->q_flag&QREADR? (q)+1: (q))
1339 #define		_RD(q)		((q)->q_flag&QREADR? (q): (q)-1)
1340 #define		_SAMESTR(q)	(!((q)->q_flag & QEND))
1341 
1342 /*
1343  * These are also declared here for modules/drivers that erroneously
1344  * include strsubr.h after ddi.h or fail to include ddi.h at all.
1345  */
1346 extern struct queue *OTHERQ(queue_t *); /* stream.h */
1347 extern struct queue *RD(queue_t *);
1348 extern struct queue *WR(queue_t *);
1349 extern int SAMESTR(queue_t *);
1350 
1351 /*
1352  * The following hardware checksum related macros are private
1353  * interfaces that are subject to change without notice.
1354  */
1355 #ifdef _KERNEL
/*
 * Accessors for fields of the dblk_t behind an mblk (mp->b_datap).
 * DB_LSOFLAGS expands to the same field as DB_CKSUMFLAGS.
 */
1356 #define	DB_CKSUMSTART(mp)	((mp)->b_datap->db_cksumstart)
1357 #define	DB_CKSUMEND(mp)		((mp)->b_datap->db_cksumend)
1358 #define	DB_CKSUMSTUFF(mp)	((mp)->b_datap->db_cksumstuff)
1359 #define	DB_CKSUMFLAGS(mp)	((mp)->b_datap->db_struioun.cksum.flags)
1360 #define	DB_CKSUM16(mp)		((mp)->b_datap->db_cksum16)
1361 #define	DB_CKSUM32(mp)		((mp)->b_datap->db_cksum32)
1362 #define	DB_LSOFLAGS(mp)		((mp)->b_datap->db_struioun.cksum.flags)
1363 #define	DB_LSOMSS(mp)		((mp)->b_datap->db_struioun.cksum.pad)
1364 #endif	/* _KERNEL */
1365 
1366 #ifdef	__cplusplus
1367 }
1368 #endif
1369 
1370 
1371 #endif	/* _SYS_STRSUBR_H */
1372