xref: /titanic_50/usr/src/uts/common/inet/nca/nca.h (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_INET_NCA_H
28 #define	_INET_NCA_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #ifdef	__cplusplus
33 extern "C" {
34 #endif
35 
36 #include <sys/thread.h>
37 #include <sys/door.h>
38 #include <sys/disp.h>
39 #include <sys/systm.h>
40 #include <sys/processor.h>
41 #include <sys/socket.h>
42 #include <inet/common.h>
43 #include <inet/ip.h>
44 #include <inet/tcp.h>
45 #include <inet/nca/ncadoorhdr.h>
46 
47 /*
48  * The NCA debugging facilities provided via ADB and MDB depend on a
49  * number of NCA implementation details.  In particular, note that:
50  *
51  *	* ADB macros *must* be revised whenever members are added or
52  *	  removed from the following structures:
53  *
54  *		nca_conn_t connf_t nca_cpu_t dcb_t hcb_t nca_if_t nca_io2_t
55  *		node_t nodef_t sqfan_t nca_squeue_t tb_t te_t ti_t tw_t
56  *
57  *	* ADB macros should be added when new core data structures are
58  *	  added to NCA.  Generally, if you had to put it in here, you
59  *	  need to write a macro for it.
60  *
61  *	* MDB has many dependencies on the way core data structures
62  *	  are connected.  In general, if you break these dependencies,
63  *	  the MDB NCA module will fail to build.  However, breakage
64  *	  may go undetected (for instance, changing a linked list
65  *	  into a circularly linked list).  If you have any doubts,
66  *	  inspect the NCA module source before committing your changes.
67  *
68  *	* MDB depends on the following variables (and their current
69  *	  semantics) in order to function correctly:
70  *
71  *		nca_conn_fanout nca_conn_fanout_size nca_gv nca_lru
72  *		urihash filehash
73  *
74  *	  If you change the names or *semantics* of these variables,
75  *	  you must modify the MDB module accordingly.
76  *
77  *	  In addition, you should consider whether the changes you've
78  *	  made should be reflected in the MDB dcmds themselves.
79  */
80 
81 /* The queue to make upcall on for NCAfs */
82 extern queue_t *ncaupcallq;
83 extern kmutex_t ncaupcallq_lock;
84 
85 extern int nca_logging_on;
86 extern int nca_conn_fanout_size;
87 extern boolean_t nca_deferred_oq_if;
88 extern boolean_t nca_fanout_iq_if;
89 
/*
 * Checksum pointer for no checksum.
 *
 * The expansion is fully parenthesized so that it always behaves as a
 * single operand; without the outer parentheses an expression such as
 * "sizeof NO_CKSUM" parses as "sizeof (void *) - 1".
 */

#define	NO_CKSUM ((void *)-1)
93 
94 /* undef any tcp.h:tcp_t members overloaded by the Solaris 8 tcp.h */
95 
96 #undef	tcp_last_rcv_lbolt
97 #undef	tcp_state
98 #undef	tcp_rto
99 #undef	tcp_snd_ts_ok
100 #undef	tcp_snd_ws_ok
101 #undef	tcp_snxt
102 #undef	tcp_swnd
103 #undef	tcp_mss
104 #undef	tcp_iss
105 #undef	tcp_rnxt
106 #undef	tcp_rwnd
107 #undef	tcp_lport
108 #undef	tcp_fport
109 #undef	tcp_ports
110 
111 /* the iph_t is no longer defined in ip.h for Solaris 8 ? */
112 
113 /* Unaligned IP header */
114 typedef struct iph_s {
115 	uchar_t	iph_version_and_hdr_length;
116 	uchar_t	iph_type_of_service;
117 	uchar_t	iph_length[2];
118 	uchar_t	iph_ident[2];
119 	uchar_t	iph_fragment_offset_and_flags[2];
120 	uchar_t	iph_ttl;
121 	uchar_t	iph_protocol;
122 	uchar_t	iph_hdr_checksum[2];
123 	uchar_t	iph_src[4];
124 	uchar_t	iph_dst[4];
125 } iph_t;
126 
127 
128 #define	true	B_TRUE			/* used with type boolean_t */
129 #define	false	B_FALSE			/* used with type boolean_t */
130 
/*
 * Table of primes for hashing, indexed by N (0-28): entry N is the
 * nearest prime <= 2^N - 2^(N-2).  Entries 0-2 are 0 and the table is
 * terminated by a trailing 0.  Expands to a brace-enclosed initializer.
 */

#define	P2Ps() {							\
	0, 0, 0, 5, 11, 23, 47, 89,					\
	191, 383, 761, 1531, 3067, 6143, 12281, 24571,			\
	49139, 98299, 196597, 393209, 786431, 1572853,			\
	3145721, 6291449, 12582893, 25165813, 50331599,			\
	100663291, 201326557,						\
	0}
140 
141 /*
142  * Serialization queue type (move to strsubr.h (stream.h?) as a general
143  * purpose lightweight mechanism for mblk_t serialization ?).
144  */
145 typedef struct nca_squeue_s {
146 	uint16_t	sq_state;	/* state flags */
147 	uint16_t	sq_count;	/* message count */
148 	uint32_t	sq_type;	/* type flags */
149 	processorid_t	sq_bind;	/* processor to bind to */
150 	ddi_softintr_t	sq_softid;	/* softintr() id */
151 	void		(*sq_init)();	/* initialize function */
152 	void		*sq_init_arg;	/* initialize argument */
153 	void		(*sq_proc)();	/* process function */
154 	mblk_t		*sq_first;	/* first mblk chain or NULL */
155 	mblk_t		*sq_last;	/* last mblk chain or NULL */
156 	clock_t		sq_wait;	/* lbolts to wait after a fill() */
157 	clock_t		sq_iwait;	/* lbolt after nointr() */
158 	clock_t		sq_pwait;	/* lbolt after pause() */
159 	int		sq_isintr;	/* is being or was serviced by */
160 	timeout_id_t	sq_tid;		/* timer id of pending timeout() */
161 	kcondvar_t	sq_async;	/* async thread blocks on */
162 	kmutex_t	sq_lock;	/* lock before using any member */
163 	clock_t		sq_awaken;	/* time async thread was awakened */
164 	void		*sq_priv;	/* user defined private */
165 	kt_did_t	sq_ktid;	/* kernel thread id */
166 } nca_squeue_t;
167 
/*
 * nca_squeue_t state flags and message count limit (i.e. properties
 * that change).  The state flags are only 16 bits wide.
 * Note: The MDB NCA module depends on the values of these flags.
 */

/* who is processing the queue */
#define	SQS_PROC	0x0001	/* being processed */
#define	SQS_WORKER	0x0002	/* worker thread */
#define	SQS_ENTER	0x0004	/* enter thread */
#define	SQS_FAST	0x0008	/* enter-fast thread */
#define	SQS_PROXY	0x0010	/* proxy thread */
#define	SQS_SOFTINTR	0x0020	/* softint thread */
				/* 0x00C0 bits not used */

/* processing control */
#define	SQS_NOINTR	0x0100	/* no interrupt processing */
#define	SQS_PAUSE	0x0200	/* paused */
#define	SQS_INTRWAIT	0x0400	/* interrupt waiting */
#define	SQS_NOPROC	0x0800	/* no processing */
				/* 0x7000 bits not used */
#define	SQS_EXIT	0x8000	/* worker(s) exit */

/* message count */
#define	SQS_CNT_TOOMANY	0x8000	/* message count toomany */
191 
/*
 * nca_squeue_t type flags (i.e. properties that don't change), plus the
 * flag test helpers for both sq_state and sq_type.
 * Note: The MDB NCA module depends on the values of these flags.
 */

#define	SQT_KMEM	0x00000001	/* was kmem_alloc()ed */
#define	SQT_DEFERRED	0x00000002	/* deferred processing */
#define	SQT_SOFTINTR	0x00000004	/* use softintr() */

#define	SQT_BIND_ANY	0x01000000	/* bind worker thread to any CPU */
#define	SQT_BIND_TO	0x02000000	/* bind worker thread to speced CPU */

#define	SQT_BIND_MASK	0xFF000000	/* bind flags mask */

/* Each yields the subset of (flags) set in the queue; nonzero if any is. */
#define	SQ_STATE_IS(sqp, flags) ((sqp)->sq_state & (flags))
#define	SQ_TYPE_IS(sqp, flags) ((sqp)->sq_type & (flags))

208 
209 
210 typedef struct sqfan_s {
211 	uint32_t	flg;		/* flags only */
212 	uint32_t	cnt;		/* vector count */
213 	uint32_t	ix;		/* next sqv[] to process */
214 	uint32_t	drain;		/* max mblk(s) draind per */
215 	nca_squeue_t	**sqv;	/* pointer to nca_squeue_t pointer vector */
216 } sqfan_t;
217 
218 #define	SQF_DIST_CNT	0x0001	/* sqfan_t dist by queue count */
219 #define	SQF_DIST_IPv4	0x0002	/* sqfan_t dist by IPv4 src addr */
220 
221 /*
222  * A multiphase timer is implemented using the te_t, tb_t, and ti_t structs.
223  *
224  * The multiple phases of timer entry execution are:
225  *
226  * 1) resource, execution is done from resource reclaim when the timer event
227  *    is the freeing of the timed resource.
228  *
229  * 2) process, execution is done from process thread yield (idle/return).
230  *
231  * 3) time, execution is done from a timeout callback thread.
232  *
233  * Each of the phases has a separate timer fire time represented by
234  * the ti_t members lbolt1, lbolt2, and lbolt3. Each lbolt is an absolute
235  * lbolt value with lbolt1 <= lbolt2 <= lbolt3.
236  */
237 
/*
 * te_t - timer entry.
 *
 * Entries are doubly linked (prev/next) and each one points back at the
 * timer bucket (tb_t) it belongs to.
 */

typedef struct te_s {
	struct te_s *prev;	/* previous te_t */
	struct te_s *next;	/* following te_t */
	struct tb_s *tbp;	/* timer bucket pointer */
	void	*ep;		/* encapsulating struct pointer */
} te_t;
248 
249 /*
250  * tb_t - timer bucket.
251  */
252 
253 typedef struct tb_s {
254 	struct tb_s *next;	/* next tb_t in ascending time order */
255 	clock_t	exec;		/* te_t lbolt exec value for bucket */
256 	te_t	*head;		/* head of te_t list (first timer) */
257 	te_t	*tail;		/* tail of te_t list (last timer) */
258 } tb_t;
259 
260 /*
261  * ti_t - timer state.
262  */
263 
264 typedef struct ti_s {
265 	clock_t	exec;		/* next te_t exec value (0 = NONE) */
266 	clock_t	lbolt1;		/* phase1 lbolt1 (0 = NONE) */
267 	clock_t	lbolt2;		/* phase2 lbolt2 (0 = NONE) */
268 	clock_t	lbolt3;		/* phase3 lbolt3 (0 = NONE) */
269 	tb_t	*head;		/* head of tb_t list (first timer bucket) */
270 	tb_t	*tail;		/* tail of tb_t list (last timer bucket) */
271 	timeout_id_t tid;	/* timer id of pending timeout() (0 = NONE) */
272 	void	*ep;		/* pointer to encapsulating struct */
273 } ti_t;
274 
275 #define	NCA_TI_INPROC	-1	/* Processing going on */
276 #define	NCA_TI_NONE	0	/* no lbolt */
277 
278 /*
279  * TIME_WAIT grounded doubly linked list of nca_conn_t's awaiting TIME_WAIT
280  * expiration. This list is used for reclaim, reap, and timer based
281  * processing.
282  *
283  * A multiphase timer is used:
284  *
285  * phase 1) reclaim of connections during connection allocation
286  *
287  * phase 2) reaping of connections during nca_squeue_t inq thread unwind
288  *
289  * phase 3) timeout of connections as a result of a timeout().
290  *
291  * Each of the phases has a separate timer fire lbolt represented by
292  * the members lbolt1, lbolt2, and lbolt3, each is an absolute lbolt value
293  * with lbolt1 <= lbolt2 <= lbolt3.
294  */
295 
296 typedef struct tw_s {
297 	clock_t	lbolt1;		/* phase1 lbolt value (0 = NONE) */
298 	clock_t	lbolt2;		/* phase2 lbolt value  */
299 	clock_t	lbolt3;		/* phase3 lbolt value  */
300 	struct conn_s *head;	/* Head of nca_conn_t list */
301 	struct conn_s *tail;	/* Tail of nca_conn_t list */
302 	timeout_id_t tid;	/* Timer id of pending timeout() (0 = NONE) */
303 	void	*ep;		/* pointer to encapsulating struct */
304 } tw_t;
305 
306 #define	NCA_TW_NONE	0	/* no lbolt */
307 
308 #define	NCA_TW_MS	1000
309 
310 #define	NCA_TW_LBOLT MSEC_TO_TICK(NCA_TW_MS)
311 
/*
 * NCA_TW_LBOLTS - (re)arm the TIME_WAIT timer state in (twp).
 *
 * (future) is rounded up to a multiple of NCA_TW_LBOLT.  If that differs
 * from the current lbolt1, then lbolt1/lbolt2/lbolt3 are set to
 * consecutive buckets starting there, any pending timeout() is
 * cancelled, and a new timeout() for nca_tw_fire is scheduled relative
 * to the global lbolt.
 */
#define	NCA_TW_LBOLTS(twp, future) {					\
	clock_t	_when = (future);					\
	clock_t	_rem = _when % NCA_TW_LBOLT;				\
									\
	if (_rem != 0) {						\
		/* not on a bucket boundary: advance to the next one */	\
		_when += NCA_TW_LBOLT - _rem;				\
	}								\
	if ((twp)->lbolt1 != _when) {					\
		(twp)->lbolt1 = _when;					\
		_when += NCA_TW_LBOLT;					\
		(twp)->lbolt2 = _when;					\
		_when += NCA_TW_LBOLT;					\
		(twp)->lbolt3 = _when;					\
		if ((twp)->tid != 0) {					\
			/* drop the timeout armed for the old lbolt3 */	\
			(void) untimeout((twp)->tid);			\
			(twp)->tid = 0;					\
		}							\
		if (_when != NCA_TW_NONE) {				\
			(twp)->tid = timeout((pfv_t)nca_tw_fire, (twp),	\
			    (twp)->lbolt3 - lbolt);			\
		}							\
	}								\
}
336 
337 /*
338  * The Node Fanout structure.
339  *
340  * The hash tables and their linkage (hashnext) are protected by the
341  * per-bucket lock. Each node_t inserted in the list points back at
342  * the nodef_t that heads the bucket (hashfanout).
343  */
344 
345 typedef struct nodef_s {
346 	struct node_s	*head;
347 	kmutex_t	lock;
348 } nodef_t;
349 
350 /*
351  * A node_t is used to represent a cached byte-stream object. A node_t is
352  * in one of four active states:
353  *
354  * 1) path != NULL, member of a node_t hash list with an object description
355  *    (hashnext, size, path, pathsz members valid).
356  *
357  * 2) pp != NULL, 1) + phys pages allocated (pp, plrupn, plrunn members valid).
358  *
359  * 3) data != NULL, 2) + virt mapping allocated (data, datasz, vlrupn, vlrunn
360  *    members valid).
361  *
362  * 4) cksum != NULL 3) + checksum mapping allocated
363  */
364 
365 typedef struct node_s {
366 	uint32_t 	ref;		/* ref (see below) state */
367 	uint32_t 	cnt;		/* ref count */
368 	int32_t		size;		/* object size (-1 = UNKNOWN) */
369 	uint32_t	mss;		/* mblk(s) in size mss */
370 	uint64_t	ctag;		/* usr defined cache tag, 0 => no tag */
371 	ipaddr_t	laddr;		/* local IP, for virtual hosting */
372 	uint16_t	lport;		/* local port, for virtual hosting */
373 
374 	struct node_s	*plrunn;	/* Phys LRU list next node_t */
375 	struct node_s	*plrupn;	/* Phys LRU list previous node_t */
376 	struct node_s	*vlrunn;	/* Virt LRU list next node_t */
377 	struct node_s	*vlrupn;	/* Virt LRU list previous node_t */
378 
379 	nodef_t	*hashfanout;		/* hash bucket we're part of */
380 	nodef_t	*ctaghashfanout;	/* ctaghash bucket we're part off */
381 	struct node_s *hashnext;	/* hash list next node_t */
382 	struct node_s *ctaghashnext;	/* ctaghash list next node_t */
383 	struct conn_s *connhead;	/* head of list of conn(s) in miss */
384 	struct conn_s *conntail;	/* tail of list of conn(s) in miss */
385 	struct node_s *next;		/* needed if data is in chunks */
386 	struct node_s *back;		/* needed if data is in chunks */
387 
388 	clock_t	expire;		/* lbolt node_t expires (0 = NOW, -1 = NEVER) */
389 	time_t	lastmod;	/* HTTP "Last-Modified:" value */
390 
391 	mblk_t	*req;		/* whole HTTP request (including headers) */
392 	int	reqsz;		/* size of above */
393 	int	reqcontl;	/* HTTP "Content-Length:" value */
394 	uint32_t rcv_cnt;	/* rcv_list byte count */
395 	mblk_t	*rcv_head;	/* rcv_list head */
396 	mblk_t	*rcv_tail;	/* rcv_list tail */
397 	mblk_t	*rcv_ptr;	/* rcv_list pointer */
398 
399 	nca_squeue_t *sqp;	/* squeue node_t is being processed from */
400 	char	*path;		/* URI path component */
401 	int	pathsz;		/* size of above */
402 	uint_t	method;		/* HTTP request method */
403 	uint_t	version;	/* HTTP request version */
404 	char	*reqhdr;	/* HTTP request header(s) */
405 	int	reqhdrsz;	/* size of above */
406 	char	*reqhost;	/* HTTP "Host:" string */
407 	int	reqhostsz;	/* size of above */
408 	char	*reqaccept;	/* HTTP "Accept:" string */
409 	int	reqacceptsz;	/* size of above */
410 	char	*reqacceptl;	/* HTTP "Accept-Language:" string */
411 	int	reqacceptlsz;	/* size of above */
412 
413 	page_t	**pp;		/* page pointer vector for data */
414 	char	*data;		/* data buffer */
415 	int	datasz;		/* size of above */
416 	uint16_t *cksum;	/* cksum() vector for data by mss */
417 	size_t	cksumlen;	/* length of memory block for above vector */
418 	uint_t	resbody;	/* HTTP response body at &data[resbody] */
419 
420 	int	hlen;		/* data buffer split header len */
421 	int	fileoff;	/* file include offset */
422 	int	filelen;	/* length of file */
423 	struct node_s *fileback; /* head node_t of a file list (-1 for death) */
424 	struct node_s *filenext; /* next node_t of a file list */
425 	struct node_s *ctagback; /* head node_t of a ctag list */
426 	struct node_s *ctagnext; /* next node_t of a ctag list */
427 	vnode_t	*filevp;	/* vnode for the file */
428 
429 	kmutex_t lock;		/* serializes access to node_t */
430 	frtn_t	frtn;		/* STREAMS free routine; always node_freeb() */
431 	boolean_t headchunk;	/* true if this node is the head chunk */
432 
433 	/*
434 	 * The following 4 fields are used to record node states when
435 	 * upcalls are preempted. When preempted upcalls are not relavent,
436 	 * these fields should have default value 0.
437 	 */
438 	uint8_t advise;		/* an interpreted advise from http */
439 	boolean_t last_advisory; /* preempted upcall state -- advisory bit */
440 	boolean_t advisory;	/* need advisory from httpd before use */
441 	boolean_t first_upcall;	/* node in first upcall, a internal state */
442 
443 	kcondvar_t cv;		/* sync upcall/downcall process on a node */
444 	int	onqueue;	/* == 1 if on miss_queue, debug aid */
445 } node_t;
446 
/*
 * node_t ref state bits; each is tested as (ref & REF_xxx).
 * Note: The MDB NCA module depends on the values of these flags.
 */

#define	REF_URI		0x80000000 /* node_t URI hashed */
#define	REF_PHYS	0x40000000 /* phys mapping in-use */
#define	REF_VIRT	0x20000000 /* virt mapping in-use */
#define	REF_CKSUM	0x10000000 /* checksum mapping in-use */
#define	REF_KMEM	0x08000000 /* kmem mapped (PHYS|VIRT) */
#define	REF_DONE	0x04000000 /* node_t fill is done */
#define	REF_SAFED	0x02000000 /* node_t not safe for use */
#define	REF_FILE	0x01000000 /* node_t filename hashed */
#define	REF_RESP	0x00800000 /* node_t response header parsed */
#define	REF_NOLRU	0x00400000 /* node_t not safe for lru reclaim */
#define	REF_MISS	0x00200000 /* node_t is/will missed() proc */
#define	REF_ONPLRU	0x00100000 /* node_t is on Phys LRU */
#define	REF_ONVLRU	0x00080000 /* node_t is on Virt LRU */
#define	REF_PREEMPT	0x00040000 /* node_t processing preempted */
#define	REF_CTAG	0x00020000 /* node_t CTAG hashed */
#define	REF_UPCALL	0x00010000 /* node_t upcall not yet complete */
#define	REF_OWNED	0x00008000 /* node_t owned (won't be freed) */
#define	REF_ERROR	0x00004000 /* node_t errored */
#define	REF_VNODE	0x00002000 /* node_t vnode hashed */
#define	REF_NCAFS	0x00001000 /* node_t is NCAfs required */
#define	REF_SEGMAP	0x00000800 /* segmapped (PHYS|VIRT) */
#define	REF_UNUSED	0x000007FF /* UNUSED */

/*
 * Mappings with no separate PHYS and VIRT, i.e. a single mapping with a
 * virtual address, e.g. REF_KMEM and REF_SEGMAP.
 */
#define	REF_NOVIRT	(REF_KMEM | REF_SEGMAP)

/* Is this node safe for reclaim ? */
#define	REF_RECLAIM	(REF_SAFED | REF_NOLRU | REF_MISS)
479 
480 /*
481  * NCA node_t reference counting is more complicated than nca_conn_t reference
482  * counting because we pass parts of node_t's (masquerading as dblk
483  * buffers) into the STREAMS subsystem which eventually get freed by
484  * network drivers just like regular dblk buffers.  Also, unlike nca_conn_t's,
485  * we may wish to keep a node_t around even after there are no outstanding
486  * references, since it's possible that it will be requested again.
487  *
488  * Thus, the node_t reference count reflects the number of active codepaths
489  * in Solaris making use of a given node_t -- each codepath that requires
490  * that the node_t stick around once it drops the node_t lock must acquire
491  * a reference via NODE_REFHOLD and drop that reference via NODE_REFRELE
492  * when done.  Note that following a NODE_REFRELE the node that was
493  * released may no longer exist and thus it should not be referenced unless
494  * the codepath has another outstanding reference.  When a node_t is passed
495  * into the STREAMS subsystem via desballoc() and related interfaces, a
496  * NODE_REFHOLD should be placed on the node_t and the free routine should
497  * be set to node_freeb(), which will in turn call NODE_REFRELE.
498  *
499  * The concept of node ownership allows NCA to express that it would like
500  * this node to hang around, even if there are no "explicit" references to
501  * it (the ownership counts as an implicit reference).  All "headchunk"
502  * hashed nodes are owned when they are created.  If they subsequently
503  * become disowned (currently via nca_node_del() or nca_reclaim_vlru()),
504  * they may have some or all their resources freed (via node_fr()) as soon
505  * as the last reference to them is removed.  Note that it's possible that
506  * a disowned node may become of interest again before some or all of its
507  * resources were reclaimed -- in this case, it must be reowned via
508  * NODE_OWN.  Note that an unhashed node should never be owned, though it
509  * of course may be held and released; this is because there is no sense
510  * in owning a node which is merely temporary (i.e., not hashed somewhere).
511  * Note that the corollary of this statement is not true -- that is, just
512  * because a node is hashed does not mean it is owned (it may have been
513  * disowned via nca_reclaim_vlru()) -- this is why code must always reown
514  * hashed nodes if it's desirable to have them stick around.
515  *
516  * All four macros *must* be called with the node lock held.  However,
517  * NODE_DISOWN and NODE_REFRELE return with the lock unlocked (if there is
518  * still a lock at all), because the operation may have just removed the
519  * final reference to a node and it may no longer exist.
520  *
521  * A version of NODE_REFRELE is provided which doesn't unlock the lock but
522  * can only be used when the caller can guarantee that it's not the last ref
523  * (e.g. the caller has another outstanding reference) as if it's the last
524  * ref the node_t may no longer exist. The new macro is NODE_REFRELE_LOCKED.
525  */
526 
/*
 * NODE_DISOWN - give up ownership of (np).
 *
 * Must be entered with the node lock held; the lock is dropped on every
 * path.  If the node is owned, REF_OWNED is cleared and the implicit
 * reference is released through NODE_REFRELE; otherwise the lock is
 * simply released.  Panics if the node is owned but has a zero count.
 */
#define	NODE_DISOWN(np) {						\
	NODE_T_TRACE((np), NODE_T_TRACE_DISOWN);			\
	ASSERT(mutex_owned(&(np)->lock));				\
									\
	if (((np)->ref & REF_OWNED) == 0) {				\
		/* not owned: nothing to give up, just unlock */	\
		mutex_exit(&(np)->lock);				\
	} else {							\
		if ((np)->cnt == 0)	{				\
			panic("nca NODE_DISOWN: %p has no references",	\
			    (void *)(np));				\
		}							\
		(np)->ref &= ~REF_OWNED;				\
		/* NODE_REFRELE drops the lock for us */		\
		NODE_REFRELE(np);					\
	}								\
}
543 
/*
 * NODE_OWN - take ownership of (np) if it is not already owned.
 *
 * Must be entered with the node lock held; the lock stays held.
 * Ownership sets REF_OWNED and counts as one reference.  Panics if the
 * count is already UINT_MAX.
 */
#define	NODE_OWN(np) {							\
	NODE_T_TRACE((np), NODE_T_TRACE_OWN);				\
	ASSERT(mutex_owned(&(np)->lock));				\
									\
	if (((np)->ref & REF_OWNED) == 0) {				\
		if ((np)->cnt == UINT_MAX) {				\
			panic(						\
			    "nca NODE_OWN: %p has too many references",	\
			    (void *)(np));				\
		}							\
		(np)->ref |= REF_OWNED;					\
		(np)->cnt++;						\
	}								\
}
558 
/*
 * NODE_REFHOLD - add one reference to (np).
 *
 * Must be entered with the node lock held; the lock stays held.
 * Panics if the count is already UINT_MAX.
 */
#define	NODE_REFHOLD(np) {						\
	NODE_T_TRACE((np), NODE_T_TRACE_REFHOLD | ((np)->cnt + 1));	\
	ASSERT(mutex_owned(&(np)->lock));				\
									\
	if ((np)->cnt == UINT_MAX) {					\
		panic("nca NODE_REFHOLD: %p has too many references",	\
		    (void *)(np));					\
	}								\
	(np)->cnt++;							\
}
569 
/*
 * NODE_REFRELE - drop one reference to (np).
 *
 * Must be entered with the node lock held; the lock is NOT held on
 * return.  When the count reaches zero the node is handed to node_fr()
 * (which unlocks it); otherwise the lock is released here.  Panics when
 * the only remaining reference is the ownership one, or when there are
 * no references at all.
 */
#define	NODE_REFRELE(np) {						\
	NODE_T_TRACE((np), NODE_T_TRACE_REFRELE | ((np)->cnt - 1));	\
	ASSERT(mutex_owned(&(np)->lock));				\
									\
	if (((np)->ref & REF_OWNED) != 0 && (np)->cnt == 1) {		\
		panic(							\
		    "nca NODE_REFRELE: %p has only OWNED reference",	\
		    (void *)(np));					\
	}								\
	if ((np)->cnt == 0) {						\
		panic("nca NODE_REFRELE: %p has no references",		\
		    (void *)(np));					\
	}								\
	(np)->cnt--;							\
	if ((np)->cnt != 0) {						\
		mutex_exit(&(np)->lock);				\
	} else {							\
		ASSERT(((np)->ref & REF_OWNED) == 0);			\
		node_fr(np);		/* node_fr unlocks the lock */	\
	}								\
}
590 
/*
 * NODE_REFRELE_LOCKED - drop one reference to (np) without unlocking.
 *
 * Must be entered with the node lock held; the lock stays held.  Only
 * usable when the caller knows this is not the last reference, so the
 * macro panics when, after discounting the ownership reference (if
 * any), fewer than two references remain.
 *
 * The panic strings name this macro (previously two of them were
 * misspelled "NODE_REFRELEL_LOCKED").
 */
#define	NODE_REFRELE_LOCKED(np) {					\
	uint_t	_cnt = (np)->cnt;					\
									\
	NODE_T_TRACE((np), NODE_T_TRACE_REFRELE | (_cnt - 1));		\
	ASSERT(mutex_owned(&(np)->lock));				\
									\
	/* ownership counts as one reference; look past it */		\
	if ((np)->ref & REF_OWNED)					\
		_cnt--;							\
	if (((np)->ref & REF_OWNED) && _cnt == 0)			\
		panic("nca NODE_REFRELE_LOCKED: "			\
		    "%p has only OWNED reference", (void *)(np));	\
	if (_cnt == 0)							\
		panic("nca NODE_REFRELE_LOCKED: "			\
		    "%p has no references", (void *)(np));		\
	if (_cnt == 1)							\
		panic("nca NODE_REFRELE_LOCKED: "			\
		    "%p has only one reference", (void *)(np));		\
	(np)->cnt--;							\
}
610 
611 
612 /*
613  * NODE_T_TRACE - trace node_t events.
614  *
615  * adb:
616  * 32 bit
617  *	*node_tp,0t8192-(((*node_tp)-node_tv)%0t48)/PXXDDnPnPnPnPnPnPnPnn
618  *	node_tv,((*node_tp)-node_tv)%0t48/PXXDDnPnPnPnPnPnPnPnn
619  *
620  * 64 bit
621  *	*node_tp,0t8192-(((*node_tp)-node_tv)%0t56)/PXXDDnXnXnXnXnXnXnXnn
622  *	node_tv,((*node_tp)-node_tv)%0t56/PXXDDnXnXnXnXnXnXnXnn
623  *
624  * For incremental node tracing, note the value of node_tp (node_tp/X) after
625  * a run, then replace that in the 2nd line for node_tv.
626  */
627 
628 #define	NODE_T_STK_DEPTH	6
629 
630 struct node_ts {
631 	node_t	*node;
632 	unsigned action;
633 	unsigned ref;
634 	unsigned cnt;
635 	int	cpu;
636 	pc_t	stk[NODE_T_STK_DEPTH + 1];
637 };
638 
639 #undef	NODE_T_TRACE_ON
640 
641 #ifdef	NODE_T_TRACE_ON
642 
/*
 * NODE_T_TRACE action codes, grouped by high nibble.
 */

/* 0xFx: creation */
#define	NODE_T_TRACE_ALLOC	0xFF000000	/* kmem_alloc() of */
#define	NODE_T_TRACE_ADD	0xFE000000	/* node_add() */

/* 0xEx: ownership and reference counting */
#define	NODE_T_TRACE_OWN	0xEF000000	/* node has been owned */
#define	NODE_T_TRACE_DISOWN	0xEE000000	/* node has been disowned */
#define	NODE_T_TRACE_DESBALLOC	0xED000000	/* desballoc() */
#define	NODE_T_TRACE_REFRELE	0xEC000000	/* refrele */
#define	NODE_T_TRACE_REFHOLD	0xEB000000	/* refhold */
#define	NODE_T_TRACE_NODE_FR	0xEA000000	/* node_fr() */

/* 0xDx */
#define	NODE_T_TRACE_TEMPNODE	0xDF000000	/* node_temp() */
#define	NODE_T_TRACE_REPLACE	0xDE000000	/* node_replace() */
#define	NODE_T_TRACE_FLUSH	0xDD000000	/* node_flush() */
#define	NODE_T_TRACE_DOWNCALL	0xDC000000	/* downcall_service() */
#define	NODE_T_TRACE_DOWNCALL_2	0xDB000000	/* dcall_service->httpd_data */

/* 0xCx */
#define	NODE_T_TRACE_DATA	0xCF000000	/* httpd_data() */

/* 0xAx */
#define	NODE_T_TRACE_LRU	0xAF000000	/* nca_lru insert */
#define	NODE_T_TRACE_HTTPD	0xAE000000	/* call nca_httpd() */
#define	NODE_T_TRACE_MISS	0xAD000000	/* http_miss() */
#define	NODE_T_TRACE_TEMP	0xAC000000	/* np != *npp */
#define	NODE_T_TRACE_XMIT	0xAB000000	/* tcp_xmit() */
#define	NODE_T_TRACE_MISSED	0xAA000000	/* nca_missed() */

/* 0x0x */
#define	NODE_T_TRACE_DEL	0x00000000	/* node_del() */
669 
/*
 * NODE_T_TRACE_STK - record the current call stack in _p->stk[].
 *
 * Relies on "struct node_ts *_p" and "int _ix" being in scope at the
 * point of use (NODE_T_TRACE() declares both).  If fewer pcs are
 * recorded than stk[] can hold, a 0 entry is stored right after the
 * last one as a terminator.  getpcstack() is assumed to return the
 * number of pcs it stored -- the same convention DOOR_TRACE_STK relies
 * on.
 *
 * The x86 variant used to store the terminator at stk[_ix + 1], which
 * skipped a slot and, for _ix == NODE_T_STK_DEPTH, wrote one element
 * past the end of stk[].  It now terminates at stk[_ix].
 */
#if defined(__i386) || defined(__amd64)
#define	NODE_T_TRACE_STK() {						\
	_ix = getpcstack(&_p->stk[0], NODE_T_STK_DEPTH + 1);		\
	if (_ix < NODE_T_STK_DEPTH + 1) {				\
		/* slots 0 .. _ix-1 hold pcs; terminate at _ix */	\
		_p->stk[_ix] = 0;					\
	}								\
}
#else
#define	NODE_T_TRACE_STK() {						\
	_p->stk[0] = (pc_t)callee();					\
	_ix = getpcstack(&_p->stk[1], NODE_T_STK_DEPTH);		\
	if (_ix < NODE_T_STK_DEPTH) {					\
		/* stk[0] is callee(), so pcs end at stk[_ix] */	\
		_p->stk[_ix + 1] = 0;					\
	}								\
}
#endif
686 
687 #define	NODE_TV_SZ 8192
688 
689 extern struct node_ts node_tv[NODE_TV_SZ];
690 extern struct node_ts *node_tp;
691 
692 #define	NODE_T_TRACE(p, a) {						\
693 	struct node_ts *_p;						\
694 	struct node_ts *_np;						\
695 	int    _ix;							\
696 									\
697 	do {								\
698 		_p = node_tp;						\
699 		if ((_np = _p + 1) == &node_tv[NODE_TV_SZ])		\
700 			_np = node_tv;					\
701 	} while (casptr(&node_tp, _p, _np) != _p);			\
702 	_p->node = (p);							\
703 	_p->action = (a);						\
704 	_p->ref = (p) ? (p)->ref : 0;					\
705 	_p->cnt = (p) ? (p)->cnt : 0;					\
706 	_p->cpu = CPU->cpu_seqid;					\
707 	NODE_T_TRACE_STK();						\
708 }
709 
710 #else	/* NODE_T_TRACE_ON */
711 
712 #define	NODE_T_TRACE(p, a)
713 
714 #endif	/* NODE_T_TRACE_ON */
715 
716 /*
717  * DOOR_TRACE - trace door node_t events.
718  *
719  * adb:
720  * 32 bit
721  *	*door_tp,0t8192-(((*door_tp)-door_tv)%0t112)/5XnPnPnPnPnPnPnPn64cnn
722  *	door_tv,((*door_tp)-door_tv)%0t112/5XnPnPnPnPnPnPnPn64cnn
723  * 64 bit
724  *	*door_tp,0t8192-(((*door_tp)-door_tv)%0t128)/PXPXXnXnXnXnXnXnXnXn64cnn
725  *	door_tv,((*door_tp)-door_tv)%0t128/PXPXXnXnXnXnXnXnXnXn64cnn
726  */
727 
728 #define	DOOR_STK_DEPTH	6
729 
730 struct door_ts {
731 	struct conn_s *cp;
732 	unsigned action;
733 	node_t	*np;
734 	int	ref;
735 	unsigned state;
736 	pc_t	stk[DOOR_STK_DEPTH + 1];
737 	char	data[64];
738 };
739 
740 #undef	DOOR_TRACE_ON
741 
742 #ifdef	DOOR_TRACE_ON
743 
/*
 * DOOR_TRACE action codes.
 */

/* 0xFx: upcalls */
#define	DOOR_TRACE_UPCALL	0xF0000000	/* upcall() */
#define	DOOR_TRACE_UPCALL_RAW	0xF1000000	/* upcall() RAW ? */
#define	DOOR_TRACE_UPCALL_RET	0xFF000000	/* upcall() return */

/* 0xEx: downcalls */
#define	DOOR_TRACE_DOWNCALL	0xE0000000	/* downcall() */
#define	DOOR_TRACE_CONNECT	0xE1000000	/* connect() */
#define	DOOR_TRACE_CONNECT_DATA	0xE2000000	/* connect() */
#define	DOOR_TRACE_DIRECTFROM	0xE3000000	/* tee_splice() from */
#define	DOOR_TRACE_DIRECTTO	0xE4000000	/* tee_splice() to */
#define	DOOR_TRACE_DOWNCALL_RET	0xEF000000	/* downcall() return */

/* 0x8x: initialization */
#define	DOOR_TRACE_INIT		0x80000000	/* doorcall_init() */
#define	DOOR_TRACE_INIT_RET	0x88000000	/* doorcall_init() return */
757 
/*
 * DOOR_TRACE_STK - record the current call stack in _p->stk[].
 *
 * Relies on "struct door_ts *_p" and "int _ix" being in scope at the
 * point of use (DOOR_TRACE() declares both).  If the stack recorded does
 * not fill stk[], a 0 entry is stored after the last pc.
 */
#if defined(__i386) || defined(__amd64)
#define	DOOR_TRACE_STK() {						\
	_ix = getpcstack(&_p->stk[0], DOOR_STK_DEPTH + 1);		\
	if (_ix <= DOOR_STK_DEPTH) {					\
		_p->stk[_ix] = 0;					\
	}								\
}
#else
#define	DOOR_TRACE_STK() {						\
	_p->stk[0] = (pc_t)callee();					\
	_ix = getpcstack(&_p->stk[1], DOOR_STK_DEPTH);			\
	if (_ix + 1 <= DOOR_STK_DEPTH) {				\
		_p->stk[_ix + 1] = 0;					\
	}								\
}
#endif
774 
775 #define	DOOR_TV_SZ 8192
776 
777 extern struct door_ts door_tv[DOOR_TV_SZ];
778 extern struct door_ts *door_tp;
779 
780 #define	DOOR_TRACE(io, d, d_sz, a) {				\
781 	nca_conn_t *_cp = (io) ? (nca_conn_t *)(io)->cid : (nca_conn_t *)NULL; \
782 	node_t *_req_np = _cp ? _cp->req_np : (node_t *)NULL;		\
783 	struct door_ts *_p;						\
784 	struct door_ts *_np;						\
785 	int    _ix;							\
786 									\
787 	do {								\
788 		_p = door_tp;						\
789 		if ((_np = _p + 1) == &door_tv[DOOR_TV_SZ])		\
790 			_np = door_tv;					\
791 	} while (casptr(&door_tp, _p, _np) != _p);			\
792 	_p->cp = _cp;							\
793 	_p->np = _req_np;						\
794 	_p->action = (a);						\
795 	_p->ref = _req_np ? _req_np->ref : 0;				\
796 	if ((io)) {							\
797 		_p->state = ((io)->op == http_op ? 0x80000000 : 0) |	\
798 			    ((io)->more ? 0x40000000 : 0) |		\
799 			    ((io)->first ? 0x20000000 : 0) |		\
800 			    ((io)->advisory ? 0x10000000 : 0) |		\
801 			    ((io)->nocache ? 0x08000000 : 0) |		\
802 			    ((io)->preempt ? 0x04000000 : 0) |		\
803 			    ((io)->peer_len ? 0x02000000 : 0) |		\
804 			    ((io)->local_len ? 0x01000000 : 0) |	\
805 			    ((io)->data_len ? 0x00800000 : 0) |		\
806 			    (((io)->direct_type << 20) & 0x00700000) |	\
807 			    ((io)->direct_len ? 0x00080000 : 0) |	\
808 			    ((io)->trailer_len ? 0x00040000 : 0) |	\
809 			    (((io)->peer_len + (io)->local_len +	\
810 			    (io)->data_len + (io)->direct_len +		\
811 			    (io)->trailer_len) & 0x3FFFF);		\
812 	} else {							\
813 		_p->state = 0;						\
814 	}								\
815 	if ((d_sz)) {							\
816 		int _n = MIN((d_sz), 63);				\
817 									\
818 		bcopy((d), _p->data, _n);				\
819 		bzero(&_p->data[_n], 64 - _n);				\
820 	} else {							\
821 		bzero(_p->data, 64);					\
822 	}								\
823 	DOOR_TRACE_STK();						\
824 }
825 
826 #else	/* DOOR_TRACE_ON */
827 
828 #define	DOOR_TRACE(io, d, d_sz, a)
829 
830 #endif	/* DOOR_TRACE_ON */
831 
/*
 * NCA node LRU cache.  Defined here so that the NCA mdb module can use it.
 *
 * Tracks two lists of node_t, a "phys" list and a "virt" list.  For each
 * list the head is the most-recently-used end and the tail is the
 * least-recently-used end, and a count of members is kept alongside.
 * All of the list fields are covered by the embedded lock.
 */
typedef struct lru_s {
	node_t		*phead;	/* Phys LRU list head (MRU) */
	node_t		*ptail;	/* Phys LRU list tail (LRU) */
	node_t		*vhead;	/* Virt LRU list head (MRU) */
	node_t		*vtail;	/* Virt LRU list tail (LRU) */

	uint32_t	pcount;	/* Phys count of node_t members */
	uint32_t	vcount;	/* Virt count of node_t members */

	kmutex_t	lock;	/* Guarantee atomic access of above */
} lru_t;
846 
847 /*
848  * Per CPU instance structure.
849  *
850  * 32-bit adb: XXXnnDnnXXnnXXnnXDnnXXnn228+na
851  * 64-bit adb: PPPnnD4+nnPPnnPPnnJDnnJ180+na
852  */
853 
typedef struct nca_cpu_s {

	/*
	 * Per CPU node_t pointers, one per persist header flavor.
	 * NOTE(review): presumably these correspond to the PERSIST_HDR_NONE,
	 * PERSIST_HDR_CLOSE and PERSIST_HDR_KA strings - confirm at the
	 * point of initialization.
	 */
	node_t *persist_hdr_none;
	node_t *persist_hdr_close;
	node_t *persist_hdr_ka;

	/*
	 * Besides the reader count this word also carries the
	 * DCB_COUNT_USELOCK flag, see the DCB_RD_*() and DCB_WR_*() macros.
	 */
	uint32_t dcb_readers;	/* count of dcb_list readers for this CPU */

	nca_squeue_t *if_inq;	/* if_t input nca_squeue_t */
	nca_squeue_t *if_ouq;	/* if_t output nca_squeue_t */

	ti_t	*tcp_ti;	/* TCP TIMER list */
	tw_t	*tcp_tw;	/* TCP TIME_WAIT list */

	ddi_softintr_t soft_id;	/* soft interrupt id for if_inq worker */
	int	if_inq_cnt;	/* count of if_t.inq references */

	/*
	 * Pad out to 256 bytes by subtracting the size of every member
	 * above; a member added or removed above must be mirrored here.
	 * NOTE(review): only the member sizes are subtracted, any alignment
	 * padding inserted by the compiler between members is not accounted
	 * for - confirm the resulting sizeof (nca_cpu_t) for LP64.
	 */
	char	pad[256 - sizeof (node_t *) - sizeof (node_t *) -
		    sizeof (node_t *) - sizeof (uint32_t) -
		    sizeof (nca_squeue_t *) - sizeof (nca_squeue_t *) -
		    sizeof (ti_t *) - sizeof (tw_t *) -
		    sizeof (ddi_softintr_t) - sizeof (int)];
} nca_cpu_t;

extern nca_cpu_t *nca_gv;	/* global per CPU state indexed by cpu_seqid */
879 
880 /*
881  * hcb_t - host control block.
882  *
 * Used early on in packet switching to select packets to be serviced by NCA
 * and optionally later on by the HTTP protocol layer to further select HTTP
 * requests to be serviced.
886  *
887  * dcb_t - door control block.
888  *
889  * Used to associate one or more hcb_t(s) with a given httpd door instance.
890  *
891  * dcb_list - dcb_t global list, a singly linked grounded list of dcb_t's.
892  *
893  * Used to search for a hcb_t match, currently a singly linked grounded list
894  * of dcb_t's with a linear walk of the list. While this is adequate for the
895  * current httpd support (i.e. a single door) a move to either a hash or tree
896  * will be required for multiple httpd instance support (i.e. multiple doors).
897  *
898  * The dcb_list is protected by a custom reader/writer lock, the motivation
899  * for using a custom lock instead of a krwlock_t is that this lock is the
900  * single hot spot in NCA (i.e. all in-bound packets must acquire this lock)
901  * and a nonlocking atomic readers count scheme is used in the common case
902  * (i.e. reader lock) with a fall-back to a conventional kmutex_t for writer
903  * (i.e. ndd list add/delete).
904  */
905 
/*
 * One host match entry: an (address, port) pair plus an optional "Host:"
 * name and an optional document root.  Entries are chained through next
 * into a NULL terminated singly linked list.
 */
typedef struct hcb_s {
	struct hcb_s	*next;		/* Next hcb_t (none: NULL) */
	ipaddr_t	addr;		/* IP address (any: INADDR_ANY or 0) */
	uint16_t	port;		/* TCP port number */
	char		*host;		/* Host: name (any: NULL) */
	ssize_t		hostsz;		/* Size of above */
	char		*root;		/* Document root ("/": NULL) */
	ssize_t		rootsz;		/* Size of above */
} hcb_t;
915 
/*
 * One door instance: the door file name, its handle, and the hcb_t list
 * it services.  Note that the list head is an embedded hcb_t, not a
 * pointer; further entries hang off of list.next.
 */
typedef struct dcb_s {
	struct dcb_s	*next;		/* Next dcb_t (none: NULL) */
	char		*door;		/* Door file (default: NULL) */
	ssize_t		doorsz;		/* Size of above */
	door_handle_t	hand;		/* Door handle (default: NULL) */
	hcb_t		list;		/* Head of a hcb_t list (any: NULL) */
} dcb_t;
923 
/*
 * dcb_list and the objects making up its custom reader/writer lock:
 *
 *	nca_dcb_lock	mutex taken by readers on the slow path and by the
 *			writer while it scans/updates the per CPU counts
 *	nca_dcb_wait	condvar the writer sleeps on (under nca_dcb_lock)
 *			while readers are still counted
 *	nca_dcb_readers	mutex held by the writer from DCB_WR_ENTER() until
 *			DCB_WR_EXIT(); readers that saw DCB_COUNT_USELOCK
 *			enter and exit it simply to wait for the writer.
 *			Not to be confused with nca_cpu_t.dcb_readers, which
 *			is the per CPU reader count word.
 */
extern dcb_t dcb_list;
extern kmutex_t nca_dcb_lock;
extern kcondvar_t nca_dcb_wait;
extern kmutex_t nca_dcb_readers;

#define	NOHANDLE ((door_handle_t)-1)

/*
 * Layout of nca_cpu_t.dcb_readers: a flag bit set by the writer plus a
 * reader count.
 * NOTE(review): bit 30 (0x40000000) is covered by neither value below -
 * confirm whether it is intentionally reserved.
 */
#define	DCB_COUNT_USELOCK	0x80000000
#define	DCB_COUNT_MASK		0x3FFFFFFF
933 
/*
 * DCB_RD_ENTER(cpu) - acquire the dcb_list lock as a reader.
 *
 * cpu must be a modifiable lvalue: it is assigned CPU->cpu_seqid and is
 * then used to index nca_gv[]; the same value is what DCB_RD_EXIT() takes.
 *
 * Fast path: atomically increment this CPU's dcb_readers; if the new value
 * does not have DCB_COUNT_USELOCK set we are done.
 *
 * Slow path (USELOCK set): under nca_dcb_lock undo the increment and, if
 * that left exactly DCB_COUNT_USELOCK (i.e. a zero count) and there are
 * waiters on nca_dcb_wait, signal it.  Then enter and immediately exit
 * nca_dcb_readers, which DCB_WR_ENTER() holds until DCB_WR_EXIT(), and
 * retry the increment.
 *
 * Expands to a braced block that declares a local named rp.
 */
#define	DCB_RD_ENTER(cpu) {						\
	uint32_t *rp;							\
									\
	cpu = CPU->cpu_seqid;						\
	rp = &nca_gv[cpu].dcb_readers;					\
	while (atomic_add_32_nv(rp, 1) & DCB_COUNT_USELOCK) {		\
		/* Need to use the lock, so do the dance */		\
		mutex_enter(&nca_dcb_lock);				\
		if (atomic_add_32_nv(rp, -1) == DCB_COUNT_USELOCK &&	\
		    CV_HAS_WAITERS(&nca_dcb_wait)) {			\
			/* May be the last reader for this CPU */	\
			cv_signal(&nca_dcb_wait);			\
		}							\
		mutex_exit(&nca_dcb_lock);				\
		mutex_enter(&nca_dcb_readers);				\
		/*							\
		 * We block above waiting for the writer to exit the	\
		 * readers lock, if we didn't block then while we were	\
		 * away in the nca_dcb_lock enter the writer exited,	\
		 * we could optimize for this case by checking USELOCK	\
		 * after the decrement, but as this is an exceptional	\
		 * case not in the fast-path we'll just take the hit	\
		 * of a needless readers enter/exit.			\
		 */							\
		mutex_exit(&nca_dcb_readers);				\
	}								\
}
961 
/*
 * DCB_RD_EXIT(cpu) - release a reader hold taken by DCB_RD_ENTER().
 *
 * cpu is the index into nca_gv[] that DCB_RD_ENTER() stored.  The count
 * is atomically decremented; only when the result is exactly
 * DCB_COUNT_USELOCK (flag set, count now zero) is nca_dcb_lock taken so
 * that a waiter on nca_dcb_wait, if any, can be signalled.
 */
#define	DCB_RD_EXIT(cpu) {						\
	uint32_t *rp = &nca_gv[cpu].dcb_readers;			\
									\
	if (atomic_add_32_nv(rp, -1) == DCB_COUNT_USELOCK) {		\
		mutex_enter(&nca_dcb_lock);				\
		if (CV_HAS_WAITERS(&nca_dcb_wait)) {			\
			/* May be the last reader for this CPU */	\
			cv_signal(&nca_dcb_wait);			\
		}							\
		mutex_exit(&nca_dcb_lock);				\
	}								\
}
974 
/*
 * DCB_WR_ENTER() - acquire the dcb_list lock as the writer.
 *
 * Takes nca_dcb_readers (kept held until DCB_WR_EXIT()) and then
 * nca_dcb_lock.  Each pass walks nca_gv[0 .. max_ncpus - 1], setting
 * DCB_COUNT_USELOCK in any dcb_readers word that does not yet have it
 * (cas32() retry loop) and summing the reader counts seen.  When the sum
 * is zero the loop ends; otherwise the writer sleeps on nca_dcb_wait
 * (dropping nca_dcb_lock while asleep) and rescans after being woken.
 * nca_dcb_lock is released before returning.
 *
 * Expands to a braced block declaring locals named cpu, readers, new, old
 * and rp.
 */
#define	DCB_WR_ENTER() {						\
	int cpu;							\
	int readers;							\
									\
	mutex_enter(&nca_dcb_readers);					\
	mutex_enter(&nca_dcb_lock);					\
	for (;;) {							\
		readers = 0;						\
		for (cpu = 0; cpu < max_ncpus; cpu++) {			\
			int new;					\
			uint32_t *rp = &nca_gv[cpu].dcb_readers;	\
			int old = *rp;					\
									\
			if (old & DCB_COUNT_USELOCK) {			\
				readers += old & DCB_COUNT_MASK;	\
				continue;				\
			}						\
			new = old | DCB_COUNT_USELOCK;			\
			while (cas32(rp, old, new) != old) {		\
				old = *rp;				\
				new = old | DCB_COUNT_USELOCK;		\
			}						\
			readers += (new & DCB_COUNT_MASK);		\
		}							\
		if (readers == 0)					\
			break;						\
		cv_wait(&nca_dcb_wait, &nca_dcb_lock);			\
	}								\
	mutex_exit(&nca_dcb_lock);					\
}
1005 
/*
 * DCB_WR_EXIT() - release the writer hold taken by DCB_WR_ENTER().
 *
 * Under nca_dcb_lock, clears DCB_COUNT_USELOCK in every per CPU
 * dcb_readers word (cas32() retry loop, so increments made by readers in
 * the meantime are preserved), then drops nca_dcb_lock followed by
 * nca_dcb_readers, which lets any reader blocked in DCB_RD_ENTER() retry.
 *
 * Expands to a braced block declaring locals named cpu, new, old and rp.
 */
#define	DCB_WR_EXIT() {							\
	int cpu;							\
									\
	mutex_enter(&nca_dcb_lock);					\
	for (cpu = 0; cpu < max_ncpus; cpu++) {				\
		int new;						\
		uint32_t *rp = &nca_gv[cpu].dcb_readers;		\
		int old = *rp;						\
									\
		new = old & ~DCB_COUNT_USELOCK;				\
		while (cas32(rp, old, new) != old) {			\
			old = *rp;					\
			new = old & ~DCB_COUNT_USELOCK;			\
		}							\
	}								\
	mutex_exit(&nca_dcb_lock);					\
	mutex_exit(&nca_dcb_readers);					\
}
1024 
/*
 * nca_door_t - a door handle and name together with the state used to
 * coordinate threads doing upcalls with a thread doing a door_init.
 * NOTE(review): per the member comments the door_init thread is the
 * exclusive ("writer") party and upcalls are the shared ("reader")
 * parties; the exact protocol lives with the users of this structure.
 */
typedef struct nca_door_s {
	door_handle_t	handle;		/* The door handle */
	char		*name;		/* The door name */
	kmutex_t	lock;		/* The door lock */
	kcondvar_t	cv_writer;	/* condvar for thread waiting */
					/* to do door_init */
	kcondvar_t	cv_reader;	/* condvar for thread waiting */
					/* for a door_init to finish */
	uint32_t	upcalls;	/* Number of upcalls in progress */
	boolean_t	init_waiting;	/* door_init thread wanting to */
					/* be exclusive */
} nca_door_t;
1037 
1038 /*
1039  * if_t - interface per instance data.
1040  */
1041 
typedef struct if_s {

	boolean_t dev;		/* is a device instance */

	queue_t	*rqp;		/* our read-side STREAMS queue */
	queue_t	*wqp;		/* our write-side STREAMS queue */

	/* DLPI M_DATA IP fastpath template */
	size_t	mac_length;
	mblk_t	*mac_mp;
	int32_t	mac_mtu;
	int32_t	mac_addr_len;

	uint32_t ip_ident;	/* our IP ident value */

	boolean_t hwcksum;	/* underlying NIC supports checksum offload */

	nca_squeue_t *inq;		/* in-bound nca_squeue_t */
	nca_squeue_t *ouq;		/* out-bound nca_squeue_t */

	/*
	 * All if_t which are associated with a CPU and have a default
	 * router on link are chained in a circular linked list.
	 */
	struct if_s *next_if;
	struct if_s *prev_if;
	ipaddr_t local_addr;	/* This interface's IP address. */
	uchar_t router_ether_addr[6];

	uint_t	hdr_ioc_id;	/* id of DL_IOC_HDR_INFO M_IOCTL sent down */
	boolean_t info_req_pending;

	int32_t	capab_state;	/* Capability probe state */

	/* Bound local address of a NCAfs instance. */
	struct sockaddr_in	bound_addr;
} if_t;
1079 
/*
 * connf_t - connection fanout data.
 *
 * The hash tables and their linkage (nca_conn_t hashnext, hashprev) are
 * protected by the per-bucket lock. Each nca_conn_t inserted in the list
 * points back (via its hashfanout member) at the connf_t that heads the
 * bucket.
 */

typedef struct connf_s {
	uint32_t	max;
	struct conn_s	*head;		/* first nca_conn_t in this bucket */
	kmutex_t	lock;		/* per-bucket lock, see above */
} connf_t;
1093 
#ifdef	CONNP_T_TRACE_ON

/* Number of connp_t entries in each nca_conn_t's pkt_tv[] vector */
#define	CONNP_TV_SZ 32

/*
 * Per nca_conn_t packet tracing.
 *
 * The bit-field members below add up to exactly 32 bits.
 * NOTE(review): they are declared with a signed base type, so a single
 * bit member may read back as -1 rather than 1 - confirm that consumers
 * only test for zero/non-zero.
 */
typedef struct connp_s {
	clock_t		lbolt;
	clock_t		tcp_ti;
	int32_t		len : 16,
			dir : 1,
			state : 4,
			flags : 6,
			xmit_np : 1,
			xmit_head : 1,
			unsent : 1,
			tail_unsent : 1,
			direct : 1;
	uint32_t	state1;
	uint32_t	state2;
	uint32_t	seq;
	uint32_t	ack;
	uint32_t	snxt;
	uint32_t	swnd;
} connp_t;

#endif	/* CONNP_T_TRACE_ON */
1122 
1123 /*
1124  * nca_conn_t - connection per instance data.
1125  *
1126  * Note: hashlock is used to provide atomic access to all nca_conn_t members
1127  * above it. All other members are protected by the per CPU inq nca_squeue_t
1128  * which is used to serialize access to all nca_conn_t's per interface.
1129  *
1130  * Note: the nca_conn_t can have up to 3 NODE_REFHOLDs:
1131  *
1132  *	1) if req_np != NULL then a NODE_REFHOLD(req_np) was done:
1133  *
1134  *	    1.1) if http_refed then a NODE_REFHOLD(req_np) was done
1135  *
1136  *	    1.2) if http_frefed then a NODE_REFHOLD(req_np->fileback) was done
1137  *
1138  *
1139  * TODO: reorder elements in fast-path code access order.
1140  *
1141  * Dnn4XnXXDnnDnnXXXnnXXXnnUXnnXXXnnXXnnDDXXXDXDXDXnnDnnXXDDnXXXDDnnXXXDDnn
1142  * XXXDDnnXXXDDnnXXXDDnnXXnnDXXnn
1143  * b+++DDnAnDDDDDnnDnnUnnUUDXDUnnDnn20xnnXnnddnnUUUnnXXUnXXnnUUUnn
1144  * DDDDDDnnUUnnXXUXUnn4UD4Unn4UnUUnn
1145  * 64-bit: Xnn4+4pnnppEnEnn3pnn3pnnEJnnXXnnuunn4+ppnnXX3pD4+pD4+pD4+pnnEnnppnnD
1146  */
1147 
#define	TCP_XMIT_MAX_IX	5		/* Max xmit descriptors */

typedef struct conn_s {

	int32_t ref;			/* Reference counter */

	te_t	tcp_ti;			/* TCP TIMER timer entry */

	struct conn_s	*twnext;	/* TIME_WAIT next */
	struct conn_s	*twprev;	/* TIME_WAIT prev */
	clock_t	twlbolt;		/* TIME_WAIT lbolt */

	clock_t create;			/* Create lbolt time */

	connf_t	*hashfanout;		/* Hash bucket we're part of */
	struct conn_s	*hashnext;	/* Hash chain next */
	struct conn_s	*hashprev;	/* Hash chain prev */

	struct conn_s	*bindnext;	/* Next conn_s in bind list. */
	struct conn_s	*bindprev;	/* Prev conn_s in bind list. */
	void		*tbf;		/* Pointer to bind hash list struct. */
	/*
	 * Note: atomic access of members above is guaranteed by the
	 * hashfanout->lock of the hash bucket that the nca_conn_t is in.
	 */

	size_t	mac_length;		/* MAC prepend length */
	mblk_t	*mac_mp;		/* MAC prepend data */

	ipaddr_t	laddr;		/* Local address */
	ipaddr_t	faddr;		/* Remote address. 0 => not connected */

	union {
		struct {
			uint16_t u_fport; /* Remote port */
			uint16_t u_lport; /* Local port */
		} u_ports1;
		uint32_t u_ports2;	/* Rem port, local port */
					/* Used for TCP_MATCH performance */
	} u_port;
#define	conn_lport	u_port.u_ports1.u_lport
#define	conn_fport	u_port.u_ports1.u_fport
#define	conn_ports	u_port.u_ports2

	if_t	*ifp;			/* Interface for this connection */
	nca_squeue_t *inq;		/* Per CPU inq for this connection */

	uint32_t req_tag;		/* nca_io_t request tag (0 == NONE) */
	int	req_parse;		/* HTTP request parse state */
	node_t	*req_np;		/* HTTP request node_t */
	mblk_t	*req_mp;		/* HTTP request mblk_t */
	char	*reqpath;		/* HTTP request URI path component */
	int	reqpathsz;		/* size of above */
	char	*reqrefer;		/* HTTP "Referer:" string */
	int	reqrefersz;		/* size of above */
	char	*requagent;		/* HTTP "User-Agent:" string */
	int	requagentsz;		/* size of above */
	struct conn_s *nodenext;	/* Node_t nca_conn_t list */

	clock_t	http_count;		/* HTTP Keep-Alive request count */

	/*
	 * req_np xmit state used across calls to tcp_xmit(). A reference
	 * to the req_np and to any indirect node_t (i.e. file/ctag) ...
	 */
	node_t	*xmit_refed;		/* have a ref to the uri node_t */
	node_t	*xmit_cur;		/* current node to transmit */

	int	xmit_ix;		/* current xmit[] index */
	int	xmit_pix;		/* past end xmit[] index */

	struct {
		node_t	*np;		/* node_t pointer for ref */
		char	*dp;		/* data pointer */
		uint16_t *cp;		/* cksum array */
		int	sz;		/* remaining data to xmit */
		int	iso;		/* initial segment offset (if any) */
		node_t	*refed;		/* have a ref to the node_t */
		int	dsz;		/* remaining data for current segment */
		caddr_t	*dvp;		/* data segment virtual pointer */
	} xmit[TCP_XMIT_MAX_IX];

	/*
	 * Connection NCA_IO_DIRECT_SPLICE & NCA_IO_DIRECT_TEE reference,
	 * see http_direct_splice and http_direct_tee below for the type
	 * of send to do.
	 */
	struct conn_s	*direct;	/* nca_conn_t to send recv data to */
	mblk_t		*direct_mp;	/* mblk_t to use for tcp_close() */

	/*
	 * nca_conn_t state.
	 */

	int32_t	tcp_state;

	/* First flag word; the bit-fields below add up to 32 bits. */
	uint32_t
		tcp_urp_last_valid : 1,	/* Is tcp_urp_last valid? */
		tcp_hard_binding : 1,	/* If we've started a full bind */
		tcp_hard_bound : 1,	/* If we've done a full bind with IP */
		tcp_fin_acked : 1,	/* Has our FIN been acked? */

		tcp_fin_rcvd : 1,	/* Have we seen a FIN? */
		tcp_fin_sent : 1,	/* Have we sent our FIN yet? */
		tcp_ordrel_done : 1,	/* Have we sent the ord_rel upstream? */
		tcp_flow_stopped : 1,	/* Have we flow controlled xmitter? */

		tcp_debug : 1,		/* SO_DEBUG "socket" option. */
		tcp_dontroute : 1,	/* SO_DONTROUTE "socket" option. */
		tcp_broadcast : 1,	/* SO_BROADCAST "socket" option. */
		tcp_useloopback : 1,	/* SO_USELOOPBACK "socket" option. */

		tcp_oobinline : 1,	/* SO_OOBINLINE "socket" option. */
		tcp_dgram_errind : 1,	/* SO_DGRAM_ERRIND option */
		tcp_detached : 1,	/* If we're detached from a stream */
		tcp_bind_pending : 1,	/* Client is waiting for bind ack */

		tcp_unbind_pending : 1, /* Client sent T_UNBIND_REQ */
		tcp_deferred_clean_death : 1,
					/* defer tcp endpoint cleanup etc. */
		tcp_co_wakeq_done : 1,	/* A strwakeq() has been done */
		tcp_co_wakeq_force : 1,	/* A strwakeq() must be done */

		tcp_co_norm : 1,	/* In normal mode, putnext() done */
		tcp_co_wakeq_need : 1,	/* A strwakeq() needs to be done */
		tcp_snd_ws_ok : 1,	/* Received WSCALE from peer */
		tcp_snd_ts_ok : 1,	/* Received TSTAMP from peer */

		tcp_linger : 1,		/* SO_LINGER turned on */
		tcp_zero_win_probe: 1,	/* Zero win probing is in progress */
		tcp_loopback: 1,	/* src and dst are the same machine */
		tcp_localnet: 1,	/* src and dst are on the same subnet */

		tcp_syn_defense: 1,	/* For defense against SYN attack */
#define	tcp_dontdrop	tcp_syn_defense
		tcp_set_timer : 1,
		tcp_1_junk_fill_thru_bit_31 : 2;

	/* Second flag word; the bit-fields below add up to 32 bits. */
	uint32_t
		tcp_active_open: 1,	/* This is a active open */
		tcp_timeout : 1,	/* qbufcall failed, qtimeout pending */
		tcp_rexmit : 1,		/* TCP is retransmitting */
		tcp_snd_sack_ok : 1,	/* Can use SACK for this connection */

		tcp_bind_proxy_addr : 1,	/* proxy addr is being used */
		tcp_recvdstaddr : 1,	/* return T_EXTCONN_IND with dst addr */
		tcp_refed : 1,		/* nca_conn_t refed by TCP */
		tcp_time_wait_comp : 1, /* TIME_WAIT compressed nca_conn_t */

		tcp_close : 1,		/* nca_conn_t close */
		http_persist : 3,	/* HTTP persistent connection state */

		deferred_xmit_end : 1,	/* xmit_end() deferred to xmit() */
		http_direct_splice : 1,	/* have a connection to splice to */
		http_direct_tee : 1,	/* have a connection to tee to */

		tcp_2_junk_fill_thru_bit_31 : 17;
/*
 * Note: all nca_conn_t members to be accessed by a tcp_time_wait_comp
 * nca_conn_t must be above this point !!!
 */

	uchar_t	tcp_timer_backoff;	/* Backoff shift count. */
	clock_t tcp_last_recv_time;	/* Last time we receive a segment. */
	clock_t	tcp_dack_set_time;	/* When delayed ACK timer is set. */

	int	tcp_ip_hdr_len;		/* Byte len of our current IP header */
	clock_t	tcp_first_timer_threshold;  /* When to prod IP */
	clock_t	tcp_second_timer_threshold; /* When to give up completely */
	clock_t	tcp_first_ctimer_threshold; /* 1st threshold while connecting */
	clock_t tcp_second_ctimer_threshold; /* 2nd ... while connecting */

	clock_t	tcp_last_rcv_lbolt; /* lbolt on last packet, used for PAWS */


	uint32_t tcp_obsegs;		/* Outbound segments on this stream */

	uint32_t tcp_mss;		/* Max segment size */
	uint32_t tcp_naglim;		/* Tunable nagle limit */
	int32_t	tcp_hdr_len;		/* Byte len of combined TCP/IP hdr */
	tcph_t	*tcp_tcph;		/* tcp header within combined hdr */
	int32_t	tcp_tcp_hdr_len;	/* tcp header len within combined */
	uint32_t	tcp_valid_bits;
#define	TCP_ISS_VALID	0x1	/* Is the tcp_iss seq num active? */
#define	TCP_FSS_VALID	0x2	/* Is the tcp_fss seq num active? */
#define	TCP_URG_VALID	0x4	/* Is the tcp_urg seq num active? */

	int32_t	tcp_xmit_hiwater;	/* Send buffer high water mark. */

	union {				/* template ip header */
		ipha_t	tcp_u_ipha;
		char	tcp_u_buf[IP_SIMPLE_HDR_LENGTH+TCP_MIN_HEADER_LENGTH];
		double	tcp_u_aligner;
	} tcp_u;
#define	tcp_ipha	tcp_u.tcp_u_ipha
#define	tcp_iphc	tcp_u.tcp_u_buf

	uint32_t tcp_sum;		/* checksum to compensate for source */
					/* routed packets. Host byte order */

	uint16_t tcp_last_sent_len;	/* Record length for nagle */
	uint16_t tcp_dupack_cnt;	/* # of consecutive duplicate acks */

	uint32_t tcp_rnxt;		/* Seq we expect to recv next */
	uint32_t tcp_rwnd;		/* Current receive window */
	uint32_t tcp_rwnd_max;		/* Maximum receive window */

	mblk_t	*tcp_rcv_head;		/* Queued until push, urgent data or */
	mblk_t	*tcp_rcv_tail;		/* the count exceeds */
	uint32_t tcp_rcv_cnt;		/* tcp_rcv_push_wait. */

	mblk_t	*tcp_reass_head;	/* Out of order reassembly list head */
	mblk_t	*tcp_reass_tail;	/* Out of order reassembly list tail */

	uint32_t tcp_cwnd_ssthresh;	/* Congestion window */
	uint32_t tcp_cwnd_max;
	uint32_t tcp_csuna;		/* Clear (no rexmits in window) suna */

	int	tcp_rttv_updates;
	clock_t	tcp_rto;		/* Round trip timeout */
	clock_t	tcp_rtt_sa;		/* Round trip smoothed average */
	clock_t	tcp_rtt_sd;		/* Round trip smoothed deviation */
	clock_t	tcp_rtt_update;		/* Round trip update(s) */
	clock_t tcp_ms_we_have_waited;	/* Total retrans time */

	uint32_t tcp_swl1;		/* These help us avoid using stale */
	uint32_t tcp_swl2;		/*  packets to update state */

	mblk_t	*tcp_xmit_head;		/* Head of rexmit list */
	mblk_t	*tcp_xmit_last;		/* last valid data seen by tcp_wput */
	uint32_t tcp_unsent;		/* # of bytes in hand that are unsent */
	mblk_t	*tcp_xmit_tail;		/* Last rexmit data sent */
	uint32_t tcp_xmit_tail_unsent;	/* # of unsent bytes in xmit_tail */

	uint32_t tcp_snxt;		/* Senders next seq num */
	uint32_t tcp_suna;		/* Sender unacknowledged */
	uint32_t tcp_rexmit_nxt;	/* Next rexmit seq num */
	uint32_t tcp_rexmit_max;	/* Max retran seq num */
	int32_t	tcp_snd_burst;		/* Send burst factor */
	uint32_t tcp_swnd;		/* Senders window (relative to suna) */
	uint32_t tcp_cwnd;		/* Congestion window */
	int32_t tcp_cwnd_cnt;		/* cwnd cnt in congestion avoidance */
	uint32_t tcp_ackonly;		/* Senders last ack seq num */

	uint32_t tcp_irs;		/* Initial recv seq num */
	uint32_t tcp_iss;		/* Initial send seq num */
	uint32_t tcp_fss;		/* Final/fin send seq num */
	uint32_t tcp_urg;		/* Urgent data seq num */

	uint32_t tcp_rack;		/* Seq # we have acked */
	uint32_t tcp_rack_cnt;		/* # of bytes we have deferred ack */

	uint32_t tcp_max_swnd;		/* Maximum swnd we have seen */
	int64_t	tcp_rexmit_fire_time;
	int64_t	tcp_dack_fire_time;
	int64_t tcp_ka_fire_time;
	int64_t	tcp_http_ka_fire_time;

	int32_t	tcp_keepalive_intrvl;	/* Zero means don't bother */
	int32_t	tcp_ka_probe_sent;
	int32_t tcp_ka_last_intrvl;

/* Bit values for tcp_running_timer and tcp_pending_timer below */
#define	TCP_DACK_TIMER		0x1
#define	TCP_REXMIT_TIMER	0x2
#define	TCP_KA_TIMER		0x4
#define	TCP_HTTP_KA_TIMER	0x8
	int16_t		tcp_running_timer;
	int16_t		tcp_pending_timer;

#ifdef	CONNP_T_TRACE_ON
	connp_t *pkt_tp;		/* Packet tracing pointer */
	connp_t	pkt_tv[CONNP_TV_SZ];	/* Packet tracing vector */
#endif	/* CONNP_T_TRACE_ON */

} nca_conn_t;
1422 
1423 /*
1424  * Active stack support parameters to control what ports NCA can use.
1425  * They are declared in ncaproto.c
1426  */
extern struct nca_tbf_s *nca_tcp_port;
/*
 * NOTE(review): presumably the inclusive low/high bounds of the port range
 * NCA may use - confirm against ncaproto.c.
 */
extern in_port_t tcp_lo_port;
extern in_port_t tcp_hi_port;
1430 
1431 /*
1432  * nca_conn_t.http_persist values and corresponding HTTP header strings are
1433  * used to determine the connection persistent state of a connection and
1434  * any HTTP header which needs to be sent.
1435  */
1436 
/* Values for the 3 bit wide nca_conn_t.http_persist member */
#define	PERSIST_NONE		0	/* Not persistent */

#define	PERSIST_CLOSE		1	/* Was persistent, send close header */
#define	PERSIST_TRUE		2	/* Connection is HTTP persistent */
#define	PERSIST_KA		3	/* Persistent, send Keep-Alive header */
#define	PERSIST_UPCALL		4	/* Insert "Connection: close" on */
					/* upcall and clear flag */

/*
 * Header text per flavor; each string ends with the blank line ("\r\n")
 * that terminates an HTTP header section.
 */
#define	PERSIST_HDR_NONE	"\r\n"
#define	PERSIST_HDR_CLOSE	"Connection: close\r\n\r\n"
#define	PERSIST_HDR_KA		"Connection: Keep-Alive\r\n\r\n"
1448 
1449 /*
1450  * nca_conn_t nca_squeue_ctl() flag values:
1451  */
1452 
#define	CONN_MISS_DONE		0x0001	/* The conn miss processing is done */
#define	IF_TIME_WAIT		0x0002	/* A TIME_WAIT has fired */
#define	IF_TCP_TIMER		0x0003	/* A TCP TIMER has fired */
#define	NCA_CONN_TCP_TIMER	0x0004	/* A TCP TIMER needs to be execed */
#define	IF_TCP_CONNECT		0x0005	/* TCP connection request */
#define	IF_TCP_SEND		0x0006	/* A new send request. */

/*
 * NOTE(review): IF_TCP_DIRECT_TEE has the same numeric value as
 * CONN_MISS_DONE; going by the "IF_TCP_DIRECT_TO | TEE" trace comments it
 * looks to be a modifier bit or'ed into IF_TCP_DIRECT_TO/_FROM rather than
 * a stand-alone value - confirm against the nca_squeue_ctl() callers.
 */
#define	IF_TCP_DIRECT_TO	0x0010	/* A TCP direct i/o, step 1 */
#define	IF_TCP_DIRECT_FROM	0x0012	/* A TCP direct i/o, step 2 */
#define	IF_TCP_DIRECT_TEE	0x0001	/* If a tee else a splice */
#define	IF_TCP_DIRECT_CLOSE	0x001F	/* A TCP direct i/o close */

#define	NCA_CONN_T_STK_DEPTH	7	/* max stack backtrace depth */
1466 
/*
 * One nca_conn_t trace record, as filled in by NCA_CONN_T_TRACE().
 */
struct conn_ts {
	nca_conn_t	*conn;		/* connection being traced */
	unsigned action;		/* NCA_CONN_T_* code, or'ed w/ data */
	int	ref;			/* conn->ref when the record was made */
	int	cpu;			/* CPU->cpu_seqid of the recorder */
	pc_t	stk[NCA_CONN_T_STK_DEPTH + 1];	/* stack backtrace */
};
1474 
/*
 * nca_conn_t tracing is forced off here; to build with tracing replace
 * the #undef below with a #define of NCA_CONN_T_TRACE_ON.
 */
#undef	NCA_CONN_T_TRACE_ON

#ifdef	NCA_CONN_T_TRACE_ON

/*
 * adb:
 * 32 bit
 *	*conn_tp,0t4096-(((*conn_tp)-con_tv)%0t48)/PXDDnPnPnPnPnPnPnPnPnn
 *	con_tv,((*conn_tp)-con_tv)%0t48/PXDDnPnPnPnPnPnPnPnPnn
 * 64 bit
 *	*conn_tp,0t4096-(((*conn_tp)-con_tv)%0t56)/PXDDnXnXnXnXnXnXnXnXnn
 *	con_tv,((*conn_tp)-con_tv)%0t56/PXDDnXnXnXnXnXnXnXnXnn
 */

/*
 * Trace action codes, stored in conn_ts.action.  The code occupies the
 * high order byte; where a comment reads "| xxx" the caller or's the
 * named value into the low order bits.
 */
#define	NCA_CONN_T_REFINIT	0x10000000	/* CONN_REF init() |ref value */
#define	NCA_CONN_T_REFINIT1	0x11000000	/* CONN_REF init() |ref value */
#define	NCA_CONN_T_REFINIT2	0x12000000	/* CONN_REF init() |ref value */
#define	NCA_CONN_T_REFNOTCP	0x13000000 /* CONN_REF no longer tcp_refed */
#define	NCA_CONN_T_REFHOLD	0x1A000000	/* CONN_REFHOLD() | ref value */
#define	NCA_CONN_T_REFRELE	0x1F000000	/* CONN_REFRELE() | ref value */

#define	NCA_CONN_T_HTTPCALL	0x20000000	/* call http() | rbytes */
#define	NCA_CONN_T_HTTPRET1	0x21000000	/* return http() */
#define	NCA_CONN_T_HTTPRET2	0x22000000	/* return ! http() */

#define	NCA_CONN_T_MISSDONE	0x30000000	/* CONN_MISS_DONE */
#define	NCA_CONN_T_TCPTIMER	0x31000000	/* NCA_CONN_TCP_TIMER */
#define	NCA_CONN_T_XMIT_END	0x32000000	/* xmit_end() | tcp_unsent */
#define	NCA_CONN_T_XMIT_BAD	0x33000000 /* xmit_end() bad state |tcp_state */
#define	NCA_CONN_T_XMIT_DEF	0x34000000	/* xmit_end() deferred */
#define	NCA_CONN_T_TIME_WAIT 0x35000000	/* done: tcp_state == TCPS_TIME_WAIT */
#define	NCA_CONN_T_PKT_IN	0x36000000	/* tcp_input() | flags */
#define	NCA_CONN_T_PKT_OUT	0x37000000	/* tcp_input() | flags */

#define	NCA_CONN_T_DIRECT	0x40000000	/* tcp_direct() from conn_t */
#define	NCA_CONN_T_DIRECT1	0x41000000	/* tcp_direct() to conn_t */
#define	NCA_CONN_T_DIRECT2	0x42000000	/* IF_TCP_DIRECT_TO | TEE */
#define	NCA_CONN_T_DIRECT3	0x43000000	/* IF_TCP_DIRECT_FROM | TEE */
#define	NCA_CONN_T_DIRECT4	0x44000000	/* tcp_close() */
#define	NCA_CONN_T_DIRECT5	0x45000000	/* IF_TCP_DIRECT_CLOSE */
						/* from|tcp_state */
#define	NCA_CONN_T_DIRECT6	0x46000000	/* IF_TCP_DIRECT_CLOSE to */
1517 
/*
 * NCA_CONN_T_TRACE_STK() - capture a stack backtrace into _p->stk[].
 *
 * Only for use inside NCA_CONN_T_TRACE(), which supplies the locals _p
 * (the struct conn_ts being filled in) and _ix (scratch).  stk[] has
 * NCA_CONN_T_STK_DEPTH + 1 slots; when the backtrace does not use all of
 * them a zero terminator is stored in the first unused slot.
 *
 * x86: getpcstack() fills stk[0 .. _ix - 1], so the terminator belongs at
 * stk[_ix] (storing it at stk[_ix + 1] would leave a stale entry in
 * stk[_ix] and, for _ix == NCA_CONN_T_STK_DEPTH, write past the array).
 *
 * Others: stk[0] is the callee() pc and getpcstack() fills
 * stk[1 .. _ix], so the terminator belongs at stk[_ix + 1].
 */
#if defined(__i386) || defined(__amd64)
#define	NCA_CONN_T_TRACE_STK() {					\
	_ix = getpcstack(&_p->stk[0], NCA_CONN_T_STK_DEPTH + 1);	\
	if (_ix < NCA_CONN_T_STK_DEPTH + 1) {				\
		_p->stk[_ix] = 0;					\
	}								\
}
#else
#define	NCA_CONN_T_TRACE_STK() {					\
	_p->stk[0] = (pc_t)callee();					\
	_ix = getpcstack(&_p->stk[1], NCA_CONN_T_STK_DEPTH);		\
	if (_ix < NCA_CONN_T_STK_DEPTH) {				\
		_p->stk[_ix + 1] = 0;					\
	}								\
}
#endif
1534 
/* Number of records in the con_tv[] trace ring */
#define	CON_TV_SZ 4096

extern struct conn_ts con_tv[CON_TV_SZ];
extern struct conn_ts *conn_tp;		/* next con_tv[] record to use */

/*
 * NCA_CONN_T_TRACE(p, a) - append a trace record for nca_conn_t p with
 * action code a.
 *
 * A record is claimed by advancing conn_tp with casptr(), wrapping back
 * to con_tv[0] after the last entry, and retrying if another thread moved
 * the pointer first.  The claimed record is then filled in with p, a,
 * p->ref, the current CPU's cpu_seqid and a stack backtrace.
 *
 * The locals _p and _ix are referenced by name from within
 * NCA_CONN_T_TRACE_STK() and so must not be renamed.  Note that p is
 * evaluated more than once.
 */
#define	NCA_CONN_T_TRACE(p, a) {					\
	struct conn_ts *_p;						\
	struct conn_ts *_np;						\
	int    _ix;							\
									\
	do {								\
		_p = conn_tp;					\
		if ((_np = _p + 1) == &con_tv[CON_TV_SZ])	\
			_np = con_tv;				\
	} while (casptr(&conn_tp, _p, _np) != _p);			\
	_p->conn = (p);							\
	_p->action = (a);						\
	_p->ref = (p)->ref;						\
	_p->cpu = CPU->cpu_seqid;					\
	NCA_CONN_T_TRACE_STK();						\
}
1556 
#else	/* NCA_CONN_T_TRACE_ON */

/*
 * Tracing compiled out: NCA_CONN_T_TRACE() expands to nothing, so its
 * arguments are never evaluated.
 */
#define	NCA_CONN_T_TRACE(p, a)

#endif	/* NCA_CONN_T_TRACE_ON */
1562 
1563 
/*
 * CONN_REFHOLD(connp) - take an additional reference on an nca_conn_t.
 *
 * The caller must already hold a reference; a count of zero or less at
 * entry is treated as fatal.  A trace record is logged first (a no-op
 * unless NCA_CONN_T_TRACE_ON is defined).  connp is evaluated more than
 * once.
 */
#define	CONN_REFHOLD(connp) {						\
	NCA_CONN_T_TRACE((connp), NCA_CONN_T_REFHOLD | ((connp)->ref + 1)); \
	if ((connp)->ref < 1) {						\
		panic("nca CONN_REFHOLD: %p has no references",		\
		    (void *)(connp));					\
	}								\
	(connp)->ref += 1;						\
}
1573 
/*
 * CONN_REFRELE(connp) - drop a reference on an nca_conn_t.
 *
 * Fatal conditions, checked before the count is touched:
 *	- the connection is tcp_refed and this is its only reference
 *	- the count is already zero or less
 *
 * When the count reaches zero the hash bucket lock is entered and the
 * connection is handed to nca_conn_free(), which is what exits that lock.
 * A trace record is logged first (a no-op unless NCA_CONN_T_TRACE_ON is
 * defined).  connp is evaluated more than once.
 */
#define	CONN_REFRELE(connp) {						\
	NCA_CONN_T_TRACE((connp), NCA_CONN_T_REFRELE | ((connp)->ref - 1)); \
	if ((connp)->tcp_refed && (connp)->ref == 1) {			\
		panic("nca CONN_REFRELE: %p "				\
		    "has only tcp_refed reference",			\
		    (void *)(connp));					\
	}								\
	if ((connp)->ref < 1) {						\
		panic("nca CONN_REFRELE: %p has no references",		\
		    (void *)(connp));					\
	}								\
	(connp)->ref -= 1;						\
	if ((connp)->ref == 0) {					\
		/* That was the last reference, free the nca_conn_t */	\
		kmutex_t *_bucket_lock = &(connp)->hashfanout->lock;	\
		mutex_enter(_bucket_lock);				\
		nca_conn_free(connp);					\
		/* Note: nca_conn_free exits the bucket lock */		\
	}								\
}
1600 
/*
 * The nca_io2_shadow_t is used by the kernel to contain a copy of a user-
 * land nca_io2_t and the user-land nca_io2_t address and size.
 */

typedef struct nca_io2_shadow_s {
	nca_io2_t	io;		/* copy of user-land nca_io2_t */
	void		*data_ptr;	/* copy of door_arg_t.data_ptr */
	size_t		data_size;	/* copy of door_arg_t.data_size */
} nca_io2_shadow_t;

#define	SHADOW_NONE	0x00		/* nca_io2_t.shadow NONE */
#define	SHADOW_DOORSRV	0x01		/* nca_io2_t.shadow door_srv() */
#define	SHADOW_NCAFS	0x02		/* nca_io2_t.shadow NCAfs */
1615 
1616 
/*
 * Given a ptr to a nca_io2_t, a field and the field_length, write data
 * into buffer (Note: word aligned offsets).
 *
 *	val	data to store, or NULL for "no data"
 *	vsize	size of val in bytes (stored as-is when val is NULL)
 *	p	pointer to the nca_io2_t; data lands at (char *)p + offset
 *	n_used	lvalue, count of buffer bytes used so far; updated to the
 *		end of the stored data (left untouched when val is NULL)
 *	len	name of the p member that receives the length
 *	off	name of the p member that receives the offset
 *
 * For a non-NULL val the offset is n_used rounded up to a multiple of
 * sizeof (uint32_t).  For a NULL val the offset is set to 0.
 *
 * Expands to a single braced block, like the other statement macros in
 * this file, and every use of an argument is parenthesized.
 */
#define	NCA_IO_WDATA(val, vsize, p, n_used, len, off) {		\
	/*CONSTCOND*/						\
	if ((val) == NULL) {					\
		(p)->len = (vsize);				\
		(p)->off = 0;					\
	} else {						\
		(p)->len = (vsize);				\
		(p)->off = ((n_used) + sizeof (uint32_t) - 1) &	\
				(~(sizeof (uint32_t) - 1));	\
		bcopy((char *)(val),				\
		    ((char *)(p) + (p)->off), (vsize));		\
		(n_used) = (p)->off + (p)->len;			\
	}							\
}
1634 
/*
 * Given a ptr to an nca_io2_t, a field length member name, append data to
 * it in the buffer. Note: must be the last field a WDATA() was done for.
 *
 * Note: a NULL NCA_IO_WDATA() can be followed by a NCA_IO_ADATA() only if
 *		vsize was == -1.
 *
 * When the length member is -1 (i.e. nothing stored yet) the field is
 * started here: its length becomes 0, its offset becomes n_used rounded up
 * to a multiple of sizeof (uint32_t), and n_used is advanced to that
 * offset so that the alignment padding is counted as used.  Without that
 * last step n_used would end up short of the real end of the data and a
 * following field could be placed on top of it.
 *
 * The data is then copied to the end of the field and both the length
 * member and n_used grow by vsize.
 *
 * Expands to a single braced block so that it behaves as one statement,
 * e.g. as the body of an unbraced if.
 */
#define	NCA_IO_ADATA(val, vsize, p, n_used, len, off) {		\
	if ((p)->len == -1) {					\
		(p)->len = 0;					\
		(p)->off = ((n_used) + sizeof (uint32_t) - 1) &	\
		    (~(sizeof (uint32_t) - 1));			\
		(n_used) = (p)->off;				\
	}							\
	bcopy((char *)(val), ((char *)(p) +			\
	    (p)->off + (p)->len), (vsize));			\
	(p)->len += (vsize);					\
	(n_used) += (vsize);					\
}
1653 
/*
 * Given a ptr to a nca_io2_t and a field construct a pointer.
 *
 * off is the name of the offset member of *p; the result is a char *
 * that many bytes past the start of *p.  p is evaluated twice.
 */
#define	NCA_IO_PDATA(p, off) ((char *)(p) + (p)->off)
1658 
1659 
/* True for the ASCII decimal digits; c is evaluated more than once. */
#ifndef	isdigit
#define	isdigit(c) ('0' <= (c) && (c) <= '9')
#endif
1663 
/*
 * Map an ASCII upper case letter to lower case, anything else is returned
 * unchanged.  c is evaluated more than once.
 *
 * The whole expansion is parenthesized so that the macro can be used as
 * an operand, e.g. in tolower(c) == 'a'.
 */
#ifndef	tolower
#define	tolower(c) (((c) >= 'A' && (c) <= 'Z') ? ((c) | 0x20) : (c))
#endif
1667 
/* True for the ASCII letters a-z and A-Z; c is evaluated more than once. */
#ifndef	isalpha
#define	isalpha(c)							\
	(('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#endif
1671 
/*
 * True for space, horizontal tab, newline, vertical tab ('\013'), form
 * feed and carriage return; c is evaluated more than once.
 */
#ifndef	isspace
#define	isspace(c)							\
	((c) == ' ' ||							\
	(c) == '\t' ||							\
	(c) == '\n' ||							\
	(c) == '\013' ||						\
	(c) == '\f' ||							\
	(c) == '\r')
#endif
1676 
/*
 * String helpers, defined elsewhere.
 * NOTE(review): going by the names and the trailing size_t these look to
 * be length bounded variants of strchr/strstr/strcasestr/atoi - confirm
 * the exact semantics against the definitions.
 */
extern char *strnchr(const char *, int, size_t);
extern char *strnstr(const char *, const char *, size_t);
extern char *strncasestr(const char *, const char *, size_t);
extern char *strrncasestr(const char *, const char *, size_t);
extern int atoin(const char *, size_t);
extern int digits(int);
1683 
/*
 * nca_conn_free() is called by CONN_REFRELE() with the connection's
 * hashfanout->lock held and is what exits that lock.
 */
extern void nca_conn_free(nca_conn_t *);
extern void nca_logit_off(void);
extern void node_fr(node_t *);
1687 
/*
 * nca_squeue_t and sqfan_t interfaces, defined elsewhere.
 * NOTE(review): the two void (*)() parameters of nca_squeue_init() and
 * sqfan_ixinit() are declared without a parameter list, so calls through
 * them are not type checked - see the definitions for the expected
 * signatures.
 */
extern nca_squeue_t *nca_squeue_init(nca_squeue_t *, uint32_t,
    processorid_t, void (*)(), void *, void (*)(), clock_t, pri_t);
extern void nca_squeue_fini(nca_squeue_t *);
extern void nca_squeue_enter(nca_squeue_t *, mblk_t *, void *);
extern void nca_squeue_fill(nca_squeue_t *, mblk_t *, void *);
extern mblk_t *nca_squeue_remove(nca_squeue_t *);
extern void nca_squeue_worker(nca_squeue_t *);
extern mblk_t *nca_squeue_ctl(mblk_t *, void *, unsigned short);
extern void nca_squeue_signal(nca_squeue_t *);
extern void nca_squeue_exit(nca_squeue_t *);
extern void sqfan_init(sqfan_t *, uint32_t, uint32_t, uint32_t);
extern nca_squeue_t *sqfan_ixinit(sqfan_t *, uint32_t, nca_squeue_t *, uint32_t,
    processorid_t, void (*)(), void *, void (*)(), clock_t, pri_t);
extern void sqfan_fini(sqfan_t *);
extern void sqfan_fill(sqfan_t *, mblk_t *, void *);
extern mblk_t *sqfan_remove(sqfan_t *);
extern void nca_squeue_nointr(nca_squeue_t *, mblk_t *, void *, int);
extern void nca_squeue_pause(nca_squeue_t *, mblk_t *, void *, int, boolean_t);
extern void nca_squeue_willproxy(nca_squeue_t *);
extern void nca_squeue_proxy(nca_squeue_t *, nca_squeue_t *);
extern void nca_squeue_bind(nca_squeue_t *, uint32_t, processorid_t);
1709 
/* NCA TCP routines; each takes or returns an nca_conn_t pointer. */
1710 extern int nca_tcp_clean_death(nca_conn_t *, int);
1711 extern nca_conn_t *nca_tcp_connect(ipaddr_t, in_port_t, boolean_t);
1712 extern void nca_tcp_send(nca_conn_t *, mblk_t *);
1713 extern void nca_tcp_direct(nca_conn_t *, nca_conn_t *, uint32_t);
1714 
1715 /* Function prototypes from ncadoorsrv.c */
1716 extern node_t *nca_node_flush(node_t *);
1717 extern void nca_downcall_service(void *, door_arg_t *, void (**)(void *,
1718     void *), void **, int *);
1719 extern node_t *ctag_lookup(uint64_t, unsigned *);
1720 extern node_t *node_replace(node_t *, nca_conn_t *);
1721 extern node_t *node_temp(node_t *, nca_conn_t *);
1722 extern void find_ctags(node_t *, nca_io2_t *, int *);
1723 extern void nca_ncafs_srv(nca_io2_t *, struct uio *, queue_t *);
1724 extern boolean_t nca_reclaim_vlru(void);
1725 extern boolean_t nca_reclaim_plru(boolean_t, boolean_t);
1726 
1727 /*
1728  * NCA uses the tcp_mib, so it is declared here. The ip_mib is already
1729  * declared extern in ip.h, so it does not need to be declared here.
1730  */
1731 extern mib2_tcp_t tcp_mib;
1732 
1733 /*
1734  * NCA_COUNTER() is used to add a signed long value to an unsigned long
1735  * counter; in general these counters are used to maintain NCA state.
1736  *
1737  * NCA_DEBUG_COUNTER() is used like NCA_COUNTER() but for counters used
1738  * to maintain additional debug state; by default these counters aren't
1739  * updated unless the global value nca_debug_counter is set to a value
1740  * other than zero.
1741  *
1742  * Also, if NCA_COUNTER_TRACE is defined, a time-ordered wrapping trace
1743  * buffer is maintained with hrtime_t stamps, counter address, value to
1744  * add, and new value entries for all NCA_COUNTER() and NCA_DEBUG_COUNTER()
1745  * use.
1746  */
1747 
/*
 * Counter tracing is compiled out: NCA_COUNTER_TRACE is explicitly
 * undefined here, so the #else variant of NCA_COUNTER() below is the one
 * in effect.  Replace the #undef with a #define to build the tracing
 * variant.
 */
#undef	NCA_COUNTER_TRACE

#ifdef	NCA_COUNTER_TRACE

/* Number of records in the wrapping trace buffer nca_counter_tv[]. */
#define	NCA_COUNTER_TRACE_SZ	1024

/* One trace record, written for every traced counter update. */
typedef struct nca_counter_s {
	hrtime_t	t;	/* gethrtime() value when the record is filled */
	unsigned long	*p;	/* address of the counter that was updated */
	unsigned long	v;	/* value that was added */
	unsigned long	nv;	/* counter value returned by the atomic add */
} nca_counter_t;

extern nca_counter_t nca_counter_tv[];
extern nca_counter_t *nca_counter_tp;

/*
 * Tracing NCA_COUNTER(): atomically add (_v) to *(_p), then claim the
 * next trace record by advancing nca_counter_tp with a compare-and-swap
 * loop (wrapping at the end of nca_counter_tv[]) and fill that record in.
 *
 * Each argument is evaluated exactly once.  All of the macro's own locals
 * carry a leading underscore so that an argument expression mentioning a
 * caller variable named "p" or "v" is not captured by a macro local of
 * the same name.
 */
#define	NCA_COUNTER(_p, _v) {						\
	unsigned long	*_cp = (_p);					\
	long		_cv = (_v);					\
	unsigned long	_nv;						\
	nca_counter_t	*_otp;						\
	nca_counter_t	*_ntp;						\
									\
	_nv = atomic_add_long_nv(_cp, _cv);				\
	do {								\
		_otp = nca_counter_tp;					\
		_ntp = _otp + 1;					\
		if (_ntp == &nca_counter_tv[NCA_COUNTER_TRACE_SZ])	\
			_ntp = nca_counter_tv;				\
	} while (casptr((void *)&nca_counter_tp, (void *)_otp,		\
	    (void *)_ntp) != (void *)_otp);				\
	_ntp->t = gethrtime();						\
	_ntp->p = _cp;							\
	_ntp->v = _cv;							\
	_ntp->nv = _nv;							\
}

#else	/* NCA_COUNTER_TRACE */

/* Non-tracing NCA_COUNTER(): a plain atomic add of (v) to *(p). */
#define	NCA_COUNTER(p, v) atomic_add_long((p), (v))

#endif	/* NCA_COUNTER_TRACE */
1790 
1791 
1792 /*
1793  * This is the buf used in upcall to httpd.
1794  */
/*
 * g_http_buf_table, declared later in this header, points at entries of
 * this type.
 * NOTE(review): tid presumably identifies the thread that owns buf -
 * confirm against the code that fills the table.
 */
1795 typedef struct {
1796 	uintptr_t	tid;
1797 	char		*buf;
1798 } http_buf_table_t;
1799 
/*
 * URI and filename hash, a simple static hash bucket array of singly
 * linked grounded lists is used with a hashing algorithm which has
 * proven to have good distribution properties for strings of ...
 *
 * Note: NCA_HASH_SZ must be a prime number.
 *
 * HASH_IX(s, l, hix, hsz) hashes the (l) bytes starting at (s) and stores
 * the bucket index in the lvalue (hix):
 *
 *	hix = 0; for each byte: hix = (hix * 33 + byte) & NCA_HASH_MASK;
 *	hix %= hsz;
 *
 * A length of zero or less yields index 0.  Bytes are read through a plain
 * char pointer.  (s) and (l) are evaluated once; (hix) is evaluated many
 * times.  The macro's locals carry a leading underscore so that callers
 * may pass variables named "cp" or "len" without them being captured.
 */

#define	NCA_HASH_SZ	8053
#define	NCA_HASH_MASK	0xFFFFFF
#define	HASH_IX(s, l, hix, hsz) {					\
	char *_hcp = (s);						\
	int _hlen = (l);						\
									\
	(hix) = 0;							\
	while (_hlen-- > 0) {						\
		(hix) = (hix) * 33 + *_hcp++;				\
		(hix) &= NCA_HASH_MASK;					\
	}								\
	(hix) %= (hsz);							\
}
1821 
1822 /*
1823  * CTAG hash.
1824  */
/*
 * CTAGHASH_IX(t, ix) stores (t) modulo NCA_CTAGHASH_SZ in the lvalue (ix)
 * and yields that value.  The reduction uses the compile-time constant,
 * not the ncactaghash_sz variable declared later in this header.
 */
1825 #define	NCA_CTAGHASH_SZ	4096
1826 #define	CTAGHASH_IX(t, ix) ((ix) = (t) % NCA_CTAGHASH_SZ)
1827 
/*
 * VNODE hash.
 *
 * Note: NCA_VNODEHASH_SZ must be a P2Ps() value.
 *
 * VNODEHASH_IX(p, ix) converts the pointer (p) to a uintptr_t, XORs it
 * with copies of itself shifted right by 27, 17 and 11 bits, reduces the
 * result modulo the ncavnodehash_sz variable (not NCA_VNODEHASH_SZ) and
 * stores it in the lvalue (ix).  (p) is fully parenthesized so that an
 * argument such as "ptr + 1" is converted as a whole; it is evaluated
 * four times.
 *
 * NOTE(review): P2Ps() is not defined in the visible part of this file;
 * confirm what constraint the note above places on NCA_VNODEHASH_SZ.
 */
#define	NCA_VNODEHASH_SZ 12281
#define	VNODEHASH_IX(p, ix) ((ix) = (((uintptr_t)(p) >> 27) ^		\
	((uintptr_t)(p) >> 17) ^ ((uintptr_t)(p) >> 11) ^		\
	(uintptr_t)(p)) % ncavnodehash_sz)
1837 
/*
 * Memory-related globals, defined outside this header.
 * NOTE(review): the pgcnt_t variables are presumably page counts and the
 * ssize_t/size_t variables byte counts - units inferred from the types
 * only; confirm against the code that maintains them.
 */
1838 extern pgcnt_t nca_ppmax;
1839 extern pgcnt_t nca_vpmax;
1840 extern pgcnt_t nca_pplim;
1841 extern pgcnt_t nca_vplim;
1842 extern pgcnt_t nca_ppmem;
1843 extern pgcnt_t nca_vpmem;
1844 extern ssize_t nca_kbmem;
1845 extern ssize_t nca_spmem;
1846 extern ssize_t nca_ckmem;
1847 extern ssize_t nca_mbmem;
1848 extern ssize_t nca_cbmem;
1849 extern ssize_t nca_lbmem;
1850 extern size_t  nca_maxkmem;
1851 extern uint32_t nca_use_segmap;
1852 
/*
 * ulong_t counters, defined outside this header.
 * NOTE(review): presumably updated through NCA_COUNTER(); the meaning of
 * each individual counter is not established by this header - see the
 * code that increments it.
 */
1853 extern ulong_t nca_hits;
1854 extern ulong_t nca_file;
1855 extern ulong_t nca_ctag;
1856 extern ulong_t nca_miss;
1857
1858 extern ulong_t nca_hit304;
1859 extern ulong_t nca_hitnoV;
1860 extern ulong_t nca_hitnoVfast;
1861 extern ulong_t nca_hitnoVtemp;
1862
1863 extern ulong_t nca_filehits;
1864 extern ulong_t nca_filenoV;
1865 extern ulong_t nca_filenoVfast;
1866 extern ulong_t nca_filemiss;
1867
1868 extern ulong_t nca_missURI;
1869 extern ulong_t nca_missQ;
1870 extern ulong_t nca_missSAFE;
1871 extern ulong_t nca_missnoV;
1872 extern ulong_t nca_missnotcp;
1873 extern ulong_t nca_missfail;
1874 extern ulong_t nca_misstemp;
1875 extern ulong_t nca_missnohash;
1876 extern ulong_t nca_missclean;
1877 extern ulong_t nca_missadvisory;
1878 extern ulong_t nca_missadvNoA;
1879 extern ulong_t nca_missERROR;
1880
1881 extern ulong_t nca_ERROR;
1882 extern ulong_t nca_flushnode;
1883 extern ulong_t nca_replacenode;
1884 extern ulong_t nca_tempnode;
1885
1886 extern ulong_t nca_fail304;
1887
1888 extern ulong_t nca_nocache1;
1889 extern ulong_t nca_nocache2;
1890 extern ulong_t nca_nocache3;
1891 extern ulong_t nca_nocache4;
1892 extern ulong_t nca_nocache5;
1893 extern ulong_t nca_nocache6;
1894 extern ulong_t nca_nocache6nomp;
1895 extern ulong_t nca_nocache7;
1896 extern ulong_t nca_nocache8;
1897 extern ulong_t nca_nocache9;
1898 extern ulong_t nca_nocache10;
1899 extern ulong_t nca_nocache11;
1900 extern ulong_t nca_nocache12;
1901 extern ulong_t nca_nocache13;
1902 extern ulong_t nca_nocache14;
1903 extern ulong_t nca_nocache15;
1904 extern ulong_t nca_nodes;
1905 extern ulong_t nca_desballoc;
1906 
/*
 * More ulong_t counters defined outside this header; nca_ref is an array
 * whose size is not given here.
 * NOTE(review): the nca_r* names presumably relate to the
 * nca_reclaim_plru()/nca_reclaim_vlru() routines declared above - confirm.
 */
1907 extern ulong_t nca_plrucnt;
1908 extern ulong_t nca_vlrucnt;
1909 extern ulong_t nca_rpcall;
1910 extern ulong_t nca_rvcall;
1911 extern ulong_t nca_rpbusy;
1912 extern ulong_t nca_rvbusy;
1913 extern ulong_t nca_rpfail;
1914 extern ulong_t nca_rpempty;
1915 extern ulong_t nca_rvempty;
1916 extern ulong_t nca_rpdone;
1917 extern ulong_t nca_rvdone;
1918 extern ulong_t nca_rmdone;
1919 extern ulong_t nca_rkdone;
1920 extern ulong_t nca_rsdone;
1921 extern ulong_t nca_rndone;
1922 extern ulong_t nca_rpnone;
1923 extern ulong_t nca_rvnone;
1924 extern ulong_t nca_rmnone;
1925 extern ulong_t nca_rknone;
1926 extern ulong_t nca_rsnone;
1927 extern ulong_t nca_rnh;
1928 extern ulong_t nca_ref[];
1929 extern ulong_t nca_vmap_rpcall;
1930
1931 extern ulong_t nca_node_kmem_fail1;
1932 extern ulong_t nca_node_kmem_fail2;
1933 
/*
 * ulong_t counters with doorsrv_, allocation/mapin failure and nca_httpd_
 * prefixes, all defined outside this header.
 * NOTE(review): the doorsrv_ counters presumably belong to ncadoorsrv.c,
 * whose prototypes appear earlier in this header - confirm.
 */
1934 extern ulong_t doorsrv_nopreempt;
1935 extern ulong_t doorsrv_badconnect;
1936 extern ulong_t doorsrv_invaladvise;
1937 extern ulong_t doorsrv_notupcall;
1938 extern ulong_t doorsrv_badadvise;
1939 extern ulong_t doorsrv_cksum;
1940 extern ulong_t doorsrv_error;
1941 extern ulong_t doorsrv_op;
1942 extern ulong_t doorsrv_badtee;
1943 extern ulong_t doorsrv_badio;
1944 extern ulong_t doorsrv_sz;
1945
1946 extern ulong_t nca_allocfail;
1947 extern ulong_t nca_mapinfail;
1948 extern ulong_t nca_mapinfail1;
1949 extern ulong_t nca_mapinfail2;
1950 extern ulong_t nca_mapinfail3;
1951
1952 extern ulong_t nca_httpd_http;
1953 extern ulong_t nca_httpd_badsz;
1954 extern ulong_t nca_httpd_nosz;
1955 extern ulong_t nca_httpd_filename;
1956 extern ulong_t nca_httpd_filename1;
1957 extern ulong_t nca_httpd_filename2;
1958 extern ulong_t nca_httpd_trailer;
1959 extern ulong_t nca_httpd_preempt;
1960 extern ulong_t nca_httpd_downcall;
1961 extern ulong_t nca_early_downcall;
1962 extern ulong_t nca_httpd_more;
1963 
/*
 * NOTE(review): unlike the declarations around them, the counters in this
 * group are written without "extern", so each one is a tentative
 * definition in every source file that includes this header.  That only
 * links when the toolchain merges such common symbols.  Converting them
 * to "extern" declarations requires exactly one defining .c file; confirm
 * that one exists before changing this.
 */
1964 ulong_t nca_logit_noupcall;
1965
1966 ulong_t nca_logit;
1967 ulong_t nca_logit_nomp;
1968 ulong_t nca_logit_no;
1969 ulong_t nca_logit_NULL;
1970 ulong_t nca_logit_fail;
1971
1972 ulong_t nca_logit_flush_NULL1;
1973 ulong_t nca_logit_flush_NULL2;
1974
1975 ulong_t nca_logger_NULL1;
1976 ulong_t nca_logger_NULL2;
1977
1978 ulong_t nca_log_buf_alloc_NULL;
1979 ulong_t nca_log_buf_alloc_fail;
1980 ulong_t nca_log_buf_alloc_part;
1981
1982 ulong_t nca_log_buf_dup;
1983 
/*
 * ulong_t counters for upcalls, connections, IP and TCP, all defined
 * outside this header.  What each one counts is not established here.
 */
1984 extern ulong_t nca_upcalls;
1985 extern ulong_t nca_ncafs_upcalls;
1986
1987 extern ulong_t nca_conn_count;
1988 extern ulong_t nca_conn_kmem;
1989 extern ulong_t nca_conn_kmem_fail;
1990 extern ulong_t nca_conn_allocb_fail;
1991 extern ulong_t nca_conn_tw;
1992 extern ulong_t nca_conn_tw1;
1993 extern ulong_t nca_conn_tw2;
1994 extern ulong_t nca_conn_reinit_cnt;
1995 extern ulong_t nca_conn_NULL1;
1996 extern ulong_t nca_conn_Q0;
1997 extern ulong_t nca_conn_FLAGS;
1998
1999 extern ulong_t tcpwronginq;
2000 extern ulong_t ipsendup;
2001 extern ulong_t ipwrongcpu;
2002 extern ulong_t iponcpu;
2003
2004 extern ulong_t nca_tcp_xmit_null;
2005 extern ulong_t nca_tcp_xmit_null1;
2006 
/*
 * ulong_t counters with tw_ and ti_ prefixes, defined outside this header.
 * NOTE(review): presumably tied to the tw_t and ti_t structures named in
 * the comment at the top of this file - confirm.
 */
2007 extern ulong_t tw_on;
2008 extern ulong_t tw_fire;
2009 extern ulong_t tw_fire1;
2010 extern ulong_t tw_fire2;
2011 extern ulong_t tw_fire3;
2012 extern ulong_t tw_add;
2013 extern ulong_t tw_add1;
2014 extern ulong_t tw_delete;
2015 extern ulong_t tw_reclaim;
2016 extern ulong_t tw_reap;
2017 extern ulong_t tw_reap1;
2018 extern ulong_t tw_reap2;
2019 extern ulong_t tw_reap3;
2020 extern ulong_t tw_reap4;
2021 extern ulong_t tw_reap5;
2022 extern ulong_t tw_timer;
2023 extern ulong_t tw_timer1;
2024 extern ulong_t tw_timer2;
2025 extern ulong_t tw_timer3;
2026 extern ulong_t tw_timer4;
2027 extern ulong_t tw_timer5;
2028
2029 extern ulong_t ti_on;
2030 extern ulong_t ti_fire;
2031 extern ulong_t ti_fire1;
2032 extern ulong_t ti_fire2;
2033 extern ulong_t ti_fire3;
2034 extern ulong_t ti_fire4;
2035 extern ulong_t ti_add;
2036 extern ulong_t ti_add1;
2037 extern ulong_t ti_add2;
2038 extern ulong_t ti_add3;
2039 extern ulong_t ti_add4;
2040 extern ulong_t ti_add5;
2041 extern ulong_t ti_add_reuse;
2042 extern ulong_t ti_delete;
2043 extern ulong_t ti_delete1;
2044 extern ulong_t ti_delete2;
2045 extern ulong_t ti_reap;
2046 extern ulong_t ti_reap1;
2047 extern ulong_t ti_reap2;
2048 extern ulong_t ti_reap3;
2049 extern ulong_t ti_reap4;
2050 extern ulong_t ti_reap5;
2051 extern ulong_t ti_timer;
2052 extern ulong_t ti_timer1;
2053 extern ulong_t ti_timer2;
2054 extern ulong_t ti_timer3;
2055 extern ulong_t ti_timer4;
2056 extern ulong_t ti_timer5;
2057 extern ulong_t ti_timer6;
2058 
/*
 * Remaining NCA globals, defined outside this header: connection queue
 * values, the nca_resp_500 buffer and its size, the four hash tables
 * (nodef_t pointers) with their *_sz sizes, door paths and handles, the
 * two miss fanouts, the http buffer table and the node kmem cache.
 * ncavnodehash_sz is the modulus used by VNODEHASH_IX().
 */
2059 extern uint32_t nca_conn_q;
2060 extern uint32_t nca_conn_q0;
2061 extern uint32_t nca_conn_req_max_q;
2062 extern uint32_t nca_conn_req_max_q0;
2063
2064 extern char nca_resp_500[];
2065 extern ssize_t nca_resp_500_sz;
2066
2067 extern uint32_t ncaurihash_sz;
2068 extern uint32_t ncafilehash_sz;
2069 extern uint32_t ncactaghash_sz;
2070 extern uint32_t ncavnodehash_sz;
2071 extern nodef_t *ncaurihash;
2072 extern nodef_t *ncafilehash;
2073 extern nodef_t *ncavnodehash;
2074 extern nodef_t *ncactaghash;
2075 extern char nca_httpd_door_path[];
2076 extern char nca_httpd_downdoor_path[];
2077 extern door_handle_t nca_downcall_door_hand;
2078 extern uint32_t n_http_buf_size;
2079 extern door_handle_t nca_httpd_door_hand;
2080 extern sqfan_t nca_miss_fanout1;
2081 extern sqfan_t nca_miss_fanout2;
2082 extern nca_door_t nca_httpd_door;
2083 extern int nca_downdoor_created;
2084 extern int n_http_buf_table;
2085 extern http_buf_table_t *g_http_buf_table;
2086 extern struct kmem_cache *node_cache;
/*
 * HTTP and node routines.  When DEBUG is defined, nca_http_response() and
 * nca_http_response_node() take one additional trailing "const char *"
 * argument, so callers must be built with the same DEBUG setting as the
 * definitions.
 * NOTE(review): the extra argument presumably identifies the caller -
 * confirm against the definitions.
 */
2087 #ifdef DEBUG
2088 extern node_t *nca_http_response(nca_conn_t *, const char *, int, char *, int,
2089 		    uint_t, const char *);
2090 extern node_t *nca_http_response_node(nca_conn_t *, const char *, int, node_t *,
2091 		    const char *);
2092 #else
2093 extern node_t *nca_http_response(nca_conn_t *, const char *, int, char *, int,
2094 		    uint_t);
2095 extern node_t *nca_http_response_node(nca_conn_t *, const char *, int,
2096     node_t *);
2097 #endif
2098 extern void nca_node_del(node_t *);
2099 extern void nca_node_uncache(node_t *);
2100 extern node_t *nca_node_add(char *, int, nodef_t *, int);
2101 extern node_t *node_create(int, boolean_t, char *, int);
2102 extern void nca_reclaim_phys(node_t *, boolean_t, boolean_t);
2103 extern boolean_t nca_http_pmap(node_t *);
2104 extern boolean_t nca_http_vmap(node_t *, int);
2105 extern time_t nca_http_date(char *);
2106 extern node_t *nca_httpd_data(node_t *, nca_conn_t *, nca_io2_t *, int);
2107 extern void nca_missed(node_t *, mblk_t *, nca_squeue_t *);
2108 extern void nca_miss_conn_mv(node_t *, nca_conn_t *);
2109 extern void nca_miss_conn_fr(node_t *, nca_conn_t *);
2110 extern void nca_http_logit(nca_conn_t *);
2111 extern void nca_http_error(nca_conn_t *);
2112 extern void nca_node_xmit(node_t *, nca_conn_t *);
2113 
2114 /*
2115  * nca_listener_t holds what is needed to forward data to an application.
2116  * For the door case, doorhandle is the upcall door handle and listenerq
2117  * is NULL; for ncafs, listenerq is the upcall listener queue and
2118  * doorhandle is NULL. The listenning member is always B_TRUE for door; for
2119  * ncafs it is B_TRUE only after the listen system call has been issued.
2120  */
2121 typedef struct nca_listener_s {
2122 	boolean_t	listenning;	/* ready to accept connections */
2123 	door_handle_t	doorhandle;	/* door handle or NULL for ncafs */
2124 	queue_t		*listenerq;	/* upcall queue or NULL for door */
2125 } nca_listener_t;
2126 
2127 /*
2128  * Return values of nca_isnca_data().
2129  * NOT_NCA_DATA:	not NCA data.
2130  * NCA_DATA_ANY_ADDR:	NCA data, matches INADDR_ANY.
2131  * NCA_DATA_ADDR:	NCA data, matches an IP address.
2132  */
2133 #define	NOT_NCA_DATA		0
2134 #define	NCA_DATA_ANY_ADDR	1
2135 #define	NCA_DATA_ADDR		2
2136 
/*
 * Listener table globals and routines, defined outside this header.  The
 * lookup, add and delete routines take an (ipaddr_t, uint16_t) pair;
 * nca_isnca_data() returns one of NOT_NCA_DATA, NCA_DATA_ANY_ADDR or
 * NCA_DATA_ADDR.
 * NOTE(review): the uint16_t is presumably a TCP port, and the "void *"
 * passed to nca_listener_add() presumably a door handle or queue chosen
 * by the boolean_t - confirm against the definitions.
 */
2137 extern uint32_t ipportrehashcount1;
2138 extern uint32_t ipportrehashcount2;
2139 extern uint32_t ipportbucketcnt;
2140 extern uint32_t ipporttablesize;
2141 extern uint32_t ncafscount;
2142 extern uint32_t doorcount;
2143 extern int	ip_virtual_hosting;
2144
2145 extern nca_listener_t *nca_listener_find(ipaddr_t, uint16_t);
2146 extern nca_listener_t *nca_listener_find2(ipaddr_t, uint16_t);
2147 extern int		nca_isnca_data(ipaddr_t, uint16_t);
2148 extern int		nca_listener_add(ipaddr_t, uint16_t, void *, boolean_t);
2149 extern int		nca_listener_del(ipaddr_t, uint16_t);
2150 extern void		nca_listener_report(mblk_t *);
2151 
2152 #ifdef	__cplusplus
2153 }
2154 #endif
2155 
2156 #endif	/* _INET_NCA_H */
2157