xref: /illumos-gate/usr/src/uts/common/inet/ip/ipclassifier.c (revision 9b4e3ac25d882519cad3fc11f0c53b07f4e60536)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * IP PACKET CLASSIFIER
28  *
29  * The IP packet classifier provides a mapping between IP packets and
30  * persistent connection state for connection-oriented protocols. It also
31  * provides an interface for managing connection state.
32  *
33  * The connection state is kept in the conn_t data structure and contains,
34  * among other things:
35  *
36  *	o local/remote address and ports
37  *	o Transport protocol
38  *	o squeue for the connection (for TCP only)
39  *	o reference counter
40  *	o Connection state
41  *	o hash table linkage
42  *	o interface/ire information
43  *	o credentials
44  *	o ipsec policy
45  *	o send and receive functions.
46  *	o mutex lock.
47  *
48  * Connections use a reference counting scheme. They are freed when the
49  * reference counter drops to zero. A reference is incremented when a
50  * connection is placed in a list or table, when an incoming packet for the
51  * connection arrives and when the connection is processed via its squeue
52  * (squeue processing may be asynchronous and the reference protects the
53  * connection from being destroyed before its processing is finished).
54  *
55  * The send and receive functions are currently used for TCP only. The send
56  * function determines the IP entry point for the packet once it leaves TCP to
57  * be sent to the destination address. The receive function is used by IP when
58  * the packet should be passed for TCP processing. When a new connection is
59  * created these are set to ip_output() and tcp_input() respectively. During
60  * the lifetime of the connection the send and receive functions may change
61  * depending on changes in the connection state. For example, once the
62  * connection is bound to an address, the receive function for this connection
63  * is set to tcp_conn_request().  This allows incoming SYNs to go directly into
64  * the listener SYN processing function without going to tcp_input() first.
65  *
66  * The classifier uses several hash tables:
67  *
68  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
69  *	ipcl_bind_fanout:	contains all connections in BOUND state
70  *	ipcl_proto_fanout:	IPv4 protocol fanout
71  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
72  *	ipcl_udp_fanout:	contains all UDP connections
73  *	ipcl_globalhash_fanout:	contains all connections
74  *
75  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
76  * which need to view all existing connections.
77  *
78  * All tables are protected by per-bucket locks. When both per-bucket lock and
79  * connection lock need to be held, the per-bucket lock should be acquired
80  * first, followed by the connection lock.
81  *
82  * All functions doing search in one of these tables increment a reference
83  * counter on the connection found (if any). This reference should be dropped
84  * when the caller has finished processing the connection.
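 *
 * A rough sketch of the expected pattern (not code from this file): a caller
 * that found a conn through one of the search functions and needs both locks
 * follows the ordering above and drops its reference afterwards:
 *
 *	mutex_enter(&connfp->connf_lock);	(bucket lock first)
 *	mutex_enter(&connp->conn_lock);		(then the connection lock)
 *	... examine or update the connection ...
 *	mutex_exit(&connp->conn_lock);
 *	mutex_exit(&connfp->connf_lock);
 *	CONN_DEC_REF(connp);			(drop the search reference)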
85  *
86  *
87  * INTERFACES:
88  * ===========
89  *
90  * Connection Lookup:
91  * ------------------
92  *
93  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack)
94  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack)
95  *
96  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
97  * it can't find any associated connection. If the connection is found, its
98  * reference counter is incremented.
99  *
100  *	mp:	mblk containing the packet header. The full header should fit
101  *		into a single mblk. It should also contain at least the full
102  *		IP header and the TCP or UDP header.
103  *
104  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
105  *
106  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
107  *		 the packet.
108  *
109  * 	zoneid: The zone in which the returned connection must be; the zoneid
110  *		corresponding to the ire_zoneid on the IRE located for the
111  *		packet's destination address.
112  *
113  *	For TCP connections, the lookup order is as follows:
114  *		5-tuple {src, dst, protocol, local port, remote port}
115  *			lookup in ipcl_conn_fanout table.
116  *		3-tuple {dst, remote port, protocol} lookup in
117  *			ipcl_bind_fanout table.
118  *
119  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
120  *	remote port} lookup is done on ipcl_udp_fanout. Note that these
121  *	interfaces do not handle cases where a packet belongs to multiple
122  *	UDP clients; those are handled in IP itself.
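 *
 *	As an illustrative sketch (assuming an inbound TCP segment whose IP
 *	header length is already known), a caller classifies the packet and
 *	releases the reference when done:
 *
 *		connp = ipcl_classify_v4(mp, IPPROTO_TCP, hdr_len,
 *		    zoneid, ipst);
 *		if (connp != NULL) {
 *			... deliver mp to connp ...
 *			CONN_DEC_REF(connp);
 *		}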
123  *
124  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
125  * determine which actual zone gets the segment.  This is used only in a
126  * labeled environment.  The matching rules are:
127  *
128  *	- If it's not a multilevel port, then the label on the packet selects
129  *	  the zone.  Unlabeled packets are delivered to the global zone.
130  *
131  *	- If it's a multilevel port, then only the zone registered to receive
132  *	  packets on that port matches.
133  *
134  * Also, in a labeled environment, packet labels need to be checked.  For fully
135  * bound TCP connections, we can assume that the packet label was checked
136  * during connection establishment, and doesn't need to be checked on each
137  * packet.  For others, though, we need to check for strict equality or, for
138  * multilevel ports, membership in the range or set.  This part currently does
139  * a tnrh lookup on each packet, but could be optimized to use cached results
140  * if that were necessary.  (SCTP doesn't come through here, but if it did,
141  * we would apply the same rules as TCP.)
142  *
143  * An implication of the above is that fully-bound TCP sockets must always use
144  * distinct 4-tuples; they can't be discriminated by label alone.
145  *
146  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
147  * as there's no connection set-up handshake and no shared state.
148  *
149  * Labels on looped-back packets within a single zone do not need to be
150  * checked, as all processes in the same zone have the same label.
151  *
152  * Finally, for unlabeled packets received by a labeled system, special rules
153  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
154  * socket in the zone whose label matches the default label of the sender, if
155  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
156  * receiver's label must dominate the sender's default label.
157  *
158  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
159  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
160  *					 ip_stack);
161  *
162  *	Lookup routine to find an exact match for {src, dst, local port,
163  *	remote port} for TCP connections in ipcl_conn_fanout. The addresses
164  *	and ports are read from the IP and TCP headers respectively.
165  *
166  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
167  *					 zoneid, ip_stack);
168  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
169  *					 zoneid, ip_stack);
170  *
171  * 	Lookup routine to find a listener with the tuple {lport, laddr,
172  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
173  * 	parameter, the interface index, is also compared.
174  *
175  * void ipcl_walk(func, arg, ip_stack)
176  *
177  * 	Apply 'func' to every connection available. The 'func' is called as
178  *	(*func)(connp, arg). The walk is non-atomic so connections may be
179  *	created and destroyed during the walk. The CONN_CONDEMNED and
180  *	CONN_INCIPIENT flags ensure that connections which are newly created
181  *	or being destroyed are not selected by the walker.
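 *
 * 	For illustration, a minimal walker (count_conn is a hypothetical
 * 	helper, not part of this file) that counts connections:
 *
 *		static void
 *		count_conn(conn_t *connp, void *arg)
 *		{
 *			(*(uint_t *)arg)++;
 *		}
 *
 *		uint_t nconns = 0;
 *		ipcl_walk(count_conn, &nconns, ipst);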
182  *
183  * Table Updates
184  * -------------
185  *
186  * int ipcl_conn_insert(connp, protocol, src, dst, ports)
187  * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
188  *
189  *	Insert 'connp' in the ipcl_conn_fanout.
190  *	Arguments:
191  *		connp		conn_t to be inserted
192  *		protocol	connection protocol
193  *		src		source address
194  *		dst		destination address
195  *		ports		local and remote port
196  *		ifindex		interface index for IPv6 connections
197  *
198  *	Return value :
199  *		0		if connp was inserted
200  *		EADDRINUSE	if the connection with the same tuple
201  *				already exists.
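 *
 *	A hedged usage sketch ('src', 'dst' and 'ports' stand for the already
 *	chosen addresses and packed local/remote ports):
 *
 *		error = ipcl_conn_insert(connp, IPPROTO_TCP, src, dst, ports);
 *		if (error == EADDRINUSE) {
 *			... an identical 5-tuple is already inserted ...
 *		}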
202  *
203  * int ipcl_bind_insert(connp, protocol, src, lport);
204  * int ipcl_bind_insert_v6(connp, protocol, src, lport);
205  *
206  * 	Insert 'connp' in ipcl_bind_fanout.
207  * 	Arguments:
208  * 		connp		conn_t to be inserted
209  * 		protocol	connection protocol
210  * 		src		source address connection wants
211  * 				to bind to
212  * 		lport		local port connection wants to
213  * 				bind to
214  *
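 * 	A short sketch ('laddr' and 'lport' are placeholders for the address
 * 	and port the caller has already chosen):
 *
 *		error = ipcl_bind_insert(connp, IPPROTO_UDP, laddr, lport);
 *
 * 	With laddr == INADDR_ANY the conn is inserted as a wildcard entry in
 * 	its bucket; otherwise it is inserted as a bound entry.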
215  *
216  * void ipcl_hash_remove(connp);
217  *
218  * 	Removes the 'connp' from the connection fanout table.
219  *
220  * Connection Creation/Destruction
221  * -------------------------------
222  *
223  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
224  *
225  * 	Creates a new conn based on the type flag and inserts it into the
226  * 	globalhash table.
227  *
228  *	type:	This flag determines the type of conn_t which needs to be
229  *		created i.e., which kmem_cache it comes from.
230  *		IPCL_TCPCONN	indicates a TCP connection
231  *		IPCL_SCTPCONN	indicates a SCTP connection
232  *		IPCL_UDPCONN	indicates a UDP conn_t.
233  *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
234  *		IPCL_RTSCONN	indicates a RTS conn_t.
235  *		IPCL_IPCCONN	indicates all other connections.
236  *
237  * void ipcl_conn_destroy(connp)
238  *
239  * 	Destroys the connection state, removes it from the global
240  * 	connection hash table and frees its memory.
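 *
 * 	Lifecycle sketch (ns stands for the netstack the conn belongs to):
 *
 *		connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns);
 *		...
 *		CONN_DEC_REF(connp);
 *
 * 	When the last reference is dropped the conn is torn down through
 * 	ipcl_conn_destroy().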
241  */
242 
243 #include <sys/types.h>
244 #include <sys/stream.h>
245 #include <sys/stropts.h>
246 #include <sys/sysmacros.h>
247 #include <sys/strsubr.h>
248 #include <sys/strsun.h>
249 #define	_SUN_TPI_VERSION 2
250 #include <sys/ddi.h>
251 #include <sys/cmn_err.h>
252 #include <sys/debug.h>
253 
254 #include <sys/systm.h>
255 #include <sys/param.h>
256 #include <sys/kmem.h>
257 #include <sys/isa_defs.h>
258 #include <inet/common.h>
259 #include <netinet/ip6.h>
260 #include <netinet/icmp6.h>
261 
262 #include <inet/ip.h>
263 #include <inet/ip6.h>
264 #include <inet/ip_ndp.h>
265 #include <inet/ip_impl.h>
266 #include <inet/udp_impl.h>
267 #include <inet/sctp_ip.h>
268 #include <inet/sctp/sctp_impl.h>
269 #include <inet/rawip_impl.h>
270 #include <inet/rts_impl.h>
271 
272 #include <sys/cpuvar.h>
273 
274 #include <inet/ipclassifier.h>
275 #include <inet/tcp.h>
276 #include <inet/ipsec_impl.h>
277 
278 #include <sys/tsol/tnet.h>
279 #include <sys/sockio.h>
280 
281 #ifdef DEBUG
282 #define	IPCL_DEBUG
283 #else
284 #undef	IPCL_DEBUG
285 #endif
286 
287 #ifdef	IPCL_DEBUG
288 int	ipcl_debug_level = 0;
289 #define	IPCL_DEBUG_LVL(level, args)	\
290 	if (ipcl_debug_level  & level) { printf args; }
291 #else
292 #define	IPCL_DEBUG_LVL(level, args) {; }
293 #endif
294 /* Old value for compatibility. Settable in /etc/system */
295 uint_t tcp_conn_hash_size = 0;
296 
297 /* New value. Zero means choose automatically.  Settable in /etc/system */
298 uint_t ipcl_conn_hash_size = 0;
299 uint_t ipcl_conn_hash_memfactor = 8192;
300 uint_t ipcl_conn_hash_maxsize = 82500;
301 
302 /* bind/udp fanout table size */
303 uint_t ipcl_bind_fanout_size = 512;
304 uint_t ipcl_udp_fanout_size = 16384;
305 
306 /* Raw socket fanout size.  Must be a power of 2. */
307 uint_t ipcl_raw_fanout_size = 256;
308 
309 /*
310  * Primes useful for hashing, indexed by N for N of 0-28; each entry is
311  * the nearest prime <= 2^N - 2^(N-2).
312  */
313 
314 #define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
315 		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
316 		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
317 		50331599, 100663291, 201326557, 0}
318 
319 /*
320  * Wrapper structure to ensure that the conn_t and what follows it (tcp_t,
321  * etc.) are aligned on cache lines.
322  */
323 typedef union itc_s {
324 	conn_t	itc_conn;
325 	char	itcu_filler[CACHE_ALIGN(conn_s)];
326 } itc_t;
327 
328 struct kmem_cache  *tcp_conn_cache;
329 struct kmem_cache  *ip_conn_cache;
330 struct kmem_cache  *ip_helper_stream_cache;
331 extern struct kmem_cache  *sctp_conn_cache;
332 extern struct kmem_cache  *tcp_sack_info_cache;
333 extern struct kmem_cache  *tcp_iphc_cache;
334 struct kmem_cache  *udp_conn_cache;
335 struct kmem_cache  *rawip_conn_cache;
336 struct kmem_cache  *rts_conn_cache;
337 
338 extern void	tcp_timermp_free(tcp_t *);
339 extern mblk_t	*tcp_timermp_alloc(int);
340 
341 static int	ip_conn_constructor(void *, void *, int);
342 static void	ip_conn_destructor(void *, void *);
343 
344 static int	tcp_conn_constructor(void *, void *, int);
345 static void	tcp_conn_destructor(void *, void *);
346 
347 static int	udp_conn_constructor(void *, void *, int);
348 static void	udp_conn_destructor(void *, void *);
349 
350 static int	rawip_conn_constructor(void *, void *, int);
351 static void	rawip_conn_destructor(void *, void *);
352 
353 static int	rts_conn_constructor(void *, void *, int);
354 static void	rts_conn_destructor(void *, void *);
355 
356 static int	ip_helper_stream_constructor(void *, void *, int);
357 static void	ip_helper_stream_destructor(void *, void *);
358 
359 boolean_t	ip_use_helper_cache = B_TRUE;
360 
361 #ifdef	IPCL_DEBUG
362 #define	INET_NTOA_BUFSIZE	18
363 
364 static char *
365 inet_ntoa_r(uint32_t in, char *b)
366 {
367 	unsigned char	*p;
368 
369 	p = (unsigned char *)&in;
370 	(void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
371 	return (b);
372 }
373 #endif
374 
375 /*
376  * Global (for all stack instances) init routine
377  */
378 void
379 ipcl_g_init(void)
380 {
381 	ip_conn_cache = kmem_cache_create("ip_conn_cache",
382 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
383 	    ip_conn_constructor, ip_conn_destructor,
384 	    NULL, NULL, NULL, 0);
385 
386 	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
387 	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
388 	    tcp_conn_constructor, tcp_conn_destructor,
389 	    NULL, NULL, NULL, 0);
390 
391 	udp_conn_cache = kmem_cache_create("udp_conn_cache",
392 	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
393 	    udp_conn_constructor, udp_conn_destructor,
394 	    NULL, NULL, NULL, 0);
395 
396 	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
397 	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
398 	    rawip_conn_constructor, rawip_conn_destructor,
399 	    NULL, NULL, NULL, 0);
400 
401 	rts_conn_cache = kmem_cache_create("rts_conn_cache",
402 	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
403 	    rts_conn_constructor, rts_conn_destructor,
404 	    NULL, NULL, NULL, 0);
405 
406 	if (ip_use_helper_cache) {
407 		ip_helper_stream_cache = kmem_cache_create
408 		    ("ip_helper_stream_cache", sizeof (ip_helper_stream_info_t),
409 		    CACHE_ALIGN_SIZE, ip_helper_stream_constructor,
410 		    ip_helper_stream_destructor, NULL, NULL, NULL, 0);
411 	} else {
412 		ip_helper_stream_cache = NULL;
413 	}
414 }
415 
416 /*
417  * ipclassifier initialization routine; sets up the hash tables.
418  */
419 void
420 ipcl_init(ip_stack_t *ipst)
421 {
422 	int i;
423 	int sizes[] = P2Ps();
424 
425 	/*
426 	 * Calculate size of conn fanout table from /etc/system settings
427 	 */
428 	if (ipcl_conn_hash_size != 0) {
429 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
430 	} else if (tcp_conn_hash_size != 0) {
431 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
432 	} else {
433 		extern pgcnt_t freemem;
434 
435 		ipst->ips_ipcl_conn_fanout_size =
436 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
437 
438 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
439 			ipst->ips_ipcl_conn_fanout_size =
440 			    ipcl_conn_hash_maxsize;
441 		}
442 	}
443 
444 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
445 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
446 			break;
447 		}
448 	}
449 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
450 		/* Out of range, use the 2^16 value */
451 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
452 	}
453 
454 	/* Take values from /etc/system */
455 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
456 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
457 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
458 
459 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
460 
461 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
462 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
463 
464 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
465 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
466 		    MUTEX_DEFAULT, NULL);
467 	}
468 
469 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
470 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
471 
472 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
473 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
474 		    MUTEX_DEFAULT, NULL);
475 	}
476 
477 	ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX *
478 	    sizeof (connf_t), KM_SLEEP);
479 	for (i = 0; i < IPPROTO_MAX; i++) {
480 		mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL,
481 		    MUTEX_DEFAULT, NULL);
482 	}
483 
484 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
485 	    sizeof (connf_t), KM_SLEEP);
486 	for (i = 0; i < IPPROTO_MAX; i++) {
487 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
488 		    MUTEX_DEFAULT, NULL);
489 	}
490 
491 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
492 	mutex_init(&ipst->ips_rts_clients->connf_lock,
493 	    NULL, MUTEX_DEFAULT, NULL);
494 
495 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
496 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
497 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
498 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
499 		    MUTEX_DEFAULT, NULL);
500 	}
501 
502 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
503 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
504 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
505 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
506 		    MUTEX_DEFAULT, NULL);
507 	}
508 
509 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
510 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
511 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
512 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
513 		    NULL, MUTEX_DEFAULT, NULL);
514 	}
515 }
516 
517 void
518 ipcl_g_destroy(void)
519 {
520 	kmem_cache_destroy(ip_conn_cache);
521 	kmem_cache_destroy(tcp_conn_cache);
522 	kmem_cache_destroy(udp_conn_cache);
523 	kmem_cache_destroy(rawip_conn_cache);
524 	kmem_cache_destroy(rts_conn_cache);
525 }
526 
527 /*
528  * All user-level and kernel use of the stack must be gone
529  * by now.
530  */
531 void
532 ipcl_destroy(ip_stack_t *ipst)
533 {
534 	int i;
535 
536 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
537 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
538 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
539 	}
540 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
541 	    sizeof (connf_t));
542 	ipst->ips_ipcl_conn_fanout = NULL;
543 
544 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
545 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
546 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
547 	}
548 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
549 	    sizeof (connf_t));
550 	ipst->ips_ipcl_bind_fanout = NULL;
551 
552 	for (i = 0; i < IPPROTO_MAX; i++) {
553 		ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL);
554 		mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock);
555 	}
556 	kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t));
557 	ipst->ips_ipcl_proto_fanout = NULL;
558 
559 	for (i = 0; i < IPPROTO_MAX; i++) {
560 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
561 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
562 	}
563 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
564 	    IPPROTO_MAX * sizeof (connf_t));
565 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
566 
567 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
568 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
569 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
570 	}
571 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
572 	    sizeof (connf_t));
573 	ipst->ips_ipcl_udp_fanout = NULL;
574 
575 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
576 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
577 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
578 	}
579 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
580 	    sizeof (connf_t));
581 	ipst->ips_ipcl_raw_fanout = NULL;
582 
583 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
584 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
585 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
586 	}
587 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
588 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
589 	ipst->ips_ipcl_globalhash_fanout = NULL;
590 
591 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
592 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
593 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
594 	ipst->ips_rts_clients = NULL;
595 }
596 
597 /*
598  * conn creation routine: initializes the conn, sets the reference count
599  * and inserts it in the global hash table.
600  */
601 conn_t *
602 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
603 {
604 	conn_t	*connp;
605 	sctp_stack_t *sctps;
606 	struct kmem_cache *conn_cache;
607 
608 	switch (type) {
609 	case IPCL_SCTPCONN:
610 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
611 			return (NULL);
612 		sctp_conn_init(connp);
613 		sctps = ns->netstack_sctp;
614 		SCTP_G_Q_REFHOLD(sctps);
615 		netstack_hold(ns);
616 		connp->conn_netstack = ns;
617 		return (connp);
618 
619 	case IPCL_TCPCONN:
620 		conn_cache = tcp_conn_cache;
621 		break;
622 
623 	case IPCL_UDPCONN:
624 		conn_cache = udp_conn_cache;
625 		break;
626 
627 	case IPCL_RAWIPCONN:
628 		conn_cache = rawip_conn_cache;
629 		break;
630 
631 	case IPCL_RTSCONN:
632 		conn_cache = rts_conn_cache;
633 		break;
634 
635 	case IPCL_IPCCONN:
636 		conn_cache = ip_conn_cache;
637 		break;
638 
639 	default:
640 		connp = NULL;
641 		ASSERT(0);
642 	}
643 
644 	if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
645 		return (NULL);
646 
647 	connp->conn_ref = 1;
648 	netstack_hold(ns);
649 	connp->conn_netstack = ns;
650 	ipcl_globalhash_insert(connp);
651 	return (connp);
652 }
653 
654 void
655 ipcl_conn_destroy(conn_t *connp)
656 {
657 	mblk_t	*mp;
658 	netstack_t	*ns = connp->conn_netstack;
659 
660 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
661 	ASSERT(connp->conn_ref == 0);
662 	ASSERT(connp->conn_ire_cache == NULL);
663 
664 	DTRACE_PROBE1(conn__destroy, conn_t *, connp);
665 
666 	if (connp->conn_peercred != NULL &&
667 	    connp->conn_peercred != connp->conn_cred)
668 		crfree(connp->conn_peercred);
669 	connp->conn_peercred = NULL;
670 
671 	if (connp->conn_cred != NULL) {
672 		crfree(connp->conn_cred);
673 		connp->conn_cred = NULL;
674 	}
675 
676 	ipcl_globalhash_remove(connp);
677 
678 	/* FIXME: add separate tcp_conn_free()? */
679 	if (connp->conn_flags & IPCL_TCPCONN) {
680 		tcp_t	*tcp = connp->conn_tcp;
681 		tcp_stack_t *tcps;
682 
683 		ASSERT(tcp != NULL);
684 		tcps = tcp->tcp_tcps;
685 		if (tcps != NULL) {
686 			if (connp->conn_latch != NULL) {
687 				IPLATCH_REFRELE(connp->conn_latch, ns);
688 				connp->conn_latch = NULL;
689 			}
690 			if (connp->conn_policy != NULL) {
691 				IPPH_REFRELE(connp->conn_policy, ns);
692 				connp->conn_policy = NULL;
693 			}
694 			tcp->tcp_tcps = NULL;
695 			TCPS_REFRELE(tcps);
696 		}
697 
698 		tcp_free(tcp);
699 		mp = tcp->tcp_timercache;
700 		tcp->tcp_cred = NULL;
701 
702 		if (tcp->tcp_sack_info != NULL) {
703 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
704 			kmem_cache_free(tcp_sack_info_cache,
705 			    tcp->tcp_sack_info);
706 		}
707 		if (tcp->tcp_iphc != NULL) {
708 			if (tcp->tcp_hdr_grown) {
709 				kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len);
710 			} else {
711 				bzero(tcp->tcp_iphc, tcp->tcp_iphc_len);
712 				kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc);
713 			}
714 			tcp->tcp_iphc_len = 0;
715 		}
716 		ASSERT(tcp->tcp_iphc_len == 0);
717 
718 		/*
719 		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
720 		 * the mblk.
721 		 */
722 		if (tcp->tcp_rsrv_mp != NULL) {
723 			freeb(tcp->tcp_rsrv_mp);
724 			tcp->tcp_rsrv_mp = NULL;
725 			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
726 		}
727 
728 		ASSERT(connp->conn_latch == NULL);
729 		ASSERT(connp->conn_policy == NULL);
730 
731 		if (ns != NULL) {
732 			ASSERT(tcp->tcp_tcps == NULL);
733 			connp->conn_netstack = NULL;
734 			netstack_rele(ns);
735 		}
736 
737 		ipcl_conn_cleanup(connp);
738 		connp->conn_flags = IPCL_TCPCONN;
739 		bzero(tcp, sizeof (tcp_t));
740 
741 		tcp->tcp_timercache = mp;
742 		tcp->tcp_connp = connp;
743 		kmem_cache_free(tcp_conn_cache, connp);
744 		return;
745 	}
746 	if (connp->conn_latch != NULL) {
747 		IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack);
748 		connp->conn_latch = NULL;
749 	}
750 	if (connp->conn_policy != NULL) {
751 		IPPH_REFRELE(connp->conn_policy, connp->conn_netstack);
752 		connp->conn_policy = NULL;
753 	}
754 	if (connp->conn_ipsec_opt_mp != NULL) {
755 		freemsg(connp->conn_ipsec_opt_mp);
756 		connp->conn_ipsec_opt_mp = NULL;
757 	}
758 
759 	if (connp->conn_flags & IPCL_SCTPCONN) {
760 		ASSERT(ns != NULL);
761 		sctp_free(connp);
762 		return;
763 	}
764 
765 	if (ns != NULL) {
766 		connp->conn_netstack = NULL;
767 		netstack_rele(ns);
768 	}
769 
770 	ipcl_conn_cleanup(connp);
771 
772 	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
773 	if (connp->conn_flags & IPCL_UDPCONN) {
774 		connp->conn_flags = IPCL_UDPCONN;
775 		kmem_cache_free(udp_conn_cache, connp);
776 	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
777 
778 		connp->conn_flags = IPCL_RAWIPCONN;
779 		connp->conn_ulp = IPPROTO_ICMP;
780 		kmem_cache_free(rawip_conn_cache, connp);
781 	} else if (connp->conn_flags & IPCL_RTSCONN) {
782 		connp->conn_flags = IPCL_RTSCONN;
783 		kmem_cache_free(rts_conn_cache, connp);
784 	} else {
785 		connp->conn_flags = IPCL_IPCCONN;
786 		ASSERT(connp->conn_flags & IPCL_IPCCONN);
787 		ASSERT(connp->conn_priv == NULL);
788 		kmem_cache_free(ip_conn_cache, connp);
789 	}
790 }
791 
792 /*
793  * Running in cluster mode - deregister listener information
794  */
795 
796 static void
797 ipcl_conn_unlisten(conn_t *connp)
798 {
799 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
800 	ASSERT(connp->conn_lport != 0);
801 
802 	if (cl_inet_unlisten != NULL) {
803 		sa_family_t	addr_family;
804 		uint8_t		*laddrp;
805 
806 		if (connp->conn_pkt_isv6) {
807 			addr_family = AF_INET6;
808 			laddrp = (uint8_t *)&connp->conn_bound_source_v6;
809 		} else {
810 			addr_family = AF_INET;
811 			laddrp = (uint8_t *)&connp->conn_bound_source;
812 		}
813 		(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp,
814 		    connp->conn_lport);
815 	}
816 	connp->conn_flags &= ~IPCL_CL_LISTENER;
817 }
818 
819 /*
820  * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
821  * which table the conn belonged to), so that for debugging we can see
822  * which hash table this connection was in.
823  */
824 #define	IPCL_HASH_REMOVE(connp)	{					\
825 	connf_t	*connfp = (connp)->conn_fanout;				\
826 	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
827 	if (connfp != NULL) {						\
828 		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
829 		    (void *)(connp)));					\
830 		mutex_enter(&connfp->connf_lock);			\
831 		if ((connp)->conn_next != NULL)				\
832 			(connp)->conn_next->conn_prev =			\
833 			    (connp)->conn_prev;				\
834 		if ((connp)->conn_prev != NULL)				\
835 			(connp)->conn_prev->conn_next =			\
836 			    (connp)->conn_next;				\
837 		else							\
838 			connfp->connf_head = (connp)->conn_next;	\
839 		(connp)->conn_fanout = NULL;				\
840 		(connp)->conn_next = NULL;				\
841 		(connp)->conn_prev = NULL;				\
842 		(connp)->conn_flags |= IPCL_REMOVED;			\
843 		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
844 			ipcl_conn_unlisten((connp));			\
845 		CONN_DEC_REF((connp));					\
846 		mutex_exit(&connfp->connf_lock);			\
847 	}								\
848 }
849 
850 void
851 ipcl_hash_remove(conn_t *connp)
852 {
853 	IPCL_HASH_REMOVE(connp);
854 }
855 
856 /*
857  * The whole purpose of this function is to allow removal of
858  * a conn_t from the connected hash for time-wait reclaim.
859  * This is essentially a TW reclaim fastpath where the time-wait
860  * collector checks under the fanout lock (so no one else can
861  * get access to the conn_t) that the refcnt is 2, i.e. one for
862  * TCP and one for the classifier hash list. If the ref count
863  * is indeed 2, we can just remove the conn under the lock and
864  * avoid cleaning up the conn under the squeue. This gives us
865  * improved performance.
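 *
 * A hedged sketch of the expected calling pattern (the time-wait collector
 * itself lives in TCP, not in this file):
 *
 *	mutex_enter(&connfp->connf_lock);
 *	mutex_enter(&connp->conn_lock);
 *	if (connp->conn_ref == 2)
 *		ipcl_hash_remove_locked(connp, connfp);
 *	mutex_exit(&connp->conn_lock);
 *	mutex_exit(&connfp->connf_lock);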
866  */
867 void
868 ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
869 {
870 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
871 	ASSERT(MUTEX_HELD(&connp->conn_lock));
872 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
873 
874 	if ((connp)->conn_next != NULL) {
875 		(connp)->conn_next->conn_prev = (connp)->conn_prev;
876 	}
877 	if ((connp)->conn_prev != NULL) {
878 		(connp)->conn_prev->conn_next = (connp)->conn_next;
879 	} else {
880 		connfp->connf_head = (connp)->conn_next;
881 	}
882 	(connp)->conn_fanout = NULL;
883 	(connp)->conn_next = NULL;
884 	(connp)->conn_prev = NULL;
885 	(connp)->conn_flags |= IPCL_REMOVED;
886 	ASSERT((connp)->conn_ref == 2);
887 	(connp)->conn_ref--;
888 }
889 
890 #define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
891 	ASSERT((connp)->conn_fanout == NULL);				\
892 	ASSERT((connp)->conn_next == NULL);				\
893 	ASSERT((connp)->conn_prev == NULL);				\
894 	if ((connfp)->connf_head != NULL) {				\
895 		(connfp)->connf_head->conn_prev = (connp);		\
896 		(connp)->conn_next = (connfp)->connf_head;		\
897 	}								\
898 	(connp)->conn_fanout = (connfp);				\
899 	(connfp)->connf_head = (connp);					\
900 	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
901 	    IPCL_CONNECTED;						\
902 	CONN_INC_REF(connp);						\
903 }
904 
905 #define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
906 	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
907 	    "connp %p", (void *)(connfp), (void *)(connp)));		\
908 	IPCL_HASH_REMOVE((connp));					\
909 	mutex_enter(&(connfp)->connf_lock);				\
910 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
911 	mutex_exit(&(connfp)->connf_lock);				\
912 }
913 
914 #define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
915 	conn_t *pconnp = NULL, *nconnp;					\
916 	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
917 	    "connp %p", (void *)connfp, (void *)(connp)));		\
918 	IPCL_HASH_REMOVE((connp));					\
919 	mutex_enter(&(connfp)->connf_lock);				\
920 	nconnp = (connfp)->connf_head;					\
921 	while (nconnp != NULL &&					\
922 	    !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) {			\
923 		pconnp = nconnp;					\
924 		nconnp = nconnp->conn_next;				\
925 	}								\
926 	if (pconnp != NULL) {						\
927 		pconnp->conn_next = (connp);				\
928 		(connp)->conn_prev = pconnp;				\
929 	} else {							\
930 		(connfp)->connf_head = (connp);				\
931 	}								\
932 	if (nconnp != NULL) {						\
933 		(connp)->conn_next = nconnp;				\
934 		nconnp->conn_prev = (connp);				\
935 	}								\
936 	(connp)->conn_fanout = (connfp);				\
937 	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
938 	    IPCL_BOUND;							\
939 	CONN_INC_REF(connp);						\
940 	mutex_exit(&(connfp)->connf_lock);				\
941 }
942 
943 #define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
944 	conn_t **list, *prev, *next;					\
945 	boolean_t isv4mapped =						\
946 	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
947 	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
948 	    "connp %p", (void *)(connfp), (void *)(connp)));		\
949 	IPCL_HASH_REMOVE((connp));					\
950 	mutex_enter(&(connfp)->connf_lock);				\
951 	list = &(connfp)->connf_head;					\
952 	prev = NULL;							\
953 	while ((next = *list) != NULL) {				\
954 		if (isv4mapped &&					\
955 		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
956 		    connp->conn_zoneid == next->conn_zoneid) {		\
957 			(connp)->conn_next = next;			\
958 			if (prev != NULL)				\
959 				prev = next->conn_prev;			\
960 			next->conn_prev = (connp);			\
961 			break;						\
962 		}							\
963 		list = &next->conn_next;				\
964 		prev = next;						\
965 	}								\
966 	(connp)->conn_prev = prev;					\
967 	*list = (connp);						\
968 	(connp)->conn_fanout = (connfp);				\
969 	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
970 	    IPCL_BOUND;							\
971 	CONN_INC_REF((connp));						\
972 	mutex_exit(&(connfp)->connf_lock);				\
973 }
974 
975 void
976 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
977 {
978 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
979 }
980 
981 void
982 ipcl_proto_insert(conn_t *connp, uint8_t protocol)
983 {
984 	connf_t	*connfp;
985 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
986 
987 	ASSERT(connp != NULL);
988 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
989 	    protocol == IPPROTO_ESP);
990 
991 	connp->conn_ulp = protocol;
992 
993 	/* Insert it in the protocol hash */
994 	connfp = &ipst->ips_ipcl_proto_fanout[protocol];
995 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
996 }
997 
998 void
999 ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol)
1000 {
1001 	connf_t	*connfp;
1002 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1003 
1004 	ASSERT(connp != NULL);
1005 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
1006 	    protocol == IPPROTO_ESP);
1007 
1008 	connp->conn_ulp = protocol;
1009 
1010 	/* Insert it in the Bind Hash */
1011 	connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1012 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1013 }
1014 
1015 /*
1016  * This function is currently used only for inserting SCTP raw sockets.
1017  * This may change later.
1018  *
1019  * Note that only one raw socket can be bound to a port.  The param
1020  * lport is in network byte order.
1021  */
1022 static int
1023 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1024 {
1025 	connf_t	*connfp;
1026 	conn_t	*oconnp;
1027 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1028 
1029 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1030 
1031 	/* Check for existing raw socket already bound to the port. */
1032 	mutex_enter(&connfp->connf_lock);
1033 	for (oconnp = connfp->connf_head; oconnp != NULL;
1034 	    oconnp = oconnp->conn_next) {
1035 		if (oconnp->conn_lport == lport &&
1036 		    oconnp->conn_zoneid == connp->conn_zoneid &&
1037 		    oconnp->conn_af_isv6 == connp->conn_af_isv6 &&
1038 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
1039 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) ||
1040 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) ||
1041 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) ||
1042 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6,
1043 		    &connp->conn_srcv6))) {
1044 			break;
1045 		}
1046 	}
1047 	mutex_exit(&connfp->connf_lock);
1048 	if (oconnp != NULL)
1049 		return (EADDRNOTAVAIL);
1050 
1051 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
1052 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) {
1053 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
1054 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) {
1055 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1056 		} else {
1057 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1058 		}
1059 	} else {
1060 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1061 	}
1062 	return (0);
1063 }
1064 
1065 /*
1066  * Check for a MAC exemption conflict on a labeled system.  Note that for
1067  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1068  * transport layer.  This check is for binding all other protocols.
1069  *
1070  * Returns true if there's a conflict.
1071  */
1072 static boolean_t
1073 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1074 {
1075 	connf_t	*connfp;
1076 	conn_t *tconn;
1077 
1078 	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
1079 	mutex_enter(&connfp->connf_lock);
1080 	for (tconn = connfp->connf_head; tconn != NULL;
1081 	    tconn = tconn->conn_next) {
1082 		/* We don't allow v4 fallback for v6 raw socket */
1083 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
1084 			continue;
1085 		/* If neither is exempt, then there's no conflict */
1086 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
1087 			continue;
1088 		/* If both are bound to different specific addrs, ok */
1089 		if (connp->conn_src != INADDR_ANY &&
1090 		    tconn->conn_src != INADDR_ANY &&
1091 		    connp->conn_src != tconn->conn_src)
1092 			continue;
1093 		/* These two conflict; fail */
1094 		break;
1095 	}
1096 	mutex_exit(&connfp->connf_lock);
1097 	return (tconn != NULL);
1098 }
1099 
1100 static boolean_t
1101 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1102 {
1103 	connf_t	*connfp;
1104 	conn_t *tconn;
1105 
1106 	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
1107 	mutex_enter(&connfp->connf_lock);
1108 	for (tconn = connfp->connf_head; tconn != NULL;
1109 	    tconn = tconn->conn_next) {
1110 		/* We don't allow v4 fallback for v6 raw socket */
1111 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
1112 			continue;
1113 		/* If neither is exempt, then there's no conflict */
1114 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
1115 			continue;
1116 		/* If both are bound to different addrs, ok */
1117 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) &&
1118 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) &&
1119 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6))
1120 			continue;
1121 		/* These two conflict; fail */
1122 		break;
1123 	}
1124 	mutex_exit(&connfp->connf_lock);
1125 	return (tconn != NULL);
1126 }
1127 
1128 /*
1129  * (v4, v6) bind hash insertion routines
1130  */
1131 int
1132 ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport)
1133 {
1134 	connf_t	*connfp;
1135 #ifdef	IPCL_DEBUG
1136 	char	buf[INET_NTOA_BUFSIZE];
1137 #endif
1138 	int	ret = 0;
1139 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1140 
1141 	ASSERT(connp);
1142 
1143 	IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, "
1144 	    "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport));
1145 
1146 	connp->conn_ulp = protocol;
1147 	IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6);
1148 	connp->conn_lport = lport;
1149 
1150 	switch (protocol) {
1151 	default:
1152 		if (is_system_labeled() &&
1153 		    check_exempt_conflict_v4(connp, ipst))
1154 			return (EADDRINUSE);
1155 		/* FALLTHROUGH */
1156 	case IPPROTO_UDP:
1157 		if (protocol == IPPROTO_UDP) {
1158 			IPCL_DEBUG_LVL(64,
1159 			    ("ipcl_bind_insert: connp %p - udp\n",
1160 			    (void *)connp));
1161 			connfp = &ipst->ips_ipcl_udp_fanout[
1162 			    IPCL_UDP_HASH(lport, ipst)];
1163 		} else {
1164 			IPCL_DEBUG_LVL(64,
1165 			    ("ipcl_bind_insert: connp %p - protocol\n",
1166 			    (void *)connp));
1167 			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
1168 		}
1169 
1170 		if (connp->conn_rem != INADDR_ANY) {
1171 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1172 		} else if (connp->conn_src != INADDR_ANY) {
1173 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1174 		} else {
1175 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1176 		}
1177 		break;
1178 
1179 	case IPPROTO_TCP:
1180 
1181 		/* Insert it in the Bind Hash */
1182 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1183 		connfp = &ipst->ips_ipcl_bind_fanout[
1184 		    IPCL_BIND_HASH(lport, ipst)];
1185 		if (connp->conn_src != INADDR_ANY) {
1186 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1187 		} else {
1188 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1189 		}
1190 		if (cl_inet_listen != NULL) {
1191 			ASSERT(!connp->conn_pkt_isv6);
1192 			connp->conn_flags |= IPCL_CL_LISTENER;
1193 			(*cl_inet_listen)(IPPROTO_TCP, AF_INET,
1194 			    (uint8_t *)&connp->conn_bound_source, lport);
1195 		}
1196 		break;
1197 
1198 	case IPPROTO_SCTP:
1199 		ret = ipcl_sctp_hash_insert(connp, lport);
1200 		break;
1201 	}
1202 
1203 	return (ret);
1204 }
1205 
1206 int
1207 ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
1208     uint16_t lport)
1209 {
1210 	connf_t	*connfp;
1211 	int	ret = 0;
1212 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1213 
1214 	ASSERT(connp);
1215 
1216 	connp->conn_ulp = protocol;
1217 	connp->conn_srcv6 = *src;
1218 	connp->conn_lport = lport;
1219 
1220 	switch (protocol) {
1221 	default:
1222 		if (is_system_labeled() &&
1223 		    check_exempt_conflict_v6(connp, ipst))
1224 			return (EADDRINUSE);
1225 		/* FALLTHROUGH */
1226 	case IPPROTO_UDP:
1227 		if (protocol == IPPROTO_UDP) {
1228 			IPCL_DEBUG_LVL(128,
1229 			    ("ipcl_bind_insert_v6: connp %p - udp\n",
1230 			    (void *)connp));
1231 			connfp = &ipst->ips_ipcl_udp_fanout[
1232 			    IPCL_UDP_HASH(lport, ipst)];
1233 		} else {
1234 			IPCL_DEBUG_LVL(128,
1235 			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
1236 			    (void *)connp));
1237 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1238 		}
1239 
1240 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
1241 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1242 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
1243 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1244 		} else {
1245 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1246 		}
1247 		break;
1248 
1249 	case IPPROTO_TCP:
1250 		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */
1251 
1252 		/* Insert it in the Bind Hash */
1253 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1254 		connfp = &ipst->ips_ipcl_bind_fanout[
1255 		    IPCL_BIND_HASH(lport, ipst)];
1256 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
1257 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1258 		} else {
1259 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1260 		}
1261 		if (cl_inet_listen != NULL) {
1262 			sa_family_t	addr_family;
1263 			uint8_t		*laddrp;
1264 
1265 			if (connp->conn_pkt_isv6) {
1266 				addr_family = AF_INET6;
1267 				laddrp =
1268 				    (uint8_t *)&connp->conn_bound_source_v6;
1269 			} else {
1270 				addr_family = AF_INET;
1271 				laddrp = (uint8_t *)&connp->conn_bound_source;
1272 			}
1273 			connp->conn_flags |= IPCL_CL_LISTENER;
1274 			(*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp,
1275 			    lport);
1276 		}
1277 		break;
1278 
1279 	case IPPROTO_SCTP:
1280 		ret = ipcl_sctp_hash_insert(connp, lport);
1281 		break;
1282 	}
1283 
1284 	return (ret);
1285 }
1286 
1287 /*
1288  * ipcl_conn_hash insertion routines.
1289  */
1290 int
1291 ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
1292     ipaddr_t rem, uint32_t ports)
1293 {
1294 	connf_t		*connfp;
1295 	uint16_t	*up;
1296 	conn_t		*tconnp;
1297 #ifdef	IPCL_DEBUG
1298 	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
1299 #endif
1300 	in_port_t	lport;
1301 	int		ret = 0;
1302 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1303 
1304 	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
1305 	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
1306 	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
1307 	    ports, protocol));
1308 
1309 	switch (protocol) {
1310 	case IPPROTO_TCP:
1311 		if (!(connp->conn_flags & IPCL_EAGER)) {
1312 			/*
1313 			 * For an eager connection, i.e. a connection which
1314 			 * has just been created, the initialization is
1315 			 * already done in IP at conn creation time, so
1316 			 * we can skip the checks here.
1317 			 */
1318 			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
1319 		}
1320 		connfp = &ipst->ips_ipcl_conn_fanout[
1321 		    IPCL_CONN_HASH(connp->conn_rem,
1322 		    connp->conn_ports, ipst)];
1323 		mutex_enter(&connfp->connf_lock);
1324 		for (tconnp = connfp->connf_head; tconnp != NULL;
1325 		    tconnp = tconnp->conn_next) {
1326 			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
1327 			    connp->conn_rem, connp->conn_src,
1328 			    connp->conn_ports)) {
1329 
1330 				/* Already have a conn. bail out */
1331 				mutex_exit(&connfp->connf_lock);
1332 				return (EADDRINUSE);
1333 			}
1334 		}
1335 		if (connp->conn_fanout != NULL) {
1336 			/*
1337 			 * Probably an XTI/TLI application trying to do a
1338 			 * rebind. Let it happen.
1339 			 */
1340 			mutex_exit(&connfp->connf_lock);
1341 			IPCL_HASH_REMOVE(connp);
1342 			mutex_enter(&connfp->connf_lock);
1343 		}
1344 
1345 		ASSERT(connp->conn_recv != NULL);
1346 
1347 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1348 		mutex_exit(&connfp->connf_lock);
1349 		break;
1350 
1351 	case IPPROTO_SCTP:
1352 		/*
1353 		 * The raw socket may have already been bound; remove it
1354 		 * from the hash first.
1355 		 */
1356 		IPCL_HASH_REMOVE(connp);
1357 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
1358 		ret = ipcl_sctp_hash_insert(connp, lport);
1359 		break;
1360 
1361 	default:
1362 		/*
1363 		 * Check for conflicts among MAC exempt bindings.  For
1364 		 * transports with port numbers, this is done by the upper
1365 		 * level per-transport binding logic.  For all others, it's
1366 		 * done here.
1367 		 */
1368 		if (is_system_labeled() &&
1369 		    check_exempt_conflict_v4(connp, ipst))
1370 			return (EADDRINUSE);
1371 		/* FALLTHROUGH */
1372 
1373 	case IPPROTO_UDP:
1374 		up = (uint16_t *)&ports;
1375 		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
1376 		if (protocol == IPPROTO_UDP) {
1377 			connfp = &ipst->ips_ipcl_udp_fanout[
1378 			    IPCL_UDP_HASH(up[1], ipst)];
1379 		} else {
1380 			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
1381 		}
1382 
1383 		if (connp->conn_rem != INADDR_ANY) {
1384 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1385 		} else if (connp->conn_src != INADDR_ANY) {
1386 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1387 		} else {
1388 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1389 		}
1390 		break;
1391 	}
1392 
1393 	return (ret);
1394 }
1395 
1396 int
1397 ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
1398     const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
1399 {
1400 	connf_t		*connfp;
1401 	uint16_t	*up;
1402 	conn_t		*tconnp;
1403 	in_port_t	lport;
1404 	int		ret = 0;
1405 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1406 
1407 	switch (protocol) {
1408 	case IPPROTO_TCP:
1409 		/* Just need to insert a conn struct */
1410 		if (!(connp->conn_flags & IPCL_EAGER)) {
1411 			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
1412 		}
1413 		connfp = &ipst->ips_ipcl_conn_fanout[
1414 		    IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports,
1415 		    ipst)];
1416 		mutex_enter(&connfp->connf_lock);
1417 		for (tconnp = connfp->connf_head; tconnp != NULL;
1418 		    tconnp = tconnp->conn_next) {
1419 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
1420 			    connp->conn_remv6, connp->conn_srcv6,
1421 			    connp->conn_ports) &&
1422 			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
1423 			    tconnp->conn_tcp->tcp_bound_if == ifindex)) {
1424 				/* Already have a conn. bail out */
1425 				mutex_exit(&connfp->connf_lock);
1426 				return (EADDRINUSE);
1427 			}
1428 		}
1429 		if (connp->conn_fanout != NULL) {
1430 			/*
1431 			 * Probably an XTI/TLI application trying to do a
1432 			 * rebind. Let it happen.
1433 			 */
1434 			mutex_exit(&connfp->connf_lock);
1435 			IPCL_HASH_REMOVE(connp);
1436 			mutex_enter(&connfp->connf_lock);
1437 		}
1438 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1439 		mutex_exit(&connfp->connf_lock);
1440 		break;
1441 
1442 	case IPPROTO_SCTP:
1443 		IPCL_HASH_REMOVE(connp);
1444 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
1445 		ret = ipcl_sctp_hash_insert(connp, lport);
1446 		break;
1447 
1448 	default:
1449 		if (is_system_labeled() &&
1450 		    check_exempt_conflict_v6(connp, ipst))
1451 			return (EADDRINUSE);
1452 		/* FALLTHROUGH */
1453 	case IPPROTO_UDP:
1454 		up = (uint16_t *)&ports;
1455 		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
1456 		if (protocol == IPPROTO_UDP) {
1457 			connfp = &ipst->ips_ipcl_udp_fanout[
1458 			    IPCL_UDP_HASH(up[1], ipst)];
1459 		} else {
1460 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1461 		}
1462 
1463 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
1464 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1465 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
1466 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1467 		} else {
1468 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1469 		}
1470 		break;
1471 	}
1472 
1473 	return (ret);
1474 }
1475 
1476 /*
1477  * IPv4 packet classifying function. Looks up the fanout tables to
1478  * find the conn the packet belongs to. Returns the conn with
1479  * a reference held, NULL otherwise.
1480  *
1481  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1482  * Lookup" comment block are applied.  Labels are also checked as described
1483  * above.  If the packet is from the inside (looped back), and is from the same
1484  * zone, then label checks are omitted.
1485  */
1486 conn_t *
1487 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
1488     ip_stack_t *ipst)
1489 {
1490 	ipha_t	*ipha;
1491 	connf_t	*connfp, *bind_connfp;
1492 	uint16_t lport;
1493 	uint16_t fport;
1494 	uint32_t ports;
1495 	conn_t	*connp;
1496 	uint16_t  *up;
1497 	boolean_t shared_addr;
1498 	boolean_t unlabeled;
1499 
1500 	ipha = (ipha_t *)mp->b_rptr;
1501 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1502 
1503 	switch (protocol) {
1504 	case IPPROTO_TCP:
1505 		ports = *(uint32_t *)up;
1506 		connfp =
1507 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1508 		    ports, ipst)];
1509 		mutex_enter(&connfp->connf_lock);
1510 		for (connp = connfp->connf_head; connp != NULL;
1511 		    connp = connp->conn_next) {
1512 			if (IPCL_CONN_MATCH(connp, protocol,
1513 			    ipha->ipha_src, ipha->ipha_dst, ports))
1514 				break;
1515 		}
1516 
1517 		if (connp != NULL) {
1518 			/*
1519 			 * We have a fully-bound TCP connection.
1520 			 *
1521 			 * For labeled systems, there's no need to check the
1522 			 * label here.  It's known to be good as we checked
1523 			 * before allowing the connection to become bound.
1524 			 */
1525 			CONN_INC_REF(connp);
1526 			mutex_exit(&connfp->connf_lock);
1527 			return (connp);
1528 		}
1529 
1530 		mutex_exit(&connfp->connf_lock);
1531 
1532 		lport = up[1];
1533 		unlabeled = B_FALSE;
1534 		/* Cred cannot be null on IPv4 */
1535 		if (is_system_labeled())
1536 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
1537 			    TSLF_UNLABELED) != 0;
1538 		shared_addr = (zoneid == ALL_ZONES);
1539 		if (shared_addr) {
1540 			/*
1541 			 * No need to handle exclusive-stack zones since
1542 			 * ALL_ZONES only applies to the shared stack.
1543 			 */
1544 			zoneid = tsol_mlp_findzone(protocol, lport);
1545 			/*
1546 			 * If no shared MLP is found, tsol_mlp_findzone returns
1547 			 * ALL_ZONES.  In that case, we assume it's SLP, and
1548 			 * search for the zone based on the packet label.
1549 			 *
1550 			 * If there is such a zone, we prefer to find a
1551 			 * connection in it.  Otherwise, we look for a
1552 			 * MAC-exempt connection in any zone whose label
1553 			 * dominates the default label on the packet.
1554 			 */
1555 			if (zoneid == ALL_ZONES)
1556 				zoneid = tsol_packet_to_zoneid(mp);
1557 			else
1558 				unlabeled = B_FALSE;
1559 		}
1560 
1561 		bind_connfp =
1562 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1563 		mutex_enter(&bind_connfp->connf_lock);
1564 		for (connp = bind_connfp->connf_head; connp != NULL;
1565 		    connp = connp->conn_next) {
1566 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1567 			    lport) && (IPCL_ZONE_MATCH(connp, zoneid) ||
1568 			    (unlabeled && connp->conn_mac_exempt)))
1569 				break;
1570 		}
1571 
1572 		/*
1573 		 * If the matching connection is SLP on a private address, then
1574 		 * the label on the packet must match the local zone's label.
1575 		 * Otherwise, it must be in the label range defined by tnrh.
1576 		 * This is ensured by tsol_receive_label.
1577 		 */
1578 		if (connp != NULL && is_system_labeled() &&
1579 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1580 		    shared_addr, connp)) {
1581 				DTRACE_PROBE3(
1582 				    tx__ip__log__info__classify__tcp,
1583 				    char *,
1584 				    "connp(1) could not receive mp(2)",
1585 				    conn_t *, connp, mblk_t *, mp);
1586 			connp = NULL;
1587 		}
1588 
1589 		if (connp != NULL) {
1590 			/* Have a listener at least */
1591 			CONN_INC_REF(connp);
1592 			mutex_exit(&bind_connfp->connf_lock);
1593 			return (connp);
1594 		}
1595 
1596 		mutex_exit(&bind_connfp->connf_lock);
1597 
1598 		IPCL_DEBUG_LVL(512,
1599 		    ("ipcl_classify: couldn't classify mp = %p\n",
1600 		    (void *)mp));
1601 		break;
1602 
1603 	case IPPROTO_UDP:
1604 		lport = up[1];
1605 		unlabeled = B_FALSE;
1606 		/* Cred cannot be null on IPv4 */
1607 		if (is_system_labeled())
1608 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
1609 			    TSLF_UNLABELED) != 0;
1610 		shared_addr = (zoneid == ALL_ZONES);
1611 		if (shared_addr) {
1612 			/*
1613 			 * No need to handle exclusive-stack zones since
1614 			 * ALL_ZONES only applies to the shared stack.
1615 			 */
1616 			zoneid = tsol_mlp_findzone(protocol, lport);
1617 			/*
1618 			 * If no shared MLP is found, tsol_mlp_findzone returns
1619 			 * ALL_ZONES.  In that case, we assume it's SLP, and
1620 			 * search for the zone based on the packet label.
1621 			 *
1622 			 * If there is such a zone, we prefer to find a
1623 			 * connection in it.  Otherwise, we look for a
1624 			 * MAC-exempt connection in any zone whose label
1625 			 * dominates the default label on the packet.
1626 			 */
1627 			if (zoneid == ALL_ZONES)
1628 				zoneid = tsol_packet_to_zoneid(mp);
1629 			else
1630 				unlabeled = B_FALSE;
1631 		}
1632 		fport = up[0];
1633 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
1634 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1635 		mutex_enter(&connfp->connf_lock);
1636 		for (connp = connfp->connf_head; connp != NULL;
1637 		    connp = connp->conn_next) {
1638 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1639 			    fport, ipha->ipha_src) &&
1640 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
1641 			    (unlabeled && connp->conn_mac_exempt)))
1642 				break;
1643 		}
1644 
1645 		if (connp != NULL && is_system_labeled() &&
1646 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1647 		    shared_addr, connp)) {
1648 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1649 			    char *, "connp(1) could not receive mp(2)",
1650 			    conn_t *, connp, mblk_t *, mp);
1651 			connp = NULL;
1652 		}
1653 
1654 		if (connp != NULL) {
1655 			CONN_INC_REF(connp);
1656 			mutex_exit(&connfp->connf_lock);
1657 			return (connp);
1658 		}
1659 
1660 		/*
1661 		 * We shouldn't come here for multicast/broadcast packets
1662 		 */
1663 		mutex_exit(&connfp->connf_lock);
1664 		IPCL_DEBUG_LVL(512,
1665 		    ("ipcl_classify: can't find udp conn_t for ports: %x %x",
1666 		    lport, fport));
1667 		break;
1668 	}
1669 
1670 	return (NULL);
1671 }
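
/*
 * Every lookup in this file follows the same basic pattern: hash to a
 * fanout bucket, walk the chain under connf_lock, and take a reference on
 * the match before dropping the lock, so the conn_t cannot be freed while
 * the caller is still using it.  A minimal sketch of that pattern, with a
 * generic MATCH() standing in for the various IPCL_*_MATCH macros:
 *
 *	connfp = &ipst->ips_ipcl_xxx_fanout[HASH(key, ipst)];
 *	mutex_enter(&connfp->connf_lock);
 *	for (connp = connfp->connf_head; connp != NULL;
 *	    connp = connp->conn_next) {
 *		if (MATCH(connp, key))
 *			break;
 *	}
 *	if (connp != NULL)
 *		CONN_INC_REF(connp);
 *	mutex_exit(&connfp->connf_lock);
 *	return (connp);
 */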
1672 
1673 conn_t *
1674 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
1675     ip_stack_t *ipst)
1676 {
1677 	ip6_t		*ip6h;
1678 	connf_t		*connfp, *bind_connfp;
1679 	uint16_t	lport;
1680 	uint16_t	fport;
1681 	tcph_t		*tcph;
1682 	uint32_t	ports;
1683 	conn_t		*connp;
1684 	uint16_t	*up;
1685 	boolean_t	shared_addr;
1686 	boolean_t	unlabeled;
1687 
1688 	ip6h = (ip6_t *)mp->b_rptr;
1689 
1690 	switch (protocol) {
1691 	case IPPROTO_TCP:
1692 		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
1693 		up = (uint16_t *)tcph->th_lport;
1694 		ports = *(uint32_t *)up;
1695 
1696 		connfp =
1697 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1698 		    ports, ipst)];
1699 		mutex_enter(&connfp->connf_lock);
1700 		for (connp = connfp->connf_head; connp != NULL;
1701 		    connp = connp->conn_next) {
1702 			if (IPCL_CONN_MATCH_V6(connp, protocol,
1703 			    ip6h->ip6_src, ip6h->ip6_dst, ports))
1704 				break;
1705 		}
1706 
1707 		if (connp != NULL) {
1708 			/*
1709 			 * We have a fully-bound TCP connection.
1710 			 *
1711 			 * For labeled systems, there's no need to check the
1712 			 * label here.  It's known to be good as we checked
1713 			 * before allowing the connection to become bound.
1714 			 */
1715 			CONN_INC_REF(connp);
1716 			mutex_exit(&connfp->connf_lock);
1717 			return (connp);
1718 		}
1719 
1720 		mutex_exit(&connfp->connf_lock);
1721 
1722 		lport = up[1];
1723 		unlabeled = B_FALSE;
1724 		/* Cred can be null on IPv6 */
1725 		if (is_system_labeled()) {
1726 			cred_t *cr = DB_CRED(mp);
1727 
1728 			unlabeled = (cr != NULL &&
1729 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
1730 		}
1731 		shared_addr = (zoneid == ALL_ZONES);
1732 		if (shared_addr) {
1733 			/*
1734 			 * No need to handle exclusive-stack zones since
1735 			 * ALL_ZONES only applies to the shared stack.
1736 			 */
1737 			zoneid = tsol_mlp_findzone(protocol, lport);
1738 			/*
1739 			 * If no shared MLP is found, tsol_mlp_findzone returns
1740 			 * ALL_ZONES.  In that case, we assume it's SLP, and
1741 			 * search for the zone based on the packet label.
1742 			 *
1743 			 * If there is such a zone, we prefer to find a
1744 			 * connection in it.  Otherwise, we look for a
1745 			 * MAC-exempt connection in any zone whose label
1746 			 * dominates the default label on the packet.
1747 			 */
1748 			if (zoneid == ALL_ZONES)
1749 				zoneid = tsol_packet_to_zoneid(mp);
1750 			else
1751 				unlabeled = B_FALSE;
1752 		}
1753 
1754 		bind_connfp =
1755 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1756 		mutex_enter(&bind_connfp->connf_lock);
1757 		for (connp = bind_connfp->connf_head; connp != NULL;
1758 		    connp = connp->conn_next) {
1759 			if (IPCL_BIND_MATCH_V6(connp, protocol,
1760 			    ip6h->ip6_dst, lport) &&
1761 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
1762 			    (unlabeled && connp->conn_mac_exempt)))
1763 				break;
1764 		}
1765 
1766 		if (connp != NULL && is_system_labeled() &&
1767 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1768 		    shared_addr, connp)) {
1769 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1770 			    char *, "connp(1) could not receive mp(2)",
1771 			    conn_t *, connp, mblk_t *, mp);
1772 			connp = NULL;
1773 		}
1774 
1775 		if (connp != NULL) {
1776 			/* Have a listener at least */
1777 			CONN_INC_REF(connp);
1778 			mutex_exit(&bind_connfp->connf_lock);
1779 			IPCL_DEBUG_LVL(512,
1780 			    ("ipcl_classify_v6: found listener "
1781 			    "connp = %p\n", (void *)connp));
1782 
1783 			return (connp);
1784 		}
1785 
1786 		mutex_exit(&bind_connfp->connf_lock);
1787 
1788 		IPCL_DEBUG_LVL(512,
1789 		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
1790 		    (void *)mp));
1791 		break;
1792 
1793 	case IPPROTO_UDP:
1794 		up = (uint16_t *)&mp->b_rptr[hdr_len];
1795 		lport = up[1];
1796 		unlabeled = B_FALSE;
1797 		/* Cred can be null on IPv6 */
1798 		if (is_system_labeled()) {
1799 			cred_t *cr = DB_CRED(mp);
1800 
1801 			unlabeled = (cr != NULL &&
1802 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
1803 		}
1804 		shared_addr = (zoneid == ALL_ZONES);
1805 		if (shared_addr) {
1806 			/*
1807 			 * No need to handle exclusive-stack zones since
1808 			 * ALL_ZONES only applies to the shared stack.
1809 			 */
1810 			zoneid = tsol_mlp_findzone(protocol, lport);
1811 			/*
1812 			 * If no shared MLP is found, tsol_mlp_findzone returns
1813 			 * ALL_ZONES.  In that case, we assume it's SLP, and
1814 			 * search for the zone based on the packet label.
1815 			 *
1816 			 * If there is such a zone, we prefer to find a
1817 			 * connection in it.  Otherwise, we look for a
1818 			 * MAC-exempt connection in any zone whose label
1819 			 * dominates the default label on the packet.
1820 			 */
1821 			if (zoneid == ALL_ZONES)
1822 				zoneid = tsol_packet_to_zoneid(mp);
1823 			else
1824 				unlabeled = B_FALSE;
1825 		}
1826 
1827 		fport = up[0];
1828 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
1829 		    fport));
1830 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1831 		mutex_enter(&connfp->connf_lock);
1832 		for (connp = connfp->connf_head; connp != NULL;
1833 		    connp = connp->conn_next) {
1834 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1835 			    fport, ip6h->ip6_src) &&
1836 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
1837 			    (unlabeled && connp->conn_mac_exempt)))
1838 				break;
1839 		}
1840 
1841 		if (connp != NULL && is_system_labeled() &&
1842 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1843 		    shared_addr, connp)) {
1844 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1845 			    char *, "connp(1) could not receive mp(2)",
1846 			    conn_t *, connp, mblk_t *, mp);
1847 			connp = NULL;
1848 		}
1849 
1850 		if (connp != NULL) {
1851 			CONN_INC_REF(connp);
1852 			mutex_exit(&connfp->connf_lock);
1853 			return (connp);
1854 		}
1855 
1856 		/*
1857 		 * We shouldn't come here for multicast/broadcast packets
1858 		 */
1859 		mutex_exit(&connfp->connf_lock);
1860 		IPCL_DEBUG_LVL(512,
1861 		    ("ipcl_classify_v6: can't find udp conn_t for ports: %x %x",
1862 		    lport, fport));
1863 		break;
1864 	}
1865 
1866 	return (NULL);
1867 }
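
/*
 * The shared-address (ALL_ZONES) handling repeated in both classifiers
 * above boils down to the following sketch of the same logic:
 *
 *	if (zoneid == ALL_ZONES) {
 *		zoneid = tsol_mlp_findzone(protocol, lport);
 *		if (zoneid == ALL_ZONES)
 *			zoneid = tsol_packet_to_zoneid(mp);
 *		else
 *			unlabeled = B_FALSE;
 *	}
 *
 * i.e. prefer a shared MLP zone for the port; when none exists, fall back
 * to the zone implied by the packet's label.
 */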
1868 
1869 /*
1870  * wrapper around ipcl_classify_(v4,v6) routines.
1871  */
1872 conn_t *
1873 ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst)
1874 {
1875 	uint16_t	hdr_len;
1876 	ipha_t		*ipha;
1877 	uint8_t		*nexthdrp;
1878 
1879 	if (MBLKL(mp) < sizeof (ipha_t))
1880 		return (NULL);
1881 
1882 	switch (IPH_HDR_VERSION(mp->b_rptr)) {
1883 	case IPV4_VERSION:
1884 		ipha = (ipha_t *)mp->b_rptr;
1885 		hdr_len = IPH_HDR_LENGTH(ipha);
1886 		return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len,
1887 		    zoneid, ipst));
1888 	case IPV6_VERSION:
1889 		if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr,
1890 		    &hdr_len, &nexthdrp))
1891 			return (NULL);
1892 
1893 		return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst));
1894 	}
1895 
1896 	return (NULL);
1897 }
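
/*
 * A minimal, hypothetical caller of the wrapper above (the surrounding
 * code is illustrative only, not taken from the IP datapath).  The
 * reference taken by the classifier must be released with CONN_DEC_REF()
 * once the caller is done with the conn:
 *
 *	conn_t	*connp;
 *
 *	if ((connp = ipcl_classify(mp, zoneid, ipst)) == NULL) {
 *		freemsg(mp);
 *		return;
 *	}
 *	... deliver mp to the connection ...
 *	CONN_DEC_REF(connp);
 */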
1898 
1899 conn_t *
1900 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid,
1901     uint32_t ports, ipha_t *hdr, ip_stack_t *ipst)
1902 {
1903 	connf_t		*connfp;
1904 	conn_t		*connp;
1905 	in_port_t	lport;
1906 	int		af;
1907 	boolean_t	shared_addr;
1908 	boolean_t	unlabeled;
1909 	const void	*dst;
1910 
1911 	lport = ((uint16_t *)&ports)[1];
1912 
1913 	unlabeled = B_FALSE;
1914 	/* Cred can be null on IPv6 */
1915 	if (is_system_labeled()) {
1916 		cred_t *cr = DB_CRED(mp);
1917 
1918 		unlabeled = (cr != NULL &&
1919 		    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
1920 	}
1921 	shared_addr = (zoneid == ALL_ZONES);
1922 	if (shared_addr) {
1923 		/*
1924 		 * No need to handle exclusive-stack zones since ALL_ZONES
1925 		 * only applies to the shared stack.
1926 		 */
1927 		zoneid = tsol_mlp_findzone(protocol, lport);
1928 		/*
1929 		 * If no shared MLP is found, tsol_mlp_findzone returns
1930 		 * ALL_ZONES.  In that case, we assume it's SLP, and search for
1931 		 * the zone based on the packet label.
1932 		 *
1933 		 * If there is such a zone, we prefer to find a connection in
1934 		 * it.  Otherwise, we look for a MAC-exempt connection in any
1935 		 * zone whose label dominates the default label on the packet.
1936 		 */
1937 		if (zoneid == ALL_ZONES)
1938 			zoneid = tsol_packet_to_zoneid(mp);
1939 		else
1940 			unlabeled = B_FALSE;
1941 	}
1942 
1943 	af = IPH_HDR_VERSION(hdr);
1944 	dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst :
1945 	    (const void *)&((ip6_t *)hdr)->ip6_dst;
1946 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1947 
1948 	mutex_enter(&connfp->connf_lock);
1949 	for (connp = connfp->connf_head; connp != NULL;
1950 	    connp = connp->conn_next) {
1951 		/* We don't allow v4 fallback for a v6 raw socket. */
1952 		if (af == (connp->conn_af_isv6 ? IPV4_VERSION :
1953 		    IPV6_VERSION))
1954 			continue;
1955 		if (connp->conn_fully_bound) {
1956 			if (af == IPV4_VERSION) {
1957 				if (!IPCL_CONN_MATCH(connp, protocol,
1958 				    hdr->ipha_src, hdr->ipha_dst, ports))
1959 					continue;
1960 			} else {
1961 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
1962 				    ((ip6_t *)hdr)->ip6_src,
1963 				    ((ip6_t *)hdr)->ip6_dst, ports))
1964 					continue;
1965 			}
1966 		} else {
1967 			if (af == IPV4_VERSION) {
1968 				if (!IPCL_BIND_MATCH(connp, protocol,
1969 				    hdr->ipha_dst, lport))
1970 					continue;
1971 			} else {
1972 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
1973 				    ((ip6_t *)hdr)->ip6_dst, lport))
1974 					continue;
1975 			}
1976 		}
1977 
1978 		if (IPCL_ZONE_MATCH(connp, zoneid) ||
1979 		    (unlabeled && connp->conn_mac_exempt))
1980 			break;
1981 	}
1982 	/*
1983 	 * If the connection is fully-bound and connection-oriented (TCP or
1984 	 * SCTP), then we've already validated the remote system's label.
1985 	 * There's no need to do it again for every packet.
1986 	 */
1987 	if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound ||
1988 	    !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) &&
1989 	    !tsol_receive_local(mp, dst, af, shared_addr, connp)) {
1990 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1991 		    char *, "connp(1) could not receive mp(2)",
1992 		    conn_t *, connp, mblk_t *, mp);
1993 		connp = NULL;
1994 	}
1995 
1996 	if (connp != NULL)
1997 		goto found;
1998 	mutex_exit(&connfp->connf_lock);
1999 
2000 	/* Try to look for a wildcard match. */
2001 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
2002 	mutex_enter(&connfp->connf_lock);
2003 	for (connp = connfp->connf_head; connp != NULL;
2004 	    connp = connp->conn_next) {
2005 		/* We don't allow v4 fallback for a v6 raw socket. */
2006 		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
2007 		    IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) {
2008 			continue;
2009 		}
2010 		if (af == IPV4_VERSION) {
2011 			if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst))
2012 				break;
2013 		} else {
2014 			if (IPCL_RAW_MATCH_V6(connp, protocol,
2015 			    ((ip6_t *)hdr)->ip6_dst)) {
2016 				break;
2017 			}
2018 		}
2019 	}
2020 
2021 	if (connp != NULL)
2022 		goto found;
2023 
2024 	mutex_exit(&connfp->connf_lock);
2025 	return (NULL);
2026 
2027 found:
2028 	ASSERT(connp != NULL);
2029 	CONN_INC_REF(connp);
2030 	mutex_exit(&connfp->connf_lock);
2031 	return (connp);
2032 }
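
/*
 * Condensed control flow of the raw-socket lookup above; this is a sketch
 * of the same logic with the details elided, not additional behavior:
 *
 *	connfp = &raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
 *	walk connfp: accept a fully-bound match (IPCL_CONN_MATCH*) or a
 *	    bound-only match (IPCL_BIND_MATCH*), subject to zone/label checks;
 *	if (no match) {
 *		connfp = &raw_fanout[IPCL_RAW_HASH(0, ipst)];
 *		walk connfp: accept IPCL_RAW_MATCH* (protocol and destination
 *		    only), subject to the same zone check;
 *	}
 *	if (match)
 *		CONN_INC_REF(connp) while connf_lock is still held;
 */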
2033 
2034 /* ARGSUSED */
2035 static int
2036 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2037 {
2038 	itc_t	*itc = (itc_t *)buf;
2039 	conn_t 	*connp = &itc->itc_conn;
2040 	tcp_t	*tcp = (tcp_t *)&itc[1];
2041 
2042 	bzero(connp, sizeof (conn_t));
2043 	bzero(tcp, sizeof (tcp_t));
2044 
2045 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2046 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2047 	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
2048 	tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
2049 	connp->conn_tcp = tcp;
2050 	connp->conn_flags = IPCL_TCPCONN;
2051 	connp->conn_ulp = IPPROTO_TCP;
2052 	tcp->tcp_connp = connp;
2053 	return (0);
2054 }
2055 
2056 /* ARGSUSED */
2057 static void
2058 tcp_conn_destructor(void *buf, void *cdrarg)
2059 {
2060 	itc_t	*itc = (itc_t *)buf;
2061 	conn_t 	*connp = &itc->itc_conn;
2062 	tcp_t	*tcp = (tcp_t *)&itc[1];
2063 
2064 	ASSERT(connp->conn_flags & IPCL_TCPCONN);
2065 	ASSERT(tcp->tcp_connp == connp);
2066 	ASSERT(connp->conn_tcp == tcp);
2067 	tcp_timermp_free(tcp);
2068 	mutex_destroy(&connp->conn_lock);
2069 	cv_destroy(&connp->conn_cv);
2070 	cv_destroy(&connp->conn_sq_cv);
2071 }
2072 
2073 /* ARGSUSED */
2074 static int
2075 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2076 {
2077 	itc_t	*itc = (itc_t *)buf;
2078 	conn_t 	*connp = &itc->itc_conn;
2079 
2080 	bzero(connp, sizeof (conn_t));
2081 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2082 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2083 	connp->conn_flags = IPCL_IPCCONN;
2084 
2085 	return (0);
2086 }
2087 
2088 /* ARGSUSED */
2089 static void
2090 ip_conn_destructor(void *buf, void *cdrarg)
2091 {
2092 	itc_t	*itc = (itc_t *)buf;
2093 	conn_t 	*connp = &itc->itc_conn;
2094 
2095 	ASSERT(connp->conn_flags & IPCL_IPCCONN);
2096 	ASSERT(connp->conn_priv == NULL);
2097 	mutex_destroy(&connp->conn_lock);
2098 	cv_destroy(&connp->conn_cv);
2099 }
2100 
2101 /* ARGSUSED */
2102 static int
2103 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2104 {
2105 	itc_t	*itc = (itc_t *)buf;
2106 	conn_t 	*connp = &itc->itc_conn;
2107 	udp_t	*udp = (udp_t *)&itc[1];
2108 
2109 	bzero(connp, sizeof (conn_t));
2110 	bzero(udp, sizeof (udp_t));
2111 
2112 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2113 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2114 	connp->conn_udp = udp;
2115 	connp->conn_flags = IPCL_UDPCONN;
2116 	connp->conn_ulp = IPPROTO_UDP;
2117 	udp->udp_connp = connp;
2118 	return (0);
2119 }
2120 
2121 /* ARGSUSED */
2122 static void
2123 udp_conn_destructor(void *buf, void *cdrarg)
2124 {
2125 	itc_t	*itc = (itc_t *)buf;
2126 	conn_t 	*connp = &itc->itc_conn;
2127 	udp_t	*udp = (udp_t *)&itc[1];
2128 
2129 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2130 	ASSERT(udp->udp_connp == connp);
2131 	ASSERT(connp->conn_udp == udp);
2132 	mutex_destroy(&connp->conn_lock);
2133 	cv_destroy(&connp->conn_cv);
2134 }
2135 
2136 /* ARGSUSED */
2137 static int
2138 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2139 {
2140 	itc_t	*itc = (itc_t *)buf;
2141 	conn_t 	*connp = &itc->itc_conn;
2142 	icmp_t	*icmp = (icmp_t *)&itc[1];
2143 
2144 	bzero(connp, sizeof (conn_t));
2145 	bzero(icmp, sizeof (icmp_t));
2146 
2147 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2148 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2149 	connp->conn_icmp = icmp;
2150 	connp->conn_flags = IPCL_RAWIPCONN;
2151 	connp->conn_ulp = IPPROTO_ICMP;
2152 	icmp->icmp_connp = connp;
2153 	return (0);
2154 }
2155 
2156 /* ARGSUSED */
2157 static void
2158 rawip_conn_destructor(void *buf, void *cdrarg)
2159 {
2160 	itc_t	*itc = (itc_t *)buf;
2161 	conn_t 	*connp = &itc->itc_conn;
2162 	icmp_t	*icmp = (icmp_t *)&itc[1];
2163 
2164 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2165 	ASSERT(icmp->icmp_connp == connp);
2166 	ASSERT(connp->conn_icmp == icmp);
2167 	mutex_destroy(&connp->conn_lock);
2168 	cv_destroy(&connp->conn_cv);
2169 }
2170 
2171 /* ARGSUSED */
2172 static int
2173 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2174 {
2175 	itc_t	*itc = (itc_t *)buf;
2176 	conn_t 	*connp = &itc->itc_conn;
2177 	rts_t	*rts = (rts_t *)&itc[1];
2178 
2179 	bzero(connp, sizeof (conn_t));
2180 	bzero(rts, sizeof (rts_t));
2181 
2182 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2183 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2184 	connp->conn_rts = rts;
2185 	connp->conn_flags = IPCL_RTSCONN;
2186 	rts->rts_connp = connp;
2187 	return (0);
2188 }
2189 
2190 /* ARGSUSED */
2191 static void
2192 rts_conn_destructor(void *buf, void *cdrarg)
2193 {
2194 	itc_t	*itc = (itc_t *)buf;
2195 	conn_t 	*connp = &itc->itc_conn;
2196 	rts_t	*rts = (rts_t *)&itc[1];
2197 
2198 	ASSERT(connp->conn_flags & IPCL_RTSCONN);
2199 	ASSERT(rts->rts_connp == connp);
2200 	ASSERT(connp->conn_rts == rts);
2201 	mutex_destroy(&connp->conn_lock);
2202 	cv_destroy(&connp->conn_cv);
2203 }
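
/*
 * The constructor/destructor pairs above only take effect once they are
 * attached to their kmem caches, which is done earlier in this file.  A
 * representative registration looks roughly like the following; the exact
 * size, alignment and flag arguments used by this file may differ:
 *
 *	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
 *	    sizeof (itc_t) + sizeof (tcp_t), 0,
 *	    tcp_conn_constructor, tcp_conn_destructor,
 *	    NULL, NULL, NULL, 0);
 *
 * so that kmem_cache_alloc() returns an itc_t whose embedded conn_t and
 * tcp_t are already cross-linked and whose locks are already initialized.
 */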
2204 
2205 /* ARGSUSED */
2206 int
2207 ip_helper_stream_constructor(void *buf, void *cdrarg, int kmflags)
2208 {
2209 	int error;
2210 	netstack_t	*ns;
2211 	int		ret;
2212 	tcp_stack_t	*tcps;
2213 	ip_helper_stream_info_t	*ip_helper_str;
2214 	ip_stack_t	*ipst;
2215 
2216 	ns = netstack_find_by_cred(kcred);
2217 	ASSERT(ns != NULL);
2218 	tcps = ns->netstack_tcp;
2219 	ipst = ns->netstack_ip;
2220 	ASSERT(tcps != NULL);
2221 	ip_helper_str = (ip_helper_stream_info_t *)buf;
2222 
2223 	error = ldi_open_by_name(DEV_IP, IP_HELPER_STR, kcred,
2224 	    &ip_helper_str->ip_helper_stream_handle, ipst->ips_ldi_ident);
2225 	if (error != 0) {
2226 		goto done;
2227 	}
2228 	error = ldi_ioctl(ip_helper_str->ip_helper_stream_handle,
2229 	    SIOCSQPTR, (intptr_t)buf, FKIOCTL, kcred, &ret);
2230 	if (error != 0) {
2231 		(void) ldi_close(ip_helper_str->ip_helper_stream_handle, 0,
2232 		    kcred);
2233 	}
2234 done:
2235 	netstack_rele(ipst->ips_netstack);
2236 	return (error);
2237 }
2238 
2239 /* ARGSUSED */
2240 static void
2241 ip_helper_stream_destructor(void *buf, void *cdrarg)
2242 {
2243 	ip_helper_stream_info_t *ip_helper_str = (ip_helper_stream_info_t *)buf;
2244 
2245 	ip_helper_str->ip_helper_stream_rq->q_ptr =
2246 	    ip_helper_str->ip_helper_stream_wq->q_ptr =
2247 	    ip_helper_str->ip_helper_stream_minfo;
2248 	(void) ldi_close(ip_helper_str->ip_helper_stream_handle, 0, kcred);
2249 }
2250 
2251 
2252 /*
2253  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2254  * in the conn_t.
2255  */
2256 void
2257 ipcl_conn_cleanup(conn_t *connp)
2258 {
2259 	ASSERT(connp->conn_ire_cache == NULL);
2260 	ASSERT(connp->conn_latch == NULL);
2261 #ifdef notdef
2262 	ASSERT(connp->conn_rq == NULL);
2263 	ASSERT(connp->conn_wq == NULL);
2264 #endif
2265 	ASSERT(connp->conn_cred == NULL);
2266 	ASSERT(connp->conn_g_fanout == NULL);
2267 	ASSERT(connp->conn_g_next == NULL);
2268 	ASSERT(connp->conn_g_prev == NULL);
2269 	ASSERT(connp->conn_policy == NULL);
2270 	ASSERT(connp->conn_fanout == NULL);
2271 	ASSERT(connp->conn_next == NULL);
2272 	ASSERT(connp->conn_prev == NULL);
2273 #ifdef notdef
2274 	/*
2275 	 * The ill and ipif pointers are not cleared before the conn_t
2276 	 * goes away since they do not hold a reference on the ill/ipif.
2277 	 * We should replace these pointers with ifindex/ipaddr_t to
2278 	 * make the code less complex.
2279 	 */
2280 	ASSERT(connp->conn_xmit_if_ill == NULL);
2281 	ASSERT(connp->conn_nofailover_ill == NULL);
2282 	ASSERT(connp->conn_outgoing_ill == NULL);
2283 	ASSERT(connp->conn_incoming_ill == NULL);
2284 	ASSERT(connp->conn_outgoing_pill == NULL);
2285 	ASSERT(connp->conn_multicast_ipif == NULL);
2286 	ASSERT(connp->conn_multicast_ill == NULL);
2287 #endif
2288 	ASSERT(connp->conn_oper_pending_ill == NULL);
2289 	ASSERT(connp->conn_ilg == NULL);
2290 	ASSERT(connp->conn_drain_next == NULL);
2291 	ASSERT(connp->conn_drain_prev == NULL);
2292 #ifdef notdef
2293 	/* conn_idl is not cleared when removed from idl list */
2294 	ASSERT(connp->conn_idl == NULL);
2295 #endif
2296 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
2297 	ASSERT(connp->conn_peercred == NULL);
2298 	ASSERT(connp->conn_netstack == NULL);
2299 
2300 	ASSERT(connp->conn_helper_info == NULL);
2301 	/* Clear out the conn_t fields that are not preserved */
2302 	bzero(&connp->conn_start_clr,
2303 	    sizeof (conn_t) -
2304 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2305 }
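
/*
 * The final bzero above uses the "clear from a marker field to the end of
 * the structure" idiom: every field at or after conn_start_clr is wiped,
 * while the fields before it (such as the locks initialized by the cache
 * constructors) survive for reuse by the kmem cache.  In general form, for
 * some hypothetical struct s pointed to by sp with marker field s_first_clr:
 *
 *	bzero(&sp->s_first_clr,
 *	    sizeof (*sp) - ((uchar_t *)&sp->s_first_clr - (uchar_t *)sp));
 */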
2306 
2307 /*
2308  * All conns are inserted in a global multi-list for the benefit of
2309  * walkers. The walk is guaranteed to walk all open conns at the time
2310  * of the start of the walk exactly once. This property is needed to
2311  * achieve some cleanups during unplumb of interfaces. This is achieved
2312  * as follows.
2313  *
2314  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2315  * call the insert and delete functions below at creation and deletion
2316  * time respectively. The conn never moves or changes its position in this
2317  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2318  * won't increase due to walkers, once the conn deletion has started. Note
2319  * that we can't remove the conn from the global list and then wait for
2320  * the refcnt to drop to zero, since walkers would then see a truncated
2321  * list. CONN_INCIPIENT ensures that walkers don't start looking at
2322  * conns until ip_open is ready to make them globally visible.
2323  * The global round-robin multi-list locks are held only to get the
2324  * next member or to do an insertion or deletion, so contention should be
2325  * negligible if the number of lists is much greater than the number of cpus.
2326  */
2327 void
2328 ipcl_globalhash_insert(conn_t *connp)
2329 {
2330 	int	index;
2331 	struct connf_s	*connfp;
2332 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
2333 
2334 	/*
2335 	 * No need for atomic here. Approximate even distribution
2336 	 * in the global lists is sufficient.
2337 	 */
2338 	ipst->ips_conn_g_index++;
2339 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2340 
2341 	connp->conn_g_prev = NULL;
2342 	/*
2343 	 * Mark as INCIPIENT, so that walkers will ignore this
2344 	 * for now, till ip_open is ready to make it visible globally.
2345 	 */
2346 	connp->conn_state_flags |= CONN_INCIPIENT;
2347 
2348 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2349 	/* Insert at the head of the list */
2350 	mutex_enter(&connfp->connf_lock);
2351 	connp->conn_g_next = connfp->connf_head;
2352 	if (connp->conn_g_next != NULL)
2353 		connp->conn_g_next->conn_g_prev = connp;
2354 	connfp->connf_head = connp;
2355 
2356 	/* The fanout bucket this conn points to */
2357 	connp->conn_g_fanout = connfp;
2358 
2359 	mutex_exit(&connfp->connf_lock);
2360 }
2361 
2362 void
2363 ipcl_globalhash_remove(conn_t *connp)
2364 {
2365 	struct connf_s	*connfp;
2366 
2367 	/*
2368 	 * Return immediately if we were never inserted in the global multi-list.
2369 	 * The IPCL_NONE variety is never inserted in the global multi-list
2370 	 * since it is presumed not to need any cleanup and is transient.
2371 	 */
2372 	if (connp->conn_g_fanout == NULL)
2373 		return;
2374 
2375 	connfp = connp->conn_g_fanout;
2376 	mutex_enter(&connfp->connf_lock);
2377 	if (connp->conn_g_prev != NULL)
2378 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2379 	else
2380 		connfp->connf_head = connp->conn_g_next;
2381 	if (connp->conn_g_next != NULL)
2382 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2383 	mutex_exit(&connfp->connf_lock);
2384 
2385 	/* Better to stumble on a null pointer than to corrupt memory */
2386 	connp->conn_g_next = NULL;
2387 	connp->conn_g_prev = NULL;
2388 	connp->conn_g_fanout = NULL;
2389 }
2390 
2391 /*
2392  * Walk the list of all conn_t's in the system, calling the function provided
2393  * with the specified argument for each.
2394  * Applies to both IPv4 and IPv6.
2395  *
2396  * IPCs may hold pointers to ipif/ill. To guard against stale pointers,
2397  * ipcl_walk() is called to clean up the conn_t's, typically when an interface is
2398  * unplumbed or removed. New conn_t's that are created while we are walking
2399  * may be missed by this walk, because they are not necessarily inserted
2400  * at the tail of the list. They are new conn_t's and thus don't have any
2401  * stale pointers. The CONN_CLOSING flag ensures that no new reference
2402  * is created to the struct that is going away.
2403  */
2404 void
2405 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2406 {
2407 	int	i;
2408 	conn_t	*connp;
2409 	conn_t	*prev_connp;
2410 
2411 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2412 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2413 		prev_connp = NULL;
2414 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2415 		while (connp != NULL) {
2416 			mutex_enter(&connp->conn_lock);
2417 			if (connp->conn_state_flags &
2418 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
2419 				mutex_exit(&connp->conn_lock);
2420 				connp = connp->conn_g_next;
2421 				continue;
2422 			}
2423 			CONN_INC_REF_LOCKED(connp);
2424 			mutex_exit(&connp->conn_lock);
2425 			mutex_exit(
2426 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2427 			(*func)(connp, arg);
2428 			if (prev_connp != NULL)
2429 				CONN_DEC_REF(prev_connp);
2430 			mutex_enter(
2431 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2432 			prev_connp = connp;
2433 			connp = connp->conn_g_next;
2434 		}
2435 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2436 		if (prev_connp != NULL)
2437 			CONN_DEC_REF(prev_connp);
2438 	}
2439 }
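
/*
 * A typical caller passes a callback that inspects one held conn_t at a
 * time.  A minimal hypothetical example (the callback name and the field it
 * clears are purely illustrative):
 *
 *	static void
 *	conn_example_cleanup(conn_t *connp, caddr_t arg)
 *	{
 *		ill_t	*ill = (ill_t *)arg;
 *
 *		mutex_enter(&connp->conn_lock);
 *		if (connp->conn_oper_pending_ill == ill)
 *			connp->conn_oper_pending_ill = NULL;
 *		mutex_exit(&connp->conn_lock);
 *	}
 *
 *	ipcl_walk(conn_example_cleanup, (caddr_t)ill, ipst);
 *
 * ipcl_walk() holds a reference on connp across the callback, so the
 * callback itself does not need to take one unless it stashes the pointer.
 */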
2440 
2441 /*
2442  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2443  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2444  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2445  * (peer tcp in ESTABLISHED state).
2446  */
2447 conn_t *
2448 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph,
2449     ip_stack_t *ipst)
2450 {
2451 	uint32_t ports;
2452 	uint16_t *pports = (uint16_t *)&ports;
2453 	connf_t	*connfp;
2454 	conn_t	*tconnp;
2455 	boolean_t zone_chk;
2456 
2457 	/*
2458 	 * If either the source or destination address is loopback, then
2459 	 * both endpoints must be in the same Zone.  Otherwise, both of
2460 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
2461 	 * state) and the endpoints may reside in different Zones.
2462 	 */
2463 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2464 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2465 
2466 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
2467 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
2468 
2469 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2470 	    ports, ipst)];
2471 
2472 	mutex_enter(&connfp->connf_lock);
2473 	for (tconnp = connfp->connf_head; tconnp != NULL;
2474 	    tconnp = tconnp->conn_next) {
2475 
2476 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2477 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2478 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2479 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2480 
2481 			ASSERT(tconnp != connp);
2482 			CONN_INC_REF(tconnp);
2483 			mutex_exit(&connfp->connf_lock);
2484 			return (tconnp);
2485 		}
2486 	}
2487 	mutex_exit(&connfp->connf_lock);
2488 	return (NULL);
2489 }
2490 
2491 /*
2492  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2493  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2494  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2495  * (peer tcp in ESTABLISHED state).
2496  */
2497 conn_t *
2498 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph,
2499     ip_stack_t *ipst)
2500 {
2501 	uint32_t ports;
2502 	uint16_t *pports = (uint16_t *)&ports;
2503 	connf_t	*connfp;
2504 	conn_t	*tconnp;
2505 	boolean_t zone_chk;
2506 
2507 	/*
2508 	 * If either the source or destination address is loopback, then
2509 	 * both endpoints must be in the same Zone.  Otherwise, both of
2510 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
2511 	 * state) and the endpoints may reside in different Zones.  We
2512 	 * don't do a Zone check for link-local addresses because the
2513 	 * current Zone implementation treats each link-local address as
2514 	 * being unique per system node, i.e. it belongs to the global Zone.
2515 	 */
2516 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2517 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2518 
2519 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
2520 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
2521 
2522 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2523 	    ports, ipst)];
2524 
2525 	mutex_enter(&connfp->connf_lock);
2526 	for (tconnp = connfp->connf_head; tconnp != NULL;
2527 	    tconnp = tconnp->conn_next) {
2528 
2529 		/* We skip tcp_bound_if check here as this is loopback tcp */
2530 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2531 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2532 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2533 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2534 
2535 			ASSERT(tconnp != connp);
2536 			CONN_INC_REF(tconnp);
2537 			mutex_exit(&connfp->connf_lock);
2538 			return (tconnp);
2539 		}
2540 	}
2541 	mutex_exit(&connfp->connf_lock);
2542 	return (NULL);
2543 }
2544 
2545 /*
2546  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2547  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2548  * Only checks for connected entries i.e. no INADDR_ANY checks.
2549  */
2550 conn_t *
2551 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state,
2552     ip_stack_t *ipst)
2553 {
2554 	uint32_t ports;
2555 	uint16_t *pports;
2556 	connf_t	*connfp;
2557 	conn_t	*tconnp;
2558 
2559 	pports = (uint16_t *)&ports;
2560 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
2561 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
2562 
2563 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2564 	    ports, ipst)];
2565 
2566 	mutex_enter(&connfp->connf_lock);
2567 	for (tconnp = connfp->connf_head; tconnp != NULL;
2568 	    tconnp = tconnp->conn_next) {
2569 
2570 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2571 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2572 		    tconnp->conn_tcp->tcp_state >= min_state) {
2573 
2574 			CONN_INC_REF(tconnp);
2575 			mutex_exit(&connfp->connf_lock);
2576 			return (tconnp);
2577 		}
2578 	}
2579 	mutex_exit(&connfp->connf_lock);
2580 	return (NULL);
2581 }
2582 
2583 /*
2584  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2585  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2586  * Only checks for connected entries i.e. no INADDR_ANY checks.
2587  * Match on ifindex in addition to addresses.
2588  */
2589 conn_t *
2590 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2591     uint_t ifindex, ip_stack_t *ipst)
2592 {
2593 	tcp_t	*tcp;
2594 	uint32_t ports;
2595 	uint16_t *pports;
2596 	connf_t	*connfp;
2597 	conn_t	*tconnp;
2598 
2599 	pports = (uint16_t *)&ports;
2600 	pports[0] = tcpha->tha_fport;
2601 	pports[1] = tcpha->tha_lport;
2602 
2603 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2604 	    ports, ipst)];
2605 
2606 	mutex_enter(&connfp->connf_lock);
2607 	for (tconnp = connfp->connf_head; tconnp != NULL;
2608 	    tconnp = tconnp->conn_next) {
2609 
2610 		tcp = tconnp->conn_tcp;
2611 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2612 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2613 		    tcp->tcp_state >= min_state &&
2614 		    (tcp->tcp_bound_if == 0 ||
2615 		    tcp->tcp_bound_if == ifindex)) {
2616 
2617 			CONN_INC_REF(tconnp);
2618 			mutex_exit(&connfp->connf_lock);
2619 			return (tconnp);
2620 		}
2621 	}
2622 	mutex_exit(&connfp->connf_lock);
2623 	return (NULL);
2624 }
2625 
2626 /*
2627  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2628  * a listener when changing state.
2629  */
2630 conn_t *
2631 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2632     ip_stack_t *ipst)
2633 {
2634 	connf_t		*bind_connfp;
2635 	conn_t		*connp;
2636 	tcp_t		*tcp;
2637 
2638 	/*
2639 	 * Avoid false matches for packets sent to an IP destination of
2640 	 * all zeros.
2641 	 */
2642 	if (laddr == 0)
2643 		return (NULL);
2644 
2645 	ASSERT(zoneid != ALL_ZONES);
2646 
2647 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2648 	mutex_enter(&bind_connfp->connf_lock);
2649 	for (connp = bind_connfp->connf_head; connp != NULL;
2650 	    connp = connp->conn_next) {
2651 		tcp = connp->conn_tcp;
2652 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2653 		    IPCL_ZONE_MATCH(connp, zoneid) &&
2654 		    (tcp->tcp_listener == NULL)) {
2655 			CONN_INC_REF(connp);
2656 			mutex_exit(&bind_connfp->connf_lock);
2657 			return (connp);
2658 		}
2659 	}
2660 	mutex_exit(&bind_connfp->connf_lock);
2661 	return (NULL);
2662 }
2663 
2664 /*
2665  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2666  * a listener when changing state.
2667  */
2668 conn_t *
2669 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2670     zoneid_t zoneid, ip_stack_t *ipst)
2671 {
2672 	connf_t		*bind_connfp;
2673 	conn_t		*connp = NULL;
2674 	tcp_t		*tcp;
2675 
2676 	/*
2677 	 * Avoid false matches for packets sent to an IP destination of
2678 	 * all zeros.
2679 	 */
2680 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2681 		return (NULL);
2682 
2683 	ASSERT(zoneid != ALL_ZONES);
2684 
2685 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2686 	mutex_enter(&bind_connfp->connf_lock);
2687 	for (connp = bind_connfp->connf_head; connp != NULL;
2688 	    connp = connp->conn_next) {
2689 		tcp = connp->conn_tcp;
2690 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2691 		    IPCL_ZONE_MATCH(connp, zoneid) &&
2692 		    (tcp->tcp_bound_if == 0 ||
2693 		    tcp->tcp_bound_if == ifindex) &&
2694 		    tcp->tcp_listener == NULL) {
2695 			CONN_INC_REF(connp);
2696 			mutex_exit(&bind_connfp->connf_lock);
2697 			return (connp);
2698 		}
2699 	}
2700 	mutex_exit(&bind_connfp->connf_lock);
2701 	return (NULL);
2702 }
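
/*
 * A hypothetical call site for the two listener lookups above (variable
 * names are illustrative): once a connected endpoint is being torn down,
 * tcp_disconnect can check whether a separate listener still owns the
 * local {address, port} pair:
 *
 *	lconnp = ipcl_lookup_listener_v4(lport, laddr, zoneid, ipst);
 *	if (lconnp != NULL) {
 *		listener = lconnp->conn_tcp;
 *		... coordinate state with the listener ...
 *		CONN_DEC_REF(lconnp);
 *	}
 */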
2703 
2704 /*
2705  * ipcl_get_next_conn
2706  *	get the next entry in the conn global list
2707  *	and put a reference on the next_conn.
2708  *	decrement the reference on the current conn.
2709  *
2710  * This is an iterator based walker function that also provides for
2711  * some selection by the caller. It walks through the conn_hash bucket
2712  * searching for the next valid connp in the list, and selects connections
2713  * that are neither closed nor condemned. It also REFHOLDs the conn,
2714  * thus ensuring that the conn exists when the caller uses it.
2715  */
2716 conn_t *
2717 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2718 {
2719 	conn_t	*next_connp;
2720 
2721 	if (connfp == NULL)
2722 		return (NULL);
2723 
2724 	mutex_enter(&connfp->connf_lock);
2725 
2726 	next_connp = (connp == NULL) ?
2727 	    connfp->connf_head : connp->conn_g_next;
2728 
2729 	while (next_connp != NULL) {
2730 		mutex_enter(&next_connp->conn_lock);
2731 		if (!(next_connp->conn_flags & conn_flags) ||
2732 		    (next_connp->conn_state_flags &
2733 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2734 			/*
2735 			 * This conn has been condemned or
2736 			 * is closing, or the flags don't match
2737 			 */
2738 			mutex_exit(&next_connp->conn_lock);
2739 			next_connp = next_connp->conn_g_next;
2740 			continue;
2741 		}
2742 		CONN_INC_REF_LOCKED(next_connp);
2743 		mutex_exit(&next_connp->conn_lock);
2744 		break;
2745 	}
2746 
2747 	mutex_exit(&connfp->connf_lock);
2748 
2749 	if (connp != NULL)
2750 		CONN_DEC_REF(connp);
2751 
2752 	return (next_connp);
2753 }
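
/*
 * Used as an iterator: passing back the conn returned by the previous call
 * releases its reference and yields the next eligible conn.  A hypothetical
 * walk over one global-hash bucket for TCP conns:
 *
 *	conn_t	*connp = NULL;
 *
 *	while ((connp = ipcl_get_next_conn(connfp, connp,
 *	    IPCL_TCPCONN)) != NULL) {
 *		... use connp; the iterator holds a reference for us ...
 *	}
 *
 * When the caller stops early (before NULL is returned), it must drop the
 * last reference itself with CONN_DEC_REF(connp).
 */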
2754 
2755 #ifdef CONN_DEBUG
2756 /*
2757  * Trace of the last CONN_TRACE_MAX refhold/refrele operations
2758  */
2759 int
2760 conn_trace_ref(conn_t *connp)
2761 {
2762 	int	last;
2763 	conn_trace_t	*ctb;
2764 
2765 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2766 	last = connp->conn_trace_last;
2767 	last++;
2768 	if (last == CONN_TRACE_MAX)
2769 		last = 0;
2770 
2771 	ctb = &connp->conn_trace_buf[last];
2772 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2773 	connp->conn_trace_last = last;
2774 	return (1);
2775 }
2776 
2777 int
2778 conn_untrace_ref(conn_t *connp)
2779 {
2780 	int	last;
2781 	conn_trace_t	*ctb;
2782 
2783 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2784 	last = connp->conn_trace_last;
2785 	last++;
2786 	if (last == CONN_TRACE_MAX)
2787 		last = 0;
2788 
2789 	ctb = &connp->conn_trace_buf[last];
2790 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2791 	connp->conn_trace_last = last;
2792 	return (1);
2793 }
2794 #endif
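
/*
 * Both trace routines above record the caller's stack in a small circular
 * buffer of CONN_TRACE_MAX entries; the index update is equivalent to the
 * one-line form:
 *
 *	connp->conn_trace_last = (connp->conn_trace_last + 1) % CONN_TRACE_MAX;
 *
 * They are only compiled in when CONN_DEBUG is defined, and are intended to
 * be driven from the conn refhold/refrele paths so that the most recent
 * reference-count changes can be examined from a crash dump.
 */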
2795