xref: /illumos-gate/usr/src/uts/common/inet/ip/ipclassifier.c (revision b1d7ec75953cd517f5b7c3d9cb427ff8ec5d7d07)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * IP PACKET CLASSIFIER
27  *
28  * The IP packet classifier provides mapping between IP packets and persistent
29  * connection state for connection-oriented protocols. It also provides
30  * interface for managing connection states.
31  *
32  * The connection state is kept in conn_t data structure and contains, among
33  * other things:
34  *
35  *	o local/remote address and ports
36  *	o Transport protocol
37  *	o squeue for the connection (for TCP only)
38  *	o reference counter
39  *	o Connection state
40  *	o hash table linkage
41  *	o interface/ire information
42  *	o credentials
43  *	o ipsec policy
44  *	o send and receive functions.
45  *	o mutex lock.
46  *
47  * Connections use a reference counting scheme. They are freed when the
48  * reference counter drops to zero. A reference is incremented when connection
49  * is placed in a list or table, when incoming packet for the connection arrives
50  * and when connection is processed via squeue (squeue processing may be
51  * asynchronous and the reference protects the connection from being destroyed
52  * before its processing is finished).
53  *
54  * conn_recv is used to pass up packets to the ULP.
55  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
56  * a listener, and changes to tcp_input_listener as the listener has picked a
57  * good squeue. For other cases it is set to tcp_input_data.
58  *
59  * conn_recvicmp is used to pass up ICMP errors to the ULP.
60  *
61  * Classifier uses several hash tables:
62  *
63  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
64  *	ipcl_bind_fanout:	contains all connections in BOUND state
65  *	ipcl_proto_fanout:	IPv4 protocol fanout
66  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
67  *	ipcl_udp_fanout:	contains all UDP connections
68  *	ipcl_iptun_fanout:	contains all IP tunnel connections
69  *	ipcl_globalhash_fanout:	contains all connections
70  *
71  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
72  * which need to view all existing connections.
73  *
74  * All tables are protected by per-bucket locks. When both per-bucket lock and
75  * connection lock need to be held, the per-bucket lock should be acquired
76  * first, followed by the connection lock.
77  *
78  * All functions doing search in one of these tables increment a reference
79  * counter on the connection found (if any). This reference should be dropped
80  * when the caller has finished processing the connection.
81  *
82  *
83  * INTERFACES:
84  * ===========
85  *
86  * Connection Lookup:
87  * ------------------
88  *
89  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
90  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
91  *
92  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
93  * it can't find any associated connection. If the connection is found, its
94  * reference counter is incremented.
95  *
96  *	mp:	mblock, containing packet header. The full header should fit
97  *		into a single mblock. It should also contain at least full IP
98  *		and TCP or UDP header.
99  *
100  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
101  *
102  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
103  *		 the packet.
104  *
105  * 	ira->ira_zoneid: The zone in which the returned connection must be; the
106  *		zoneid corresponding to the ire_zoneid on the IRE located for
107  *		the packet's destination address.
108  *
109  *	ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
110  *		IRAF_TX_SHARED_ADDR flags
111  *
112  *	For TCP connections, the lookup order is as follows:
113  *		5-tuple {src, dst, protocol, local port, remote port}
114  *			lookup in ipcl_conn_fanout table.
115  *		3-tuple {dst, remote port, protocol} lookup in
116  *			ipcl_bind_fanout table.
117  *
118  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
119  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
120  *	these interfaces do not handle cases where a packet belongs
121  *	to multiple UDP clients, which is handled in IP itself.
122  *
123  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
124  * determine which actual zone gets the segment.  This is used only in a
125  * labeled environment.  The matching rules are:
126  *
127  *	- If it's not a multilevel port, then the label on the packet selects
128  *	  the zone.  Unlabeled packets are delivered to the global zone.
129  *
130  *	- If it's a multilevel port, then only the zone registered to receive
131  *	  packets on that port matches.
132  *
133  * Also, in a labeled environment, packet labels need to be checked.  For fully
134  * bound TCP connections, we can assume that the packet label was checked
135  * during connection establishment, and doesn't need to be checked on each
136  * packet.  For others, though, we need to check for strict equality or, for
137  * multilevel ports, membership in the range or set.  This part currently does
138  * a tnrh lookup on each packet, but could be optimized to use cached results
139  * if that were necessary.  (SCTP doesn't come through here, but if it did,
140  * we would apply the same rules as TCP.)
141  *
142  * An implication of the above is that fully-bound TCP sockets must always use
143  * distinct 4-tuples; they can't be discriminated by label alone.
144  *
145  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
146  * as there's no connection set-up handshake and no shared state.
147  *
148  * Labels on looped-back packets within a single zone do not need to be
149  * checked, as all processes in the same zone have the same label.
150  *
151  * Finally, for unlabeled packets received by a labeled system, special rules
152  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
153  * socket in the zone whose label matches the default label of the sender, if
154  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
155  * receiver's label must dominate the sender's default label.
156  *
157  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
158  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
159  *					 ip_stack);
160  *
161  *	Lookup routine to find an exact match for {src, dst, local port,
162  *	remote port} for TCP connections in ipcl_conn_fanout. The address and
163  *	ports are read from the IP and TCP header respectively.
164  *
165  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
166  *					 zoneid, ip_stack);
167  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
168  *					 zoneid, ip_stack);
169  *
170  * 	Lookup routine to find a listener with the tuple {lport, laddr,
171  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
172  * 	parameter interface index is also compared.
173  *
174  * void ipcl_walk(func, arg, ip_stack)
175  *
176  * 	Apply 'func' to every connection available. The 'func' is called as
177  *	(*func)(connp, arg). The walk is non-atomic so connections may be
178  *	created and destroyed during the walk. The CONN_CONDEMNED and
179  *	CONN_INCIPIENT flags ensure that connections which are newly created
180  *	or being destroyed are not selected by the walker.
181  *
182  * Table Updates
183  * -------------
184  *
185  * int ipcl_conn_insert(connp);
186  * int ipcl_conn_insert_v4(connp);
187  * int ipcl_conn_insert_v6(connp);
188  *
189  *	Insert 'connp' in the ipcl_conn_fanout.
190  *	Arguments :
191  *		connp		conn_t to be inserted
192  *
193  *	Return value :
194  *		0		if connp was inserted
195  *		EADDRINUSE	if the connection with the same tuple
196  *				already exists.
197  *
198  * int ipcl_bind_insert(connp);
199  * int ipcl_bind_insert_v4(connp);
200  * int ipcl_bind_insert_v6(connp);
201  *
202  * 	Insert 'connp' in ipcl_bind_fanout.
203  * 	Arguments :
204  * 		connp		conn_t to be inserted
205  *
206  *
207  * void ipcl_hash_remove(connp);
208  *
209  * 	Removes the 'connp' from the connection fanout table.
210  *
211  * Connection Creation/Destruction
212  * -------------------------------
213  *
214  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
215  *
216  * 	Creates a new conn based on the type flag, inserts it into
217  * 	globalhash table.
218  *
219  *	type:	This flag determines the type of conn_t which needs to be
220  *		created i.e., which kmem_cache it comes from.
221  *		IPCL_TCPCONN	indicates a TCP connection
222  *		IPCL_SCTPCONN	indicates a SCTP connection
223  *		IPCL_UDPCONN	indicates a UDP conn_t.
224  *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
225  *		IPCL_RTSCONN	indicates a RTS conn_t.
226  *		IPCL_IPCCONN	indicates all other connections.
227  *
228  * void ipcl_conn_destroy(connp)
229  *
230  * 	Destroys the connection state, removes it from the global
231  * 	connection hash table and frees its memory.
232  */
233 
234 #include <sys/types.h>
235 #include <sys/stream.h>
236 #include <sys/stropts.h>
237 #include <sys/sysmacros.h>
238 #include <sys/strsubr.h>
239 #include <sys/strsun.h>
240 #define	_SUN_TPI_VERSION 2
241 #include <sys/ddi.h>
242 #include <sys/cmn_err.h>
243 #include <sys/debug.h>
244 
245 #include <sys/systm.h>
246 #include <sys/param.h>
247 #include <sys/kmem.h>
248 #include <sys/isa_defs.h>
249 #include <inet/common.h>
250 #include <netinet/ip6.h>
251 #include <netinet/icmp6.h>
252 
253 #include <inet/ip.h>
254 #include <inet/ip_if.h>
255 #include <inet/ip_ire.h>
256 #include <inet/ip6.h>
257 #include <inet/ip_ndp.h>
258 #include <inet/ip_impl.h>
259 #include <inet/udp_impl.h>
260 #include <inet/sctp_ip.h>
261 #include <inet/sctp/sctp_impl.h>
262 #include <inet/rawip_impl.h>
263 #include <inet/rts_impl.h>
264 #include <inet/iptun/iptun_impl.h>
265 
266 #include <sys/cpuvar.h>
267 
268 #include <inet/ipclassifier.h>
269 #include <inet/tcp.h>
270 #include <inet/ipsec_impl.h>
271 
272 #include <sys/tsol/tnet.h>
273 #include <sys/sockio.h>
274 
275 /*
 * Old value for compatibility.  ipcl_init() consults it only when
 * ipcl_conn_hash_size is zero.  Settable in /etc/system.
 */
276 uint_t tcp_conn_hash_size = 0;
277 
278 /*
 * New value. Zero means choose automatically.  Settable in /etc/system.
 *
 * When both ipcl_conn_hash_size and tcp_conn_hash_size are zero, ipcl_init()
 * derives the connection fanout size as
 * (freemem * PAGESIZE) / ipcl_conn_hash_memfactor, capped at
 * ipcl_conn_hash_maxsize.  Whatever value is chosen is then replaced by an
 * entry from the P2Ps() prime table.
 */
279 uint_t ipcl_conn_hash_size = 0;
280 uint_t ipcl_conn_hash_memfactor = 8192;
281 uint_t ipcl_conn_hash_maxsize = 82500;
282 
283 /* bind/udp fanout table size; copied into each ip_stack_t by ipcl_init() */
284 uint_t ipcl_bind_fanout_size = 512;
285 uint_t ipcl_udp_fanout_size = 16384;
286 
287 /* Raw socket fanout size.  Must be a power of 2. */
288 uint_t ipcl_raw_fanout_size = 256;
289 
290 /*
291  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
292  * expect that most large deployments would have hundreds of tunnels, and
293  * thousands in the extreme case.
294  */
295 uint_t ipcl_iptun_fanout_size = 6143;
296 
297 /*
298  * Power of 2^N Primes useful for hashing for N of 0-28,
299  * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The initializer is indexed by N.  The entries for N of 0-2 are 0 (no
 * prime is provided for them), and a final 0 entry follows the N == 28
 * prime.  ipcl_init() starts its search at index 9 and treats a 0 result
 * as "out of range".
300  */
301 
302 #define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
303 		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
304 		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
305 		50331599, 100663291, 201326557, 0}
306 
307 /*
308  * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
309  * are aligned on cache lines.
 *
 * The filler member sizes the union to CACHE_ALIGN(conn_s).  ipcl_g_init()
 * sizes the tcp, udp, rawip and rts cache objects as sizeof (itc_t) plus
 * the size of the protocol structure; ip_conn_cache uses a bare conn_t.
310  */
311 typedef union itc_s {
312 	conn_t	itc_conn;
313 	char	itcu_filler[CACHE_ALIGN(conn_s)];
314 } itc_t;
315 
/*
 * kmem caches from which conn_t structures are allocated.  The caches
 * defined here are created in ipcl_g_init() and destroyed in
 * ipcl_g_destroy(); sctp_conn_cache is only declared here and is defined
 * elsewhere.
 */
315 struct kmem_cache  *tcp_conn_cache;
316 struct kmem_cache  *ip_conn_cache;
317 extern struct kmem_cache  *sctp_conn_cache;
318 struct kmem_cache  *udp_conn_cache;
319 struct kmem_cache  *rawip_conn_cache;
320 struct kmem_cache  *rts_conn_cache;
321 
/* TCP timer mblk helpers, defined outside this file. */
322 extern void	tcp_timermp_free(tcp_t *);
323 extern mblk_t	*tcp_timermp_alloc(int);
324 
/*
 * Per-cache constructor/destructor pairs, registered with
 * kmem_cache_create() in ipcl_g_init().
 */
325 static int	ip_conn_constructor(void *, void *, int);
326 static void	ip_conn_destructor(void *, void *);
327 
328 static int	tcp_conn_constructor(void *, void *, int);
329 static void	tcp_conn_destructor(void *, void *);
330 
331 static int	udp_conn_constructor(void *, void *, int);
332 static void	udp_conn_destructor(void *, void *);
333 
334 static int	rawip_conn_constructor(void *, void *, int);
335 static void	rawip_conn_destructor(void *, void *);
336 
337 static int	rts_conn_constructor(void *, void *, int);
338 static void	rts_conn_destructor(void *, void *);
340 
341 /*
342  * Global (for all stack instances) init routine
343  */
344 void
345 ipcl_g_init(void)
346 {
347 	ip_conn_cache = kmem_cache_create("ip_conn_cache",
348 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
349 	    ip_conn_constructor, ip_conn_destructor,
350 	    NULL, NULL, NULL, 0);
351 
352 	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
353 	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
354 	    tcp_conn_constructor, tcp_conn_destructor,
355 	    tcp_conn_reclaim, NULL, NULL, 0);
356 
357 	udp_conn_cache = kmem_cache_create("udp_conn_cache",
358 	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
359 	    udp_conn_constructor, udp_conn_destructor,
360 	    NULL, NULL, NULL, 0);
361 
362 	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
363 	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
364 	    rawip_conn_constructor, rawip_conn_destructor,
365 	    NULL, NULL, NULL, 0);
366 
367 	rts_conn_cache = kmem_cache_create("rts_conn_cache",
368 	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
369 	    rts_conn_constructor, rts_conn_destructor,
370 	    NULL, NULL, NULL, 0);
371 }
372 
373 /*
374  * ipclassifier intialization routine, sets up hash tables.
375  */
376 void
377 ipcl_init(ip_stack_t *ipst)
378 {
379 	int i;
380 	int sizes[] = P2Ps();
381 
382 	/*
383 	 * Calculate size of conn fanout table from /etc/system settings
384 	 */
385 	if (ipcl_conn_hash_size != 0) {
386 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
387 	} else if (tcp_conn_hash_size != 0) {
388 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
389 	} else {
390 		extern pgcnt_t freemem;
391 
392 		ipst->ips_ipcl_conn_fanout_size =
393 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
394 
395 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
396 			ipst->ips_ipcl_conn_fanout_size =
397 			    ipcl_conn_hash_maxsize;
398 		}
399 	}
400 
401 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
402 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
403 			break;
404 		}
405 	}
406 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
407 		/* Out of range, use the 2^16 value */
408 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
409 	}
410 
411 	/* Take values from /etc/system */
412 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
413 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
414 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
415 	ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
416 
417 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
418 
419 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
420 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
421 
422 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
423 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
424 		    MUTEX_DEFAULT, NULL);
425 	}
426 
427 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
428 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
429 
430 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
431 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
432 		    MUTEX_DEFAULT, NULL);
433 	}
434 
435 	ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
436 	    sizeof (connf_t), KM_SLEEP);
437 	for (i = 0; i < IPPROTO_MAX; i++) {
438 		mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
439 		    MUTEX_DEFAULT, NULL);
440 	}
441 
442 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
443 	    sizeof (connf_t), KM_SLEEP);
444 	for (i = 0; i < IPPROTO_MAX; i++) {
445 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
446 		    MUTEX_DEFAULT, NULL);
447 	}
448 
449 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
450 	mutex_init(&ipst->ips_rts_clients->connf_lock,
451 	    NULL, MUTEX_DEFAULT, NULL);
452 
453 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
454 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
455 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
456 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
457 		    MUTEX_DEFAULT, NULL);
458 	}
459 
460 	ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
461 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
462 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
463 		mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
464 		    MUTEX_DEFAULT, NULL);
465 	}
466 
467 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
468 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
469 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
470 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
471 		    MUTEX_DEFAULT, NULL);
472 	}
473 
474 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
475 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
476 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
477 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
478 		    NULL, MUTEX_DEFAULT, NULL);
479 	}
480 }
481 
482 void
483 ipcl_g_destroy(void)
484 {
485 	kmem_cache_destroy(ip_conn_cache);
486 	kmem_cache_destroy(tcp_conn_cache);
487 	kmem_cache_destroy(udp_conn_cache);
488 	kmem_cache_destroy(rawip_conn_cache);
489 	kmem_cache_destroy(rts_conn_cache);
490 }
491 
492 /*
493  * All user-level and kernel use of the stack must be gone
494  * by now.
495  */
496 void
497 ipcl_destroy(ip_stack_t *ipst)
498 {
499 	int i;
500 
501 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
502 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
503 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
504 	}
505 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
506 	    sizeof (connf_t));
507 	ipst->ips_ipcl_conn_fanout = NULL;
508 
509 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
510 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
511 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
512 	}
513 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
514 	    sizeof (connf_t));
515 	ipst->ips_ipcl_bind_fanout = NULL;
516 
517 	for (i = 0; i < IPPROTO_MAX; i++) {
518 		ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
519 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
520 	}
521 	kmem_free(ipst->ips_ipcl_proto_fanout_v4,
522 	    IPPROTO_MAX * sizeof (connf_t));
523 	ipst->ips_ipcl_proto_fanout_v4 = NULL;
524 
525 	for (i = 0; i < IPPROTO_MAX; i++) {
526 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
527 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
528 	}
529 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
530 	    IPPROTO_MAX * sizeof (connf_t));
531 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
532 
533 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
534 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
535 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
536 	}
537 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
538 	    sizeof (connf_t));
539 	ipst->ips_ipcl_udp_fanout = NULL;
540 
541 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
542 		ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
543 		mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
544 	}
545 	kmem_free(ipst->ips_ipcl_iptun_fanout,
546 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
547 	ipst->ips_ipcl_iptun_fanout = NULL;
548 
549 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
550 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
551 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
552 	}
553 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
554 	    sizeof (connf_t));
555 	ipst->ips_ipcl_raw_fanout = NULL;
556 
557 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
558 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
559 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
560 	}
561 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
562 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
563 	ipst->ips_ipcl_globalhash_fanout = NULL;
564 
565 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
566 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
567 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
568 	ipst->ips_rts_clients = NULL;
569 }
570 
571 /*
572  * conn creation routine. initialize the conn, sets the reference
573  * and inserts it in the global hash table.
574  */
575 conn_t *
576 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
577 {
578 	conn_t	*connp;
579 	struct kmem_cache *conn_cache;
580 
581 	switch (type) {
582 	case IPCL_SCTPCONN:
583 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
584 			return (NULL);
585 		sctp_conn_init(connp);
586 		netstack_hold(ns);
587 		connp->conn_netstack = ns;
588 		connp->conn_ixa->ixa_ipst = ns->netstack_ip;
589 		connp->conn_ixa->ixa_conn_id = (long)connp;
590 		ipcl_globalhash_insert(connp);
591 		return (connp);
592 
593 	case IPCL_TCPCONN:
594 		conn_cache = tcp_conn_cache;
595 		break;
596 
597 	case IPCL_UDPCONN:
598 		conn_cache = udp_conn_cache;
599 		break;
600 
601 	case IPCL_RAWIPCONN:
602 		conn_cache = rawip_conn_cache;
603 		break;
604 
605 	case IPCL_RTSCONN:
606 		conn_cache = rts_conn_cache;
607 		break;
608 
609 	case IPCL_IPCCONN:
610 		conn_cache = ip_conn_cache;
611 		break;
612 
613 	default:
614 		connp = NULL;
615 		ASSERT(0);
616 	}
617 
618 	if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
619 		return (NULL);
620 
621 	connp->conn_ref = 1;
622 	netstack_hold(ns);
623 	connp->conn_netstack = ns;
624 	connp->conn_ixa->ixa_ipst = ns->netstack_ip;
625 	connp->conn_ixa->ixa_conn_id = (long)connp;
626 	ipcl_globalhash_insert(connp);
627 	return (connp);
628 }
629 
630 void
631 ipcl_conn_destroy(conn_t *connp)
632 {
633 	mblk_t	*mp;
634 	netstack_t	*ns = connp->conn_netstack;
635 
636 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
637 	ASSERT(connp->conn_ref == 0);
638 
639 	DTRACE_PROBE1(conn__destroy, conn_t *, connp);
640 
641 	if (connp->conn_cred != NULL) {
642 		crfree(connp->conn_cred);
643 		connp->conn_cred = NULL;
644 		/* ixa_cred done in ipcl_conn_cleanup below */
645 	}
646 
647 	if (connp->conn_ht_iphc != NULL) {
648 		kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
649 		connp->conn_ht_iphc = NULL;
650 		connp->conn_ht_iphc_allocated = 0;
651 		connp->conn_ht_iphc_len = 0;
652 		connp->conn_ht_ulp = NULL;
653 		connp->conn_ht_ulp_len = 0;
654 	}
655 	ip_pkt_free(&connp->conn_xmit_ipp);
656 
657 	ipcl_globalhash_remove(connp);
658 
659 	if (connp->conn_latch != NULL) {
660 		IPLATCH_REFRELE(connp->conn_latch);
661 		connp->conn_latch = NULL;
662 	}
663 	if (connp->conn_latch_in_policy != NULL) {
664 		IPPOL_REFRELE(connp->conn_latch_in_policy);
665 		connp->conn_latch_in_policy = NULL;
666 	}
667 	if (connp->conn_latch_in_action != NULL) {
668 		IPACT_REFRELE(connp->conn_latch_in_action);
669 		connp->conn_latch_in_action = NULL;
670 	}
671 	if (connp->conn_policy != NULL) {
672 		IPPH_REFRELE(connp->conn_policy, ns);
673 		connp->conn_policy = NULL;
674 	}
675 
676 	if (connp->conn_ipsec_opt_mp != NULL) {
677 		freemsg(connp->conn_ipsec_opt_mp);
678 		connp->conn_ipsec_opt_mp = NULL;
679 	}
680 
681 	if (connp->conn_flags & IPCL_TCPCONN) {
682 		tcp_t *tcp = connp->conn_tcp;
683 
684 		tcp_free(tcp);
685 		mp = tcp->tcp_timercache;
686 
687 		tcp->tcp_tcps = NULL;
688 
689 		/*
690 		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
691 		 * the mblk.
692 		 */
693 		if (tcp->tcp_rsrv_mp != NULL) {
694 			freeb(tcp->tcp_rsrv_mp);
695 			tcp->tcp_rsrv_mp = NULL;
696 			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
697 		}
698 
699 		ipcl_conn_cleanup(connp);
700 		connp->conn_flags = IPCL_TCPCONN;
701 		if (ns != NULL) {
702 			ASSERT(tcp->tcp_tcps == NULL);
703 			connp->conn_netstack = NULL;
704 			connp->conn_ixa->ixa_ipst = NULL;
705 			netstack_rele(ns);
706 		}
707 
708 		bzero(tcp, sizeof (tcp_t));
709 
710 		tcp->tcp_timercache = mp;
711 		tcp->tcp_connp = connp;
712 		kmem_cache_free(tcp_conn_cache, connp);
713 		return;
714 	}
715 
716 	if (connp->conn_flags & IPCL_SCTPCONN) {
717 		ASSERT(ns != NULL);
718 		sctp_free(connp);
719 		return;
720 	}
721 
722 	ipcl_conn_cleanup(connp);
723 	if (ns != NULL) {
724 		connp->conn_netstack = NULL;
725 		connp->conn_ixa->ixa_ipst = NULL;
726 		netstack_rele(ns);
727 	}
728 
729 	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
730 	if (connp->conn_flags & IPCL_UDPCONN) {
731 		connp->conn_flags = IPCL_UDPCONN;
732 		kmem_cache_free(udp_conn_cache, connp);
733 	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
734 		connp->conn_flags = IPCL_RAWIPCONN;
735 		connp->conn_proto = IPPROTO_ICMP;
736 		connp->conn_ixa->ixa_protocol = connp->conn_proto;
737 		kmem_cache_free(rawip_conn_cache, connp);
738 	} else if (connp->conn_flags & IPCL_RTSCONN) {
739 		connp->conn_flags = IPCL_RTSCONN;
740 		kmem_cache_free(rts_conn_cache, connp);
741 	} else {
742 		connp->conn_flags = IPCL_IPCCONN;
743 		ASSERT(connp->conn_flags & IPCL_IPCCONN);
744 		ASSERT(connp->conn_priv == NULL);
745 		kmem_cache_free(ip_conn_cache, connp);
746 	}
747 }
748 
749 /*
750  * Running in cluster mode - deregister listener information
751  */
752 static void
753 ipcl_conn_unlisten(conn_t *connp)
754 {
755 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
756 	ASSERT(connp->conn_lport != 0);
757 
758 	if (cl_inet_unlisten != NULL) {
759 		sa_family_t	addr_family;
760 		uint8_t		*laddrp;
761 
762 		if (connp->conn_ipversion == IPV6_VERSION) {
763 			addr_family = AF_INET6;
764 			laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
765 		} else {
766 			addr_family = AF_INET;
767 			laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
768 		}
769 		(*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
770 		    IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
771 	}
772 	connp->conn_flags &= ~IPCL_CL_LISTENER;
773 }
774 
775 /*
 * Unlink 'connp' from the fanout bucket recorded in its conn_fanout, if
 * any; a conn with a NULL conn_fanout is left untouched.  The bucket's
 * connf_lock is taken internally and conn_lock must not be held on entry.
 * After unlinking, the cluster listener registration is withdrawn when
 * IPCL_CL_LISTENER is set, and CONN_DEC_REF() is applied to the conn
 * (the insert macros below each apply CONN_INC_REF()).
 *
776  * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
777  * which table the conn belonged to). So for debugging we can see which hash
778  * table this connection was in.
779  */
780 #define	IPCL_HASH_REMOVE(connp)	{					\
781 	connf_t	*connfp = (connp)->conn_fanout;				\
782 	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
783 	if (connfp != NULL) {						\
784 		mutex_enter(&connfp->connf_lock);			\
785 		if ((connp)->conn_next != NULL)				\
786 			(connp)->conn_next->conn_prev =			\
787 			    (connp)->conn_prev;				\
788 		if ((connp)->conn_prev != NULL)				\
789 			(connp)->conn_prev->conn_next =			\
790 			    (connp)->conn_next;				\
791 		else							\
792 			connfp->connf_head = (connp)->conn_next;	\
793 		(connp)->conn_fanout = NULL;				\
794 		(connp)->conn_next = NULL;				\
795 		(connp)->conn_prev = NULL;				\
796 		(connp)->conn_flags |= IPCL_REMOVED;			\
797 		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
798 			ipcl_conn_unlisten((connp));			\
799 		CONN_DEC_REF((connp));					\
800 		mutex_exit(&connfp->connf_lock);			\
801 	}								\
802 }
803 
804 void
805 ipcl_hash_remove(conn_t *connp)
806 {
807 	uint8_t		protocol = connp->conn_proto;
808 
809 	IPCL_HASH_REMOVE(connp);
810 	if (protocol == IPPROTO_RSVP)
811 		ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
812 }
813 
814 /*
815  * The whole purpose of this function is allow removal of
816  * a conn_t from the connected hash for timewait reclaim.
817  * This is essentially a TW reclaim fastpath where timewait
818  * collector checks under fanout lock (so no one else can
819  * get access to the conn_t) that refcnt is 2 i.e. one for
820  * TCP and one for the classifier hash list. If ref count
821  * is indeed 2, we can just remove the conn under lock and
822  * avoid cleaning up the conn under squeue. This gives us
823  * improved performance.
824  */
825 void
826 ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
827 {
828 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
829 	ASSERT(MUTEX_HELD(&connp->conn_lock));
830 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
831 
832 	if ((connp)->conn_next != NULL) {
833 		(connp)->conn_next->conn_prev = (connp)->conn_prev;
834 	}
835 	if ((connp)->conn_prev != NULL) {
836 		(connp)->conn_prev->conn_next = (connp)->conn_next;
837 	} else {
838 		connfp->connf_head = (connp)->conn_next;
839 	}
840 	(connp)->conn_fanout = NULL;
841 	(connp)->conn_next = NULL;
842 	(connp)->conn_prev = NULL;
843 	(connp)->conn_flags |= IPCL_REMOVED;
844 	ASSERT((connp)->conn_ref == 2);
845 	(connp)->conn_ref--;
846 }
847 
/*
 * Push 'connp' onto the head of bucket 'connfp'.  The conn must not be on
 * any fanout list (asserted).  IPCL_REMOVED is cleared, IPCL_CONNECTED is
 * set and CONN_INC_REF() is applied to the conn.  No locking is done here;
 * IPCL_HASH_INSERT_CONNECTED() invokes this with connf_lock held.
 */
848 #define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
849 	ASSERT((connp)->conn_fanout == NULL);				\
850 	ASSERT((connp)->conn_next == NULL);				\
851 	ASSERT((connp)->conn_prev == NULL);				\
852 	if ((connfp)->connf_head != NULL) {				\
853 		(connfp)->connf_head->conn_prev = (connp);		\
854 		(connp)->conn_next = (connfp)->connf_head;		\
855 	}								\
856 	(connp)->conn_fanout = (connfp);				\
857 	(connfp)->connf_head = (connp);					\
858 	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
859 	    IPCL_CONNECTED;						\
860 	CONN_INC_REF(connp);						\
861 }
862 
/*
 * Move 'connp' to the head of bucket 'connfp': first unlink it from any
 * fanout it is currently on, then insert it under the bucket's connf_lock.
 */
863 #define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
864 	IPCL_HASH_REMOVE((connp));					\
865 	mutex_enter(&(connfp)->connf_lock);				\
866 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
867 	mutex_exit(&(connfp)->connf_lock);				\
868 }
869 
/*
 * Move 'connp' into bucket 'connfp' as a bound conn.  After unlinking it
 * from any current fanout, the bucket is walked under connf_lock and the
 * conn is inserted just before the first entry whose conn_laddr_v6
 * satisfies _IPCL_V4_MATCH_ANY(), or at the tail if there is none.
 * IPCL_REMOVED is cleared, IPCL_BOUND is set and CONN_INC_REF() is applied.
 * NOTE(review): presumably this keeps specific-address binds ahead of
 * wildcard binds; _IPCL_V4_MATCH_ANY() is defined elsewhere - confirm.
 */
870 #define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
871 	conn_t *pconnp = NULL, *nconnp;					\
872 	IPCL_HASH_REMOVE((connp));					\
873 	mutex_enter(&(connfp)->connf_lock);				\
874 	nconnp = (connfp)->connf_head;					\
875 	while (nconnp != NULL &&					\
876 	    !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {		\
877 		pconnp = nconnp;					\
878 		nconnp = nconnp->conn_next;				\
879 	}								\
880 	if (pconnp != NULL) {						\
881 		pconnp->conn_next = (connp);				\
882 		(connp)->conn_prev = pconnp;				\
883 	} else {							\
884 		(connfp)->connf_head = (connp);				\
885 	}								\
886 	if (nconnp != NULL) {						\
887 		(connp)->conn_next = nconnp;				\
888 		nconnp->conn_prev = (connp);				\
889 	}								\
890 	(connp)->conn_fanout = (connfp);				\
891 	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
892 	    IPCL_BOUND;							\
893 	CONN_INC_REF(connp);						\
894 	mutex_exit(&(connfp)->connf_lock);				\
895 }
896 
/*
 * Move 'connp' into bucket 'connfp' as a wildcard-bound conn.
 *
 * The conn is first unlinked from any fanout it is on.  Then, under the
 * bucket's connf_lock:
 *
 *	- if the conn's local address is v4-mapped, it is inserted just
 *	  before the first entry in the same zone whose local address is
 *	  the IPv6 unspecified address;
 *	- otherwise, or if there is no such entry, it is appended at the
 *	  tail of the bucket.
 *
 * IPCL_REMOVED is cleared, IPCL_BOUND is set and CONN_INC_REF() is applied.
 *
 * Every use of the 'connp' argument is parenthesized so that the macro
 * expands correctly when it is passed an expression.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
926 
927 void
928 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
929 {
930 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
931 }
932 
933 /*
934  * Because the classifier is used to classify inbound packets, the destination
935  * address is meant to be our local tunnel address (tunnel source), and the
936  * source the remote tunnel address (tunnel destination).
937  *
938  * Note that conn_proto can't be used for fanout since the upper protocol
939  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
940  */
941 conn_t *
942 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
943 {
944 	connf_t	*connfp;
945 	conn_t	*connp;
946 
947 	/* first look for IPv4 tunnel links */
948 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
949 	mutex_enter(&connfp->connf_lock);
950 	for (connp = connfp->connf_head; connp != NULL;
951 	    connp = connp->conn_next) {
952 		if (IPCL_IPTUN_MATCH(connp, *dst, *src))
953 			break;
954 	}
955 	if (connp != NULL)
956 		goto done;
957 
958 	mutex_exit(&connfp->connf_lock);
959 
960 	/* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
961 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
962 	    INADDR_ANY)];
963 	mutex_enter(&connfp->connf_lock);
964 	for (connp = connfp->connf_head; connp != NULL;
965 	    connp = connp->conn_next) {
966 		if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
967 			break;
968 	}
969 done:
970 	if (connp != NULL)
971 		CONN_INC_REF(connp);
972 	mutex_exit(&connfp->connf_lock);
973 	return (connp);
974 }
975 
976 conn_t *
977 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
978 {
979 	connf_t	*connfp;
980 	conn_t	*connp;
981 
982 	/* Look for an IPv6 tunnel link */
983 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
984 	mutex_enter(&connfp->connf_lock);
985 	for (connp = connfp->connf_head; connp != NULL;
986 	    connp = connp->conn_next) {
987 		if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
988 			CONN_INC_REF(connp);
989 			break;
990 		}
991 	}
992 	mutex_exit(&connfp->connf_lock);
993 	return (connp);
994 }
995 
996 /*
997  * This function is used only for inserting SCTP raw socket now.
998  * This may change later.
999  *
1000  * Note that only one raw socket can be bound to a port.  The param
1001  * lport is in network byte order.
1002  */
1003 static int
1004 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1005 {
1006 	connf_t	*connfp;
1007 	conn_t	*oconnp;
1008 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1009 
1010 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1011 
1012 	/* Check for existing raw socket already bound to the port. */
1013 	mutex_enter(&connfp->connf_lock);
1014 	for (oconnp = connfp->connf_head; oconnp != NULL;
1015 	    oconnp = oconnp->conn_next) {
1016 		if (oconnp->conn_lport == lport &&
1017 		    oconnp->conn_zoneid == connp->conn_zoneid &&
1018 		    oconnp->conn_family == connp->conn_family &&
1019 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1020 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1021 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1022 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1023 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1024 		    &connp->conn_laddr_v6))) {
1025 			break;
1026 		}
1027 	}
1028 	mutex_exit(&connfp->connf_lock);
1029 	if (oconnp != NULL)
1030 		return (EADDRNOTAVAIL);
1031 
1032 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1033 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1034 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1035 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1036 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1037 		} else {
1038 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1039 		}
1040 	} else {
1041 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1042 	}
1043 	return (0);
1044 }
1045 
1046 static int
1047 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1048 {
1049 	connf_t	*connfp;
1050 	conn_t	*tconnp;
1051 	ipaddr_t laddr = connp->conn_laddr_v4;
1052 	ipaddr_t faddr = connp->conn_faddr_v4;
1053 
1054 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1055 	mutex_enter(&connfp->connf_lock);
1056 	for (tconnp = connfp->connf_head; tconnp != NULL;
1057 	    tconnp = tconnp->conn_next) {
1058 		if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1059 			/* A tunnel is already bound to these addresses. */
1060 			mutex_exit(&connfp->connf_lock);
1061 			return (EADDRINUSE);
1062 		}
1063 	}
1064 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1065 	mutex_exit(&connfp->connf_lock);
1066 	return (0);
1067 }
1068 
1069 static int
1070 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1071 {
1072 	connf_t	*connfp;
1073 	conn_t	*tconnp;
1074 	in6_addr_t *laddr = &connp->conn_laddr_v6;
1075 	in6_addr_t *faddr = &connp->conn_faddr_v6;
1076 
1077 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1078 	mutex_enter(&connfp->connf_lock);
1079 	for (tconnp = connfp->connf_head; tconnp != NULL;
1080 	    tconnp = tconnp->conn_next) {
1081 		if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1082 			/* A tunnel is already bound to these addresses. */
1083 			mutex_exit(&connfp->connf_lock);
1084 			return (EADDRINUSE);
1085 		}
1086 	}
1087 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1088 	mutex_exit(&connfp->connf_lock);
1089 	return (0);
1090 }
1091 
1092 /*
1093  * Check for a MAC exemption conflict on a labeled system.  Note that for
1094  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1095  * transport layer.  This check is for binding all other protocols.
1096  *
1097  * Returns true if there's a conflict.
1098  */
1099 static boolean_t
1100 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1101 {
1102 	connf_t	*connfp;
1103 	conn_t *tconn;
1104 
1105 	connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1106 	mutex_enter(&connfp->connf_lock);
1107 	for (tconn = connfp->connf_head; tconn != NULL;
1108 	    tconn = tconn->conn_next) {
1109 		/* We don't allow v4 fallback for v6 raw socket */
1110 		if (connp->conn_family != tconn->conn_family)
1111 			continue;
1112 		/* If neither is exempt, then there's no conflict */
1113 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1114 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1115 			continue;
1116 		/* We are only concerned about sockets for a different zone */
1117 		if (connp->conn_zoneid == tconn->conn_zoneid)
1118 			continue;
1119 		/* If both are bound to different specific addrs, ok */
1120 		if (connp->conn_laddr_v4 != INADDR_ANY &&
1121 		    tconn->conn_laddr_v4 != INADDR_ANY &&
1122 		    connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1123 			continue;
1124 		/* These two conflict; fail */
1125 		break;
1126 	}
1127 	mutex_exit(&connfp->connf_lock);
1128 	return (tconn != NULL);
1129 }
1130 
1131 static boolean_t
1132 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1133 {
1134 	connf_t	*connfp;
1135 	conn_t *tconn;
1136 
1137 	connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1138 	mutex_enter(&connfp->connf_lock);
1139 	for (tconn = connfp->connf_head; tconn != NULL;
1140 	    tconn = tconn->conn_next) {
1141 		/* We don't allow v4 fallback for v6 raw socket */
1142 		if (connp->conn_family != tconn->conn_family)
1143 			continue;
1144 		/* If neither is exempt, then there's no conflict */
1145 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1146 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1147 			continue;
1148 		/* We are only concerned about sockets for a different zone */
1149 		if (connp->conn_zoneid == tconn->conn_zoneid)
1150 			continue;
1151 		/* If both are bound to different addrs, ok */
1152 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1153 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1154 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1155 		    &tconn->conn_laddr_v6))
1156 			continue;
1157 		/* These two conflict; fail */
1158 		break;
1159 	}
1160 	mutex_exit(&connfp->connf_lock);
1161 	return (tconn != NULL);
1162 }
1163 
1164 /*
1165  * (v4, v6) bind hash insertion routines
1166  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1167  */
1168 
1169 int
1170 ipcl_bind_insert(conn_t *connp)
1171 {
1172 	if (connp->conn_ipversion == IPV6_VERSION)
1173 		return (ipcl_bind_insert_v6(connp));
1174 	else
1175 		return (ipcl_bind_insert_v4(connp));
1176 }
1177 
1178 int
1179 ipcl_bind_insert_v4(conn_t *connp)
1180 {
1181 	connf_t	*connfp;
1182 	int	ret = 0;
1183 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1184 	uint16_t	lport = connp->conn_lport;
1185 	uint8_t		protocol = connp->conn_proto;
1186 
1187 	if (IPCL_IS_IPTUN(connp))
1188 		return (ipcl_iptun_hash_insert(connp, ipst));
1189 
1190 	switch (protocol) {
1191 	default:
1192 		if (is_system_labeled() &&
1193 		    check_exempt_conflict_v4(connp, ipst))
1194 			return (EADDRINUSE);
1195 		/* FALLTHROUGH */
1196 	case IPPROTO_UDP:
1197 		if (protocol == IPPROTO_UDP) {
1198 			connfp = &ipst->ips_ipcl_udp_fanout[
1199 			    IPCL_UDP_HASH(lport, ipst)];
1200 		} else {
1201 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1202 		}
1203 
1204 		if (connp->conn_faddr_v4 != INADDR_ANY) {
1205 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1206 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
1207 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1208 		} else {
1209 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1210 		}
1211 		if (protocol == IPPROTO_RSVP)
1212 			ill_set_inputfn_all(ipst);
1213 		break;
1214 
1215 	case IPPROTO_TCP:
1216 		/* Insert it in the Bind Hash */
1217 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1218 		connfp = &ipst->ips_ipcl_bind_fanout[
1219 		    IPCL_BIND_HASH(lport, ipst)];
1220 		if (connp->conn_laddr_v4 != INADDR_ANY) {
1221 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1222 		} else {
1223 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1224 		}
1225 		if (cl_inet_listen != NULL) {
1226 			ASSERT(connp->conn_ipversion == IPV4_VERSION);
1227 			connp->conn_flags |= IPCL_CL_LISTENER;
1228 			(*cl_inet_listen)(
1229 			    connp->conn_netstack->netstack_stackid,
1230 			    IPPROTO_TCP, AF_INET,
1231 			    (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1232 		}
1233 		break;
1234 
1235 	case IPPROTO_SCTP:
1236 		ret = ipcl_sctp_hash_insert(connp, lport);
1237 		break;
1238 	}
1239 
1240 	return (ret);
1241 }
1242 
1243 int
1244 ipcl_bind_insert_v6(conn_t *connp)
1245 {
1246 	connf_t		*connfp;
1247 	int		ret = 0;
1248 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1249 	uint16_t	lport = connp->conn_lport;
1250 	uint8_t		protocol = connp->conn_proto;
1251 
1252 	if (IPCL_IS_IPTUN(connp)) {
1253 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
1254 	}
1255 
1256 	switch (protocol) {
1257 	default:
1258 		if (is_system_labeled() &&
1259 		    check_exempt_conflict_v6(connp, ipst))
1260 			return (EADDRINUSE);
1261 		/* FALLTHROUGH */
1262 	case IPPROTO_UDP:
1263 		if (protocol == IPPROTO_UDP) {
1264 			connfp = &ipst->ips_ipcl_udp_fanout[
1265 			    IPCL_UDP_HASH(lport, ipst)];
1266 		} else {
1267 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1268 		}
1269 
1270 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1271 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1272 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1273 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1274 		} else {
1275 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1276 		}
1277 		break;
1278 
1279 	case IPPROTO_TCP:
1280 		/* Insert it in the Bind Hash */
1281 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1282 		connfp = &ipst->ips_ipcl_bind_fanout[
1283 		    IPCL_BIND_HASH(lport, ipst)];
1284 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1285 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1286 		} else {
1287 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1288 		}
1289 		if (cl_inet_listen != NULL) {
1290 			sa_family_t	addr_family;
1291 			uint8_t		*laddrp;
1292 
1293 			if (connp->conn_ipversion == IPV6_VERSION) {
1294 				addr_family = AF_INET6;
1295 				laddrp =
1296 				    (uint8_t *)&connp->conn_bound_addr_v6;
1297 			} else {
1298 				addr_family = AF_INET;
1299 				laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1300 			}
1301 			connp->conn_flags |= IPCL_CL_LISTENER;
1302 			(*cl_inet_listen)(
1303 			    connp->conn_netstack->netstack_stackid,
1304 			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1305 		}
1306 		break;
1307 
1308 	case IPPROTO_SCTP:
1309 		ret = ipcl_sctp_hash_insert(connp, lport);
1310 		break;
1311 	}
1312 
1313 	return (ret);
1314 }
1315 
1316 /*
1317  * ipcl_conn_hash insertion routines.
1318  * The caller has already set conn_proto and the addresses/ports in the conn_t.
1319  */
1320 
1321 int
1322 ipcl_conn_insert(conn_t *connp)
1323 {
1324 	if (connp->conn_ipversion == IPV6_VERSION)
1325 		return (ipcl_conn_insert_v6(connp));
1326 	else
1327 		return (ipcl_conn_insert_v4(connp));
1328 }
1329 
1330 int
1331 ipcl_conn_insert_v4(conn_t *connp)
1332 {
1333 	connf_t		*connfp;
1334 	conn_t		*tconnp;
1335 	int		ret = 0;
1336 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1337 	uint16_t	lport = connp->conn_lport;
1338 	uint8_t		protocol = connp->conn_proto;
1339 
1340 	if (IPCL_IS_IPTUN(connp))
1341 		return (ipcl_iptun_hash_insert(connp, ipst));
1342 
1343 	switch (protocol) {
1344 	case IPPROTO_TCP:
1345 		/*
1346 		 * For TCP, we check whether the connection tuple already
1347 		 * exists before allowing the connection to proceed.  We
1348 		 * also allow indexing on the zoneid. This is to allow
1349 		 * multiple shared stack zones to have the same tcp
1350 		 * connection tuple. In practice this only happens for
1351 		 * INADDR_LOOPBACK as it's the only local address which
1352 		 * doesn't have to be unique.
1353 		 */
1354 		connfp = &ipst->ips_ipcl_conn_fanout[
1355 		    IPCL_CONN_HASH(connp->conn_faddr_v4,
1356 		    connp->conn_ports, ipst)];
1357 		mutex_enter(&connfp->connf_lock);
1358 		for (tconnp = connfp->connf_head; tconnp != NULL;
1359 		    tconnp = tconnp->conn_next) {
1360 			if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1361 			    connp->conn_faddr_v4, connp->conn_laddr_v4,
1362 			    connp->conn_ports) &&
1363 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1364 				/* Already have a conn. bail out */
1365 				mutex_exit(&connfp->connf_lock);
1366 				return (EADDRINUSE);
1367 			}
1368 		}
1369 		if (connp->conn_fanout != NULL) {
1370 			/*
1371 			 * Probably a XTI/TLI application trying to do a
1372 			 * rebind. Let it happen.
1373 			 */
1374 			mutex_exit(&connfp->connf_lock);
1375 			IPCL_HASH_REMOVE(connp);
1376 			mutex_enter(&connfp->connf_lock);
1377 		}
1378 
1379 		ASSERT(connp->conn_recv != NULL);
1380 		ASSERT(connp->conn_recvicmp != NULL);
1381 
1382 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1383 		mutex_exit(&connfp->connf_lock);
1384 		break;
1385 
1386 	case IPPROTO_SCTP:
1387 		/*
1388 		 * The raw socket may have already been bound, remove it
1389 		 * from the hash first.
1390 		 */
1391 		IPCL_HASH_REMOVE(connp);
1392 		ret = ipcl_sctp_hash_insert(connp, lport);
1393 		break;
1394 
1395 	default:
1396 		/*
1397 		 * Check for conflicts among MAC exempt bindings.  For
1398 		 * transports with port numbers, this is done by the upper
1399 		 * level per-transport binding logic.  For all others, it's
1400 		 * done here.
1401 		 */
1402 		if (is_system_labeled() &&
1403 		    check_exempt_conflict_v4(connp, ipst))
1404 			return (EADDRINUSE);
1405 		/* FALLTHROUGH */
1406 
1407 	case IPPROTO_UDP:
1408 		if (protocol == IPPROTO_UDP) {
1409 			connfp = &ipst->ips_ipcl_udp_fanout[
1410 			    IPCL_UDP_HASH(lport, ipst)];
1411 		} else {
1412 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1413 		}
1414 
1415 		if (connp->conn_faddr_v4 != INADDR_ANY) {
1416 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1417 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
1418 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1419 		} else {
1420 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1421 		}
1422 		break;
1423 	}
1424 
1425 	return (ret);
1426 }
1427 
1428 int
1429 ipcl_conn_insert_v6(conn_t *connp)
1430 {
1431 	connf_t		*connfp;
1432 	conn_t		*tconnp;
1433 	int		ret = 0;
1434 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1435 	uint16_t	lport = connp->conn_lport;
1436 	uint8_t		protocol = connp->conn_proto;
1437 	uint_t		ifindex = connp->conn_bound_if;
1438 
1439 	if (IPCL_IS_IPTUN(connp))
1440 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
1441 
1442 	switch (protocol) {
1443 	case IPPROTO_TCP:
1444 
1445 		/*
1446 		 * For tcp, we check whether the connection tuple already
1447 		 * exists before allowing the connection to proceed.  We
1448 		 * also allow indexing on the zoneid. This is to allow
1449 		 * multiple shared stack zones to have the same tcp
1450 		 * connection tuple. In practice this only happens for
1451 		 * ipv6_loopback as it's the only local address which
1452 		 * doesn't have to be unique.
1453 		 */
1454 		connfp = &ipst->ips_ipcl_conn_fanout[
1455 		    IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1456 		    ipst)];
1457 		mutex_enter(&connfp->connf_lock);
1458 		for (tconnp = connfp->connf_head; tconnp != NULL;
1459 		    tconnp = tconnp->conn_next) {
1460 			/* NOTE: need to match zoneid. Bug in onnv-gate */
1461 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1462 			    connp->conn_faddr_v6, connp->conn_laddr_v6,
1463 			    connp->conn_ports) &&
1464 			    (tconnp->conn_bound_if == 0 ||
1465 			    tconnp->conn_bound_if == ifindex) &&
1466 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1467 				/* Already have a conn. bail out */
1468 				mutex_exit(&connfp->connf_lock);
1469 				return (EADDRINUSE);
1470 			}
1471 		}
1472 		if (connp->conn_fanout != NULL) {
1473 			/*
1474 			 * Probably a XTI/TLI application trying to do a
1475 			 * rebind. Let it happen.
1476 			 */
1477 			mutex_exit(&connfp->connf_lock);
1478 			IPCL_HASH_REMOVE(connp);
1479 			mutex_enter(&connfp->connf_lock);
1480 		}
1481 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1482 		mutex_exit(&connfp->connf_lock);
1483 		break;
1484 
1485 	case IPPROTO_SCTP:
1486 		IPCL_HASH_REMOVE(connp);
1487 		ret = ipcl_sctp_hash_insert(connp, lport);
1488 		break;
1489 
1490 	default:
1491 		if (is_system_labeled() &&
1492 		    check_exempt_conflict_v6(connp, ipst))
1493 			return (EADDRINUSE);
1494 		/* FALLTHROUGH */
1495 	case IPPROTO_UDP:
1496 		if (protocol == IPPROTO_UDP) {
1497 			connfp = &ipst->ips_ipcl_udp_fanout[
1498 			    IPCL_UDP_HASH(lport, ipst)];
1499 		} else {
1500 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1501 		}
1502 
1503 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1504 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1505 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1506 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1507 		} else {
1508 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1509 		}
1510 		break;
1511 	}
1512 
1513 	return (ret);
1514 }
1515 
1516 /*
1517  * v4 packet classifying function. looks up the fanout table to
1518  * find the conn, the packet belongs to. returns the conn with
1519  * the reference held, null otherwise.
1520  *
1521  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1522  * Lookup" comment block are applied.  Labels are also checked as described
1523  * above.  If the packet is from the inside (looped back), and is from the same
1524  * zone, then label checks are omitted.
1525  */
1526 conn_t *
1527 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1528     ip_recv_attr_t *ira, ip_stack_t *ipst)
1529 {
1530 	ipha_t	*ipha;
1531 	connf_t	*connfp, *bind_connfp;
1532 	uint16_t lport;
1533 	uint16_t fport;
1534 	uint32_t ports;
1535 	conn_t	*connp;
1536 	uint16_t  *up;
1537 	zoneid_t	zoneid = ira->ira_zoneid;
1538 
1539 	ipha = (ipha_t *)mp->b_rptr;
1540 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1541 
1542 	switch (protocol) {
1543 	case IPPROTO_TCP:
1544 		ports = *(uint32_t *)up;
1545 		connfp =
1546 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1547 		    ports, ipst)];
1548 		mutex_enter(&connfp->connf_lock);
1549 		for (connp = connfp->connf_head; connp != NULL;
1550 		    connp = connp->conn_next) {
1551 			if (IPCL_CONN_MATCH(connp, protocol,
1552 			    ipha->ipha_src, ipha->ipha_dst, ports) &&
1553 			    (connp->conn_zoneid == zoneid ||
1554 			    connp->conn_allzones ||
1555 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1556 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1557 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1558 				break;
1559 		}
1560 
1561 		if (connp != NULL) {
1562 			/*
1563 			 * We have a fully-bound TCP connection.
1564 			 *
1565 			 * For labeled systems, there's no need to check the
1566 			 * label here.  It's known to be good as we checked
1567 			 * before allowing the connection to become bound.
1568 			 */
1569 			CONN_INC_REF(connp);
1570 			mutex_exit(&connfp->connf_lock);
1571 			return (connp);
1572 		}
1573 
1574 		mutex_exit(&connfp->connf_lock);
1575 		lport = up[1];
1576 		bind_connfp =
1577 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1578 		mutex_enter(&bind_connfp->connf_lock);
1579 		for (connp = bind_connfp->connf_head; connp != NULL;
1580 		    connp = connp->conn_next) {
1581 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1582 			    lport) &&
1583 			    (connp->conn_zoneid == zoneid ||
1584 			    connp->conn_allzones ||
1585 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1586 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1587 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1588 				break;
1589 		}
1590 
1591 		/*
1592 		 * If the matching connection is SLP on a private address, then
1593 		 * the label on the packet must match the local zone's label.
1594 		 * Otherwise, it must be in the label range defined by tnrh.
1595 		 * This is ensured by tsol_receive_local.
1596 		 *
1597 		 * Note that we don't check tsol_receive_local for
1598 		 * the connected case.
1599 		 */
1600 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1601 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1602 		    ira, connp)) {
1603 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1604 			    char *, "connp(1) could not receive mp(2)",
1605 			    conn_t *, connp, mblk_t *, mp);
1606 			connp = NULL;
1607 		}
1608 
1609 		if (connp != NULL) {
1610 			/* Have a listener at least */
1611 			CONN_INC_REF(connp);
1612 			mutex_exit(&bind_connfp->connf_lock);
1613 			return (connp);
1614 		}
1615 
1616 		mutex_exit(&bind_connfp->connf_lock);
1617 		break;
1618 
1619 	case IPPROTO_UDP:
1620 		lport = up[1];
1621 		fport = up[0];
1622 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1623 		mutex_enter(&connfp->connf_lock);
1624 		for (connp = connfp->connf_head; connp != NULL;
1625 		    connp = connp->conn_next) {
1626 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1627 			    fport, ipha->ipha_src) &&
1628 			    (connp->conn_zoneid == zoneid ||
1629 			    connp->conn_allzones ||
1630 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1631 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1632 				break;
1633 		}
1634 
1635 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1636 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1637 		    ira, connp)) {
1638 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1639 			    char *, "connp(1) could not receive mp(2)",
1640 			    conn_t *, connp, mblk_t *, mp);
1641 			connp = NULL;
1642 		}
1643 
1644 		if (connp != NULL) {
1645 			CONN_INC_REF(connp);
1646 			mutex_exit(&connfp->connf_lock);
1647 			return (connp);
1648 		}
1649 
1650 		/*
1651 		 * We shouldn't come here for multicast/broadcast packets
1652 		 */
1653 		mutex_exit(&connfp->connf_lock);
1654 
1655 		break;
1656 
1657 	case IPPROTO_ENCAP:
1658 	case IPPROTO_IPV6:
1659 		return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1660 		    &ipha->ipha_dst, ipst));
1661 	}
1662 
1663 	return (NULL);
1664 }
1665 
1666 conn_t *
1667 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1668     ip_recv_attr_t *ira, ip_stack_t *ipst)
1669 {
1670 	ip6_t		*ip6h;
1671 	connf_t		*connfp, *bind_connfp;
1672 	uint16_t	lport;
1673 	uint16_t	fport;
1674 	tcpha_t		*tcpha;
1675 	uint32_t	ports;
1676 	conn_t		*connp;
1677 	uint16_t	*up;
1678 	zoneid_t	zoneid = ira->ira_zoneid;
1679 
1680 	ip6h = (ip6_t *)mp->b_rptr;
1681 
1682 	switch (protocol) {
1683 	case IPPROTO_TCP:
1684 		tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1685 		up = &tcpha->tha_lport;
1686 		ports = *(uint32_t *)up;
1687 
1688 		connfp =
1689 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1690 		    ports, ipst)];
1691 		mutex_enter(&connfp->connf_lock);
1692 		for (connp = connfp->connf_head; connp != NULL;
1693 		    connp = connp->conn_next) {
1694 			if (IPCL_CONN_MATCH_V6(connp, protocol,
1695 			    ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1696 			    (connp->conn_zoneid == zoneid ||
1697 			    connp->conn_allzones ||
1698 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1699 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1700 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1701 				break;
1702 		}
1703 
1704 		if (connp != NULL) {
1705 			/*
1706 			 * We have a fully-bound TCP connection.
1707 			 *
1708 			 * For labeled systems, there's no need to check the
1709 			 * label here.  It's known to be good as we checked
1710 			 * before allowing the connection to become bound.
1711 			 */
1712 			CONN_INC_REF(connp);
1713 			mutex_exit(&connfp->connf_lock);
1714 			return (connp);
1715 		}
1716 
1717 		mutex_exit(&connfp->connf_lock);
1718 
1719 		lport = up[1];
1720 		bind_connfp =
1721 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1722 		mutex_enter(&bind_connfp->connf_lock);
1723 		for (connp = bind_connfp->connf_head; connp != NULL;
1724 		    connp = connp->conn_next) {
1725 			if (IPCL_BIND_MATCH_V6(connp, protocol,
1726 			    ip6h->ip6_dst, lport) &&
1727 			    (connp->conn_zoneid == zoneid ||
1728 			    connp->conn_allzones ||
1729 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1730 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1731 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1732 				break;
1733 		}
1734 
1735 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1736 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1737 		    ira, connp)) {
1738 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1739 			    char *, "connp(1) could not receive mp(2)",
1740 			    conn_t *, connp, mblk_t *, mp);
1741 			connp = NULL;
1742 		}
1743 
1744 		if (connp != NULL) {
1745 			/* Have a listner at least */
1746 			CONN_INC_REF(connp);
1747 			mutex_exit(&bind_connfp->connf_lock);
1748 			return (connp);
1749 		}
1750 
1751 		mutex_exit(&bind_connfp->connf_lock);
1752 		break;
1753 
1754 	case IPPROTO_UDP:
1755 		up = (uint16_t *)&mp->b_rptr[hdr_len];
1756 		lport = up[1];
1757 		fport = up[0];
1758 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1759 		mutex_enter(&connfp->connf_lock);
1760 		for (connp = connfp->connf_head; connp != NULL;
1761 		    connp = connp->conn_next) {
1762 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1763 			    fport, ip6h->ip6_src) &&
1764 			    (connp->conn_zoneid == zoneid ||
1765 			    connp->conn_allzones ||
1766 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1767 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1768 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1769 				break;
1770 		}
1771 
1772 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1773 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1774 		    ira, connp)) {
1775 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1776 			    char *, "connp(1) could not receive mp(2)",
1777 			    conn_t *, connp, mblk_t *, mp);
1778 			connp = NULL;
1779 		}
1780 
1781 		if (connp != NULL) {
1782 			CONN_INC_REF(connp);
1783 			mutex_exit(&connfp->connf_lock);
1784 			return (connp);
1785 		}
1786 
1787 		/*
1788 		 * We shouldn't come here for multicast/broadcast packets
1789 		 */
1790 		mutex_exit(&connfp->connf_lock);
1791 		break;
1792 	case IPPROTO_ENCAP:
1793 	case IPPROTO_IPV6:
1794 		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1795 		    &ip6h->ip6_dst, ipst));
1796 	}
1797 
1798 	return (NULL);
1799 }
1800 
1801 /*
1802  * wrapper around ipcl_classify_(v4,v6) routines.
1803  */
1804 conn_t *
1805 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1806 {
1807 	if (ira->ira_flags & IRAF_IS_IPV4) {
1808 		return (ipcl_classify_v4(mp, ira->ira_protocol,
1809 		    ira->ira_ip_hdr_length, ira, ipst));
1810 	} else {
1811 		return (ipcl_classify_v6(mp, ira->ira_protocol,
1812 		    ira->ira_ip_hdr_length, ira, ipst));
1813 	}
1814 }
1815 
1816 /*
1817  * Only used to classify SCTP RAW sockets
1818  */
1819 conn_t *
1820 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1821     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1822 {
1823 	connf_t		*connfp;
1824 	conn_t		*connp;
1825 	in_port_t	lport;
1826 	int		ipversion;
1827 	const void	*dst;
1828 	zoneid_t	zoneid = ira->ira_zoneid;
1829 
1830 	lport = ((uint16_t *)&ports)[1];
1831 	if (ira->ira_flags & IRAF_IS_IPV4) {
1832 		dst = (const void *)&ipha->ipha_dst;
1833 		ipversion = IPV4_VERSION;
1834 	} else {
1835 		dst = (const void *)&ip6h->ip6_dst;
1836 		ipversion = IPV6_VERSION;
1837 	}
1838 
1839 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1840 	mutex_enter(&connfp->connf_lock);
1841 	for (connp = connfp->connf_head; connp != NULL;
1842 	    connp = connp->conn_next) {
1843 		/* We don't allow v4 fallback for v6 raw socket. */
1844 		if (ipversion != connp->conn_ipversion)
1845 			continue;
1846 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1847 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1848 			if (ipversion == IPV4_VERSION) {
1849 				if (!IPCL_CONN_MATCH(connp, protocol,
1850 				    ipha->ipha_src, ipha->ipha_dst, ports))
1851 					continue;
1852 			} else {
1853 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
1854 				    ip6h->ip6_src, ip6h->ip6_dst, ports))
1855 					continue;
1856 			}
1857 		} else {
1858 			if (ipversion == IPV4_VERSION) {
1859 				if (!IPCL_BIND_MATCH(connp, protocol,
1860 				    ipha->ipha_dst, lport))
1861 					continue;
1862 			} else {
1863 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
1864 				    ip6h->ip6_dst, lport))
1865 					continue;
1866 			}
1867 		}
1868 
1869 		if (connp->conn_zoneid == zoneid ||
1870 		    connp->conn_allzones ||
1871 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1872 		    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1873 		    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1874 			break;
1875 	}
1876 
1877 	if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1878 	    !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1879 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1880 		    char *, "connp(1) could not receive mp(2)",
1881 		    conn_t *, connp, mblk_t *, mp);
1882 		connp = NULL;
1883 	}
1884 
1885 	if (connp != NULL)
1886 		goto found;
1887 	mutex_exit(&connfp->connf_lock);
1888 
1889 	/* Try to look for a wildcard SCTP RAW socket match. */
1890 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1891 	mutex_enter(&connfp->connf_lock);
1892 	for (connp = connfp->connf_head; connp != NULL;
1893 	    connp = connp->conn_next) {
1894 		/* We don't allow v4 fallback for v6 raw socket. */
1895 		if (ipversion != connp->conn_ipversion)
1896 			continue;
1897 		if (!IPCL_ZONE_MATCH(connp, zoneid))
1898 			continue;
1899 
1900 		if (ipversion == IPV4_VERSION) {
1901 			if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1902 				break;
1903 		} else {
1904 			if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1905 				break;
1906 			}
1907 		}
1908 	}
1909 
1910 	if (connp != NULL)
1911 		goto found;
1912 
1913 	mutex_exit(&connfp->connf_lock);
1914 	return (NULL);
1915 
1916 found:
1917 	ASSERT(connp != NULL);
1918 	CONN_INC_REF(connp);
1919 	mutex_exit(&connfp->connf_lock);
1920 	return (connp);
1921 }
1922 
1923 /* ARGSUSED */
1924 static int
1925 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1926 {
1927 	itc_t	*itc = (itc_t *)buf;
1928 	conn_t 	*connp = &itc->itc_conn;
1929 	tcp_t	*tcp = (tcp_t *)&itc[1];
1930 
1931 	bzero(connp, sizeof (conn_t));
1932 	bzero(tcp, sizeof (tcp_t));
1933 
1934 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1935 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1936 	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1937 	tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1938 	if (tcp->tcp_timercache == NULL)
1939 		return (ENOMEM);
1940 	connp->conn_tcp = tcp;
1941 	connp->conn_flags = IPCL_TCPCONN;
1942 	connp->conn_proto = IPPROTO_TCP;
1943 	tcp->tcp_connp = connp;
1944 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1945 
1946 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1947 	if (connp->conn_ixa == NULL) {
1948 		tcp_timermp_free(tcp);
1949 		return (ENOMEM);
1950 	}
1951 	connp->conn_ixa->ixa_refcnt = 1;
1952 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
1953 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1954 	return (0);
1955 }
1956 
1957 /* ARGSUSED */
1958 static void
1959 tcp_conn_destructor(void *buf, void *cdrarg)
1960 {
1961 	itc_t	*itc = (itc_t *)buf;
1962 	conn_t 	*connp = &itc->itc_conn;
1963 	tcp_t	*tcp = (tcp_t *)&itc[1];
1964 
1965 	ASSERT(connp->conn_flags & IPCL_TCPCONN);
1966 	ASSERT(tcp->tcp_connp == connp);
1967 	ASSERT(connp->conn_tcp == tcp);
1968 	tcp_timermp_free(tcp);
1969 	mutex_destroy(&connp->conn_lock);
1970 	cv_destroy(&connp->conn_cv);
1971 	cv_destroy(&connp->conn_sq_cv);
1972 	rw_destroy(&connp->conn_ilg_lock);
1973 
1974 	/* Can be NULL if constructor failed */
1975 	if (connp->conn_ixa != NULL) {
1976 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1977 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
1978 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
1979 		ixa_refrele(connp->conn_ixa);
1980 	}
1981 }
1982 
1983 /* ARGSUSED */
1984 static int
1985 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1986 {
1987 	itc_t	*itc = (itc_t *)buf;
1988 	conn_t 	*connp = &itc->itc_conn;
1989 
1990 	bzero(connp, sizeof (conn_t));
1991 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1992 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1993 	connp->conn_flags = IPCL_IPCCONN;
1994 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1995 
1996 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1997 	if (connp->conn_ixa == NULL)
1998 		return (ENOMEM);
1999 	connp->conn_ixa->ixa_refcnt = 1;
2000 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2001 	return (0);
2002 }
2003 
2004 /* ARGSUSED */
2005 static void
2006 ip_conn_destructor(void *buf, void *cdrarg)
2007 {
2008 	itc_t	*itc = (itc_t *)buf;
2009 	conn_t 	*connp = &itc->itc_conn;
2010 
2011 	ASSERT(connp->conn_flags & IPCL_IPCCONN);
2012 	ASSERT(connp->conn_priv == NULL);
2013 	mutex_destroy(&connp->conn_lock);
2014 	cv_destroy(&connp->conn_cv);
2015 	rw_destroy(&connp->conn_ilg_lock);
2016 
2017 	/* Can be NULL if constructor failed */
2018 	if (connp->conn_ixa != NULL) {
2019 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2020 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2021 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2022 		ixa_refrele(connp->conn_ixa);
2023 	}
2024 }
2025 
2026 /* ARGSUSED */
2027 static int
2028 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2029 {
2030 	itc_t	*itc = (itc_t *)buf;
2031 	conn_t 	*connp = &itc->itc_conn;
2032 	udp_t	*udp = (udp_t *)&itc[1];
2033 
2034 	bzero(connp, sizeof (conn_t));
2035 	bzero(udp, sizeof (udp_t));
2036 
2037 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2038 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2039 	connp->conn_udp = udp;
2040 	connp->conn_flags = IPCL_UDPCONN;
2041 	connp->conn_proto = IPPROTO_UDP;
2042 	udp->udp_connp = connp;
2043 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2044 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2045 	if (connp->conn_ixa == NULL)
2046 		return (ENOMEM);
2047 	connp->conn_ixa->ixa_refcnt = 1;
2048 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
2049 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2050 	return (0);
2051 }
2052 
2053 /* ARGSUSED */
2054 static void
2055 udp_conn_destructor(void *buf, void *cdrarg)
2056 {
2057 	itc_t	*itc = (itc_t *)buf;
2058 	conn_t 	*connp = &itc->itc_conn;
2059 	udp_t	*udp = (udp_t *)&itc[1];
2060 
2061 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2062 	ASSERT(udp->udp_connp == connp);
2063 	ASSERT(connp->conn_udp == udp);
2064 	mutex_destroy(&connp->conn_lock);
2065 	cv_destroy(&connp->conn_cv);
2066 	rw_destroy(&connp->conn_ilg_lock);
2067 
2068 	/* Can be NULL if constructor failed */
2069 	if (connp->conn_ixa != NULL) {
2070 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2071 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2072 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2073 		ixa_refrele(connp->conn_ixa);
2074 	}
2075 }
2076 
2077 /* ARGSUSED */
2078 static int
2079 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2080 {
2081 	itc_t	*itc = (itc_t *)buf;
2082 	conn_t 	*connp = &itc->itc_conn;
2083 	icmp_t	*icmp = (icmp_t *)&itc[1];
2084 
2085 	bzero(connp, sizeof (conn_t));
2086 	bzero(icmp, sizeof (icmp_t));
2087 
2088 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2089 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2090 	connp->conn_icmp = icmp;
2091 	connp->conn_flags = IPCL_RAWIPCONN;
2092 	connp->conn_proto = IPPROTO_ICMP;
2093 	icmp->icmp_connp = connp;
2094 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2095 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2096 	if (connp->conn_ixa == NULL)
2097 		return (ENOMEM);
2098 	connp->conn_ixa->ixa_refcnt = 1;
2099 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
2100 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2101 	return (0);
2102 }
2103 
2104 /* ARGSUSED */
2105 static void
2106 rawip_conn_destructor(void *buf, void *cdrarg)
2107 {
2108 	itc_t	*itc = (itc_t *)buf;
2109 	conn_t 	*connp = &itc->itc_conn;
2110 	icmp_t	*icmp = (icmp_t *)&itc[1];
2111 
2112 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2113 	ASSERT(icmp->icmp_connp == connp);
2114 	ASSERT(connp->conn_icmp == icmp);
2115 	mutex_destroy(&connp->conn_lock);
2116 	cv_destroy(&connp->conn_cv);
2117 	rw_destroy(&connp->conn_ilg_lock);
2118 
2119 	/* Can be NULL if constructor failed */
2120 	if (connp->conn_ixa != NULL) {
2121 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2122 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2123 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2124 		ixa_refrele(connp->conn_ixa);
2125 	}
2126 }
2127 
2128 /* ARGSUSED */
2129 static int
2130 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2131 {
2132 	itc_t	*itc = (itc_t *)buf;
2133 	conn_t 	*connp = &itc->itc_conn;
2134 	rts_t	*rts = (rts_t *)&itc[1];
2135 
2136 	bzero(connp, sizeof (conn_t));
2137 	bzero(rts, sizeof (rts_t));
2138 
2139 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2140 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2141 	connp->conn_rts = rts;
2142 	connp->conn_flags = IPCL_RTSCONN;
2143 	rts->rts_connp = connp;
2144 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2145 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2146 	if (connp->conn_ixa == NULL)
2147 		return (ENOMEM);
2148 	connp->conn_ixa->ixa_refcnt = 1;
2149 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2150 	return (0);
2151 }
2152 
2153 /* ARGSUSED */
2154 static void
2155 rts_conn_destructor(void *buf, void *cdrarg)
2156 {
2157 	itc_t	*itc = (itc_t *)buf;
2158 	conn_t 	*connp = &itc->itc_conn;
2159 	rts_t	*rts = (rts_t *)&itc[1];
2160 
2161 	ASSERT(connp->conn_flags & IPCL_RTSCONN);
2162 	ASSERT(rts->rts_connp == connp);
2163 	ASSERT(connp->conn_rts == rts);
2164 	mutex_destroy(&connp->conn_lock);
2165 	cv_destroy(&connp->conn_cv);
2166 	rw_destroy(&connp->conn_ilg_lock);
2167 
2168 	/* Can be NULL if constructor failed */
2169 	if (connp->conn_ixa != NULL) {
2170 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2171 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2172 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2173 		ixa_refrele(connp->conn_ixa);
2174 	}
2175 }
2176 
2177 /*
2178  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2179  * in the conn_t.
2180  *
2181  * Below we list all the pointers in the conn_t as a documentation aid.
2182  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2183  * If you add any pointers to the conn_t please add an ASSERT here
2184  * and #ifdef it out if it can't be actually asserted to be NULL.
2185  * In any case, we bzero most of the conn_t at the end of the function.
2186  */
2187 void
2188 ipcl_conn_cleanup(conn_t *connp)
2189 {
2190 	ip_xmit_attr_t	*ixa;
2191 
2192 	ASSERT(connp->conn_latch == NULL);
2193 	ASSERT(connp->conn_latch_in_policy == NULL);
2194 	ASSERT(connp->conn_latch_in_action == NULL);
2195 #ifdef notdef
2196 	ASSERT(connp->conn_rq == NULL);
2197 	ASSERT(connp->conn_wq == NULL);
2198 #endif
2199 	ASSERT(connp->conn_cred == NULL);
2200 	ASSERT(connp->conn_g_fanout == NULL);
2201 	ASSERT(connp->conn_g_next == NULL);
2202 	ASSERT(connp->conn_g_prev == NULL);
2203 	ASSERT(connp->conn_policy == NULL);
2204 	ASSERT(connp->conn_fanout == NULL);
2205 	ASSERT(connp->conn_next == NULL);
2206 	ASSERT(connp->conn_prev == NULL);
2207 	ASSERT(connp->conn_oper_pending_ill == NULL);
2208 	ASSERT(connp->conn_ilg == NULL);
2209 	ASSERT(connp->conn_drain_next == NULL);
2210 	ASSERT(connp->conn_drain_prev == NULL);
2211 #ifdef notdef
2212 	/* conn_idl is not cleared when removed from idl list */
2213 	ASSERT(connp->conn_idl == NULL);
2214 #endif
2215 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
2216 #ifdef notdef
2217 	/* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2218 	ASSERT(connp->conn_netstack == NULL);
2219 #endif
2220 
2221 	ASSERT(connp->conn_helper_info == NULL);
2222 	ASSERT(connp->conn_ixa != NULL);
2223 	ixa = connp->conn_ixa;
2224 	ASSERT(ixa->ixa_refcnt == 1);
2225 	/* Need to preserve ixa_protocol */
2226 	ixa_cleanup(ixa);
2227 	ixa->ixa_flags = 0;
2228 
2229 	/* Clear out the conn_t fields that are not preserved */
2230 	bzero(&connp->conn_start_clr,
2231 	    sizeof (conn_t) -
2232 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2233 }
2234 
2235 /*
2236  * All conns are inserted in a global multi-list for the benefit of
2237  * walkers. The walk is guaranteed to walk all open conns at the time
2238  * of the start of the walk exactly once. This property is needed to
2239  * achieve some cleanups during unplumb of interfaces. This is achieved
2240  * as follows.
2241  *
2242  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2243  * call the insert and delete functions below at creation and deletion
2244  * time respectively. The conn never moves or changes its position in this
2245  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2246  * won't increase due to walkers, once the conn deletion has started. Note
2247  * that we can't remove the conn from the global list and then wait for
2248  * the refcnt to drop to zero, since walkers would then see a truncated
2249  * list. CONN_INCIPIENT ensures that walkers don't start looking at
2250  * conns until ip_open is ready to make them globally visible.
2251  * The global round robin multi-list locks are held only to get the
2252  * next member/insertion/deletion and contention should be negligible
2253  * if the multi-list is much greater than the number of cpus.
2254  */
2255 void
2256 ipcl_globalhash_insert(conn_t *connp)
2257 {
2258 	int	index;
2259 	struct connf_s	*connfp;
2260 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
2261 
2262 	/*
2263 	 * No need for atomic here. Approximate even distribution
2264 	 * in the global lists is sufficient.
2265 	 */
2266 	ipst->ips_conn_g_index++;
2267 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2268 
2269 	connp->conn_g_prev = NULL;
2270 	/*
2271 	 * Mark as INCIPIENT, so that walkers will ignore this
2272 	 * for now, till ip_open is ready to make it visible globally.
2273 	 */
2274 	connp->conn_state_flags |= CONN_INCIPIENT;
2275 
2276 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2277 	/* Insert at the head of the list */
2278 	mutex_enter(&connfp->connf_lock);
2279 	connp->conn_g_next = connfp->connf_head;
2280 	if (connp->conn_g_next != NULL)
2281 		connp->conn_g_next->conn_g_prev = connp;
2282 	connfp->connf_head = connp;
2283 
2284 	/* The fanout bucket this conn points to */
2285 	connp->conn_g_fanout = connfp;
2286 
2287 	mutex_exit(&connfp->connf_lock);
2288 }
2289 
2290 void
2291 ipcl_globalhash_remove(conn_t *connp)
2292 {
2293 	struct connf_s	*connfp;
2294 
2295 	/*
2296 	 * We were never inserted in the global multi list.
2297 	 * IPCL_NONE variety is never inserted in the global multilist
2298 	 * since it is presumed to not need any cleanup and is transient.
2299 	 */
2300 	if (connp->conn_g_fanout == NULL)
2301 		return;
2302 
2303 	connfp = connp->conn_g_fanout;
2304 	mutex_enter(&connfp->connf_lock);
2305 	if (connp->conn_g_prev != NULL)
2306 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2307 	else
2308 		connfp->connf_head = connp->conn_g_next;
2309 	if (connp->conn_g_next != NULL)
2310 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2311 	mutex_exit(&connfp->connf_lock);
2312 
2313 	/* Better to stumble on a null pointer than to corrupt memory */
2314 	connp->conn_g_next = NULL;
2315 	connp->conn_g_prev = NULL;
2316 	connp->conn_g_fanout = NULL;
2317 }
2318 
2319 /*
2320  * Walk the list of all conn_t's in the system, calling the function provided
2321  * With the specified argument for each.
2322  * Applies to both IPv4 and IPv6.
2323  *
2324  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2325  * conn_oper_pending_ill). To guard against stale pointers
2326  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2327  * unplumbed or removed. New conn_t's that are created while we are walking
2328  * may be missed by this walk, because they are not necessarily inserted
2329  * at the tail of the list. They are new conn_t's and thus don't have any
2330  * stale pointers. The CONN_CLOSING flag ensures that no new reference
2331  * is created to the struct that is going away.
2332  */
2333 void
2334 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2335 {
2336 	int	i;
2337 	conn_t	*connp;
2338 	conn_t	*prev_connp;
2339 
2340 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2341 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2342 		prev_connp = NULL;
2343 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2344 		while (connp != NULL) {
2345 			mutex_enter(&connp->conn_lock);
2346 			if (connp->conn_state_flags &
2347 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
2348 				mutex_exit(&connp->conn_lock);
2349 				connp = connp->conn_g_next;
2350 				continue;
2351 			}
2352 			CONN_INC_REF_LOCKED(connp);
2353 			mutex_exit(&connp->conn_lock);
2354 			mutex_exit(
2355 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2356 			(*func)(connp, arg);
2357 			if (prev_connp != NULL)
2358 				CONN_DEC_REF(prev_connp);
2359 			mutex_enter(
2360 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2361 			prev_connp = connp;
2362 			connp = connp->conn_g_next;
2363 		}
2364 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2365 		if (prev_connp != NULL)
2366 			CONN_DEC_REF(prev_connp);
2367 	}
2368 }
2369 
2370 /*
2371  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2372  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2373  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2374  * (peer tcp in ESTABLISHED state).
2375  */
2376 conn_t *
2377 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2378     ip_stack_t *ipst)
2379 {
2380 	uint32_t ports;
2381 	uint16_t *pports = (uint16_t *)&ports;
2382 	connf_t	*connfp;
2383 	conn_t	*tconnp;
2384 	boolean_t zone_chk;
2385 
2386 	/*
2387 	 * If either the source of destination address is loopback, then
2388 	 * both endpoints must be in the same Zone.  Otherwise, both of
2389 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
2390 	 * state) and the endpoints may reside in different Zones.
2391 	 */
2392 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2393 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2394 
2395 	pports[0] = tcpha->tha_fport;
2396 	pports[1] = tcpha->tha_lport;
2397 
2398 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2399 	    ports, ipst)];
2400 
2401 	mutex_enter(&connfp->connf_lock);
2402 	for (tconnp = connfp->connf_head; tconnp != NULL;
2403 	    tconnp = tconnp->conn_next) {
2404 
2405 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2406 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2407 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2408 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2409 
2410 			ASSERT(tconnp != connp);
2411 			CONN_INC_REF(tconnp);
2412 			mutex_exit(&connfp->connf_lock);
2413 			return (tconnp);
2414 		}
2415 	}
2416 	mutex_exit(&connfp->connf_lock);
2417 	return (NULL);
2418 }
2419 
2420 /*
2421  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2422  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2423  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2424  * (peer tcp in ESTABLISHED state).
2425  */
2426 conn_t *
2427 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2428     ip_stack_t *ipst)
2429 {
2430 	uint32_t ports;
2431 	uint16_t *pports = (uint16_t *)&ports;
2432 	connf_t	*connfp;
2433 	conn_t	*tconnp;
2434 	boolean_t zone_chk;
2435 
2436 	/*
2437 	 * If either the source of destination address is loopback, then
2438 	 * both endpoints must be in the same Zone.  Otherwise, both of
2439 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
2440 	 * state) and the endpoints may reside in different Zones.  We
2441 	 * don't do Zone check for link local address(es) because the
2442 	 * current Zone implementation treats each link local address as
2443 	 * being unique per system node, i.e. they belong to global Zone.
2444 	 */
2445 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2446 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2447 
2448 	pports[0] = tcpha->tha_fport;
2449 	pports[1] = tcpha->tha_lport;
2450 
2451 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2452 	    ports, ipst)];
2453 
2454 	mutex_enter(&connfp->connf_lock);
2455 	for (tconnp = connfp->connf_head; tconnp != NULL;
2456 	    tconnp = tconnp->conn_next) {
2457 
2458 		/* We skip conn_bound_if check here as this is loopback tcp */
2459 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2460 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2461 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2462 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2463 
2464 			ASSERT(tconnp != connp);
2465 			CONN_INC_REF(tconnp);
2466 			mutex_exit(&connfp->connf_lock);
2467 			return (tconnp);
2468 		}
2469 	}
2470 	mutex_exit(&connfp->connf_lock);
2471 	return (NULL);
2472 }
2473 
2474 /*
2475  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2476  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2477  * Only checks for connected entries i.e. no INADDR_ANY checks.
2478  */
2479 conn_t *
2480 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2481     ip_stack_t *ipst)
2482 {
2483 	uint32_t ports;
2484 	uint16_t *pports;
2485 	connf_t	*connfp;
2486 	conn_t	*tconnp;
2487 
2488 	pports = (uint16_t *)&ports;
2489 	pports[0] = tcpha->tha_fport;
2490 	pports[1] = tcpha->tha_lport;
2491 
2492 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2493 	    ports, ipst)];
2494 
2495 	mutex_enter(&connfp->connf_lock);
2496 	for (tconnp = connfp->connf_head; tconnp != NULL;
2497 	    tconnp = tconnp->conn_next) {
2498 
2499 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2500 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2501 		    tconnp->conn_tcp->tcp_state >= min_state) {
2502 
2503 			CONN_INC_REF(tconnp);
2504 			mutex_exit(&connfp->connf_lock);
2505 			return (tconnp);
2506 		}
2507 	}
2508 	mutex_exit(&connfp->connf_lock);
2509 	return (NULL);
2510 }
2511 
2512 /*
2513  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2514  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2515  * Only checks for connected entries i.e. no INADDR_ANY checks.
2516  * Match on ifindex in addition to addresses.
2517  */
2518 conn_t *
2519 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2520     uint_t ifindex, ip_stack_t *ipst)
2521 {
2522 	tcp_t	*tcp;
2523 	uint32_t ports;
2524 	uint16_t *pports;
2525 	connf_t	*connfp;
2526 	conn_t	*tconnp;
2527 
2528 	pports = (uint16_t *)&ports;
2529 	pports[0] = tcpha->tha_fport;
2530 	pports[1] = tcpha->tha_lport;
2531 
2532 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2533 	    ports, ipst)];
2534 
2535 	mutex_enter(&connfp->connf_lock);
2536 	for (tconnp = connfp->connf_head; tconnp != NULL;
2537 	    tconnp = tconnp->conn_next) {
2538 
2539 		tcp = tconnp->conn_tcp;
2540 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2541 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2542 		    tcp->tcp_state >= min_state &&
2543 		    (tconnp->conn_bound_if == 0 ||
2544 		    tconnp->conn_bound_if == ifindex)) {
2545 
2546 			CONN_INC_REF(tconnp);
2547 			mutex_exit(&connfp->connf_lock);
2548 			return (tconnp);
2549 		}
2550 	}
2551 	mutex_exit(&connfp->connf_lock);
2552 	return (NULL);
2553 }
2554 
2555 /*
2556  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2557  * a listener when changing state.
2558  */
2559 conn_t *
2560 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2561     ip_stack_t *ipst)
2562 {
2563 	connf_t		*bind_connfp;
2564 	conn_t		*connp;
2565 	tcp_t		*tcp;
2566 
2567 	/*
2568 	 * Avoid false matches for packets sent to an IP destination of
2569 	 * all zeros.
2570 	 */
2571 	if (laddr == 0)
2572 		return (NULL);
2573 
2574 	ASSERT(zoneid != ALL_ZONES);
2575 
2576 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2577 	mutex_enter(&bind_connfp->connf_lock);
2578 	for (connp = bind_connfp->connf_head; connp != NULL;
2579 	    connp = connp->conn_next) {
2580 		tcp = connp->conn_tcp;
2581 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2582 		    IPCL_ZONE_MATCH(connp, zoneid) &&
2583 		    (tcp->tcp_listener == NULL)) {
2584 			CONN_INC_REF(connp);
2585 			mutex_exit(&bind_connfp->connf_lock);
2586 			return (connp);
2587 		}
2588 	}
2589 	mutex_exit(&bind_connfp->connf_lock);
2590 	return (NULL);
2591 }
2592 
2593 /*
2594  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2595  * a listener when changing state.
2596  */
2597 conn_t *
2598 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2599     zoneid_t zoneid, ip_stack_t *ipst)
2600 {
2601 	connf_t		*bind_connfp;
2602 	conn_t		*connp = NULL;
2603 	tcp_t		*tcp;
2604 
2605 	/*
2606 	 * Avoid false matches for packets sent to an IP destination of
2607 	 * all zeros.
2608 	 */
2609 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2610 		return (NULL);
2611 
2612 	ASSERT(zoneid != ALL_ZONES);
2613 
2614 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2615 	mutex_enter(&bind_connfp->connf_lock);
2616 	for (connp = bind_connfp->connf_head; connp != NULL;
2617 	    connp = connp->conn_next) {
2618 		tcp = connp->conn_tcp;
2619 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2620 		    IPCL_ZONE_MATCH(connp, zoneid) &&
2621 		    (connp->conn_bound_if == 0 ||
2622 		    connp->conn_bound_if == ifindex) &&
2623 		    tcp->tcp_listener == NULL) {
2624 			CONN_INC_REF(connp);
2625 			mutex_exit(&bind_connfp->connf_lock);
2626 			return (connp);
2627 		}
2628 	}
2629 	mutex_exit(&bind_connfp->connf_lock);
2630 	return (NULL);
2631 }
2632 
2633 /*
2634  * ipcl_get_next_conn
2635  *	get the next entry in the conn global list
2636  *	and put a reference on the next_conn.
2637  *	decrement the reference on the current conn.
2638  *
2639  * This is an iterator based walker function that also provides for
2640  * some selection by the caller. It walks through the conn_hash bucket
2641  * searching for the next valid connp in the list, and selects connections
2642  * that are neither closed nor condemned. It also REFHOLDS the conn
2643  * thus ensuring that the conn exists when the caller uses the conn.
2644  */
2645 conn_t *
2646 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2647 {
2648 	conn_t	*next_connp;
2649 
2650 	if (connfp == NULL)
2651 		return (NULL);
2652 
2653 	mutex_enter(&connfp->connf_lock);
2654 
2655 	next_connp = (connp == NULL) ?
2656 	    connfp->connf_head : connp->conn_g_next;
2657 
2658 	while (next_connp != NULL) {
2659 		mutex_enter(&next_connp->conn_lock);
2660 		if (!(next_connp->conn_flags & conn_flags) ||
2661 		    (next_connp->conn_state_flags &
2662 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2663 			/*
2664 			 * This conn has been condemned or
2665 			 * is closing, or the flags don't match
2666 			 */
2667 			mutex_exit(&next_connp->conn_lock);
2668 			next_connp = next_connp->conn_g_next;
2669 			continue;
2670 		}
2671 		CONN_INC_REF_LOCKED(next_connp);
2672 		mutex_exit(&next_connp->conn_lock);
2673 		break;
2674 	}
2675 
2676 	mutex_exit(&connfp->connf_lock);
2677 
2678 	if (connp != NULL)
2679 		CONN_DEC_REF(connp);
2680 
2681 	return (next_connp);
2682 }
2683 
#ifdef CONN_DEBUG
/*
 * Trace of the most recent refholds/refreles on a conn.
 *
 * conn_trace_buf is used as a ring of CONN_TRACE_MAX entries:
 * conn_trace_last is advanced by one, wrapping to 0 at CONN_TRACE_MAX, and
 * that entry is filled in by getpcstack() with up to CONN_STACK_DEPTH
 * frames.  The caller must hold conn_lock.  Always returns 1.
 */
int
conn_trace_ref(conn_t *connp)
{
	conn_trace_t	*slot;
	int		idx;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	idx = connp->conn_trace_last + 1;
	if (idx == CONN_TRACE_MAX)
		idx = 0;

	slot = &connp->conn_trace_buf[idx];
	slot->ctb_depth = getpcstack(slot->ctb_stack, CONN_STACK_DEPTH);
	connp->conn_trace_last = idx;
	return (1);
}

/*
 * Counterpart of conn_trace_ref() for a refrele.  It records into the same
 * ring in exactly the same way.  The caller must hold conn_lock.  Always
 * returns 1.
 */
int
conn_untrace_ref(conn_t *connp)
{
	conn_trace_t	*slot;
	int		idx;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	idx = connp->conn_trace_last + 1;
	if (idx == CONN_TRACE_MAX)
		idx = 0;

	slot = &connp->conn_trace_buf[idx];
	slot->ctb_depth = getpcstack(slot->ctb_stack, CONN_STACK_DEPTH);
	connp->conn_trace_last = idx;
	return (1);
}
#endif
2724