xref: /titanic_44/usr/src/uts/common/inet/ip/ipclassifier.c (revision feef89cf5f5fee792c1a396bb0e48070935cf65a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * IP PACKET CLASSIFIER
28  *
29  * The IP packet classifier provides mapping between IP packets and persistent
30  * connection state for connection-oriented protocols. It also provides
31  * interface for managing connection states.
32  *
33  * The connection state is kept in conn_t data structure and contains, among
34  * other things:
35  *
36  *	o local/remote address and ports
37  *	o Transport protocol
38  *	o squeue for the connection (for TCP only)
39  *	o reference counter
40  *	o Connection state
41  *	o hash table linkage
42  *	o interface/ire information
43  *	o credentials
44  *	o ipsec policy
45  *	o send and receive functions.
46  *	o mutex lock.
47  *
48  * Connections use a reference counting scheme. They are freed when the
49  * reference counter drops to zero. A reference is incremented when connection
50  * is placed in a list or table, when incoming packet for the connection arrives
51  * and when connection is processed via squeue (squeue processing may be
52  * asynchronous and the reference protects the connection from being destroyed
53  * before its processing is finished).
54  *
55  * conn_recv is used to pass up packets to the ULP.
56  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
57  * a listener, and changes to tcp_input_listener as the listener has picked a
58  * good squeue. For other cases it is set to tcp_input_data.
59  *
60  * conn_recvicmp is used to pass up ICMP errors to the ULP.
61  *
62  * Classifier uses several hash tables:
63  *
64  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
65  *	ipcl_bind_fanout:	contains all connections in BOUND state
66  *	ipcl_proto_fanout:	IPv4 protocol fanout
67  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
68  *	ipcl_udp_fanout:	contains all UDP connections
69  *	ipcl_iptun_fanout:	contains all IP tunnel connections
70  *	ipcl_globalhash_fanout:	contains all connections
71  *
72  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
73  * which need to view all existing connections.
74  *
75  * All tables are protected by per-bucket locks. When both per-bucket lock and
76  * connection lock need to be held, the per-bucket lock should be acquired
77  * first, followed by the connection lock.
78  *
79  * All functions doing search in one of these tables increment a reference
80  * counter on the connection found (if any). This reference should be dropped
81  * when the caller has finished processing the connection.
82  *
83  *
84  * INTERFACES:
85  * ===========
86  *
87  * Connection Lookup:
88  * ------------------
89  *
90  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
91  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
92  *
93  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
94  * it can't find any associated connection. If the connection is found, its
95  * reference counter is incremented.
96  *
97  *	mp:	mblock, containing packet header. The full header should fit
98  *		into a single mblock. It should also contain at least full IP
99  *		and TCP or UDP header.
100  *
101  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
102  *
103  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
104  *		 the packet.
105  *
106  * 	ira->ira_zoneid: The zone in which the returned connection must be; the
107  *		zoneid corresponding to the ire_zoneid on the IRE located for
108  *		the packet's destination address.
109  *
110  *	ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
111  *		IRAF_TX_SHARED_ADDR flags
112  *
113  *	For TCP connections, the lookup order is as follows:
114  *		5-tuple {src, dst, protocol, local port, remote port}
115  *			lookup in ipcl_conn_fanout table.
116  *		3-tuple {dst, remote port, protocol} lookup in
117  *			ipcl_bind_fanout table.
118  *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle the case where a packet belongs
 *	to multiple UDP clients; that case is handled in IP itself.
123  *
124  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
125  * determine which actual zone gets the segment.  This is used only in a
126  * labeled environment.  The matching rules are:
127  *
128  *	- If it's not a multilevel port, then the label on the packet selects
129  *	  the zone.  Unlabeled packets are delivered to the global zone.
130  *
131  *	- If it's a multilevel port, then only the zone registered to receive
132  *	  packets on that port matches.
133  *
134  * Also, in a labeled environment, packet labels need to be checked.  For fully
135  * bound TCP connections, we can assume that the packet label was checked
136  * during connection establishment, and doesn't need to be checked on each
137  * packet.  For others, though, we need to check for strict equality or, for
138  * multilevel ports, membership in the range or set.  This part currently does
139  * a tnrh lookup on each packet, but could be optimized to use cached results
140  * if that were necessary.  (SCTP doesn't come through here, but if it did,
141  * we would apply the same rules as TCP.)
142  *
143  * An implication of the above is that fully-bound TCP sockets must always use
144  * distinct 4-tuples; they can't be discriminated by label alone.
145  *
146  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
147  * as there's no connection set-up handshake and no shared state.
148  *
149  * Labels on looped-back packets within a single zone do not need to be
150  * checked, as all processes in the same zone have the same label.
151  *
152  * Finally, for unlabeled packets received by a labeled system, special rules
153  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
154  * socket in the zone whose label matches the default label of the sender, if
155  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
156  * receiver's label must dominate the sender's default label.
157  *
158  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
159  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
160  *					 ip_stack);
161  *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
165  *
166  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
167  *					 zoneid, ip_stack);
168  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
169  *					 zoneid, ip_stack);
170  *
171  * 	Lookup routine to find a listener with the tuple {lport, laddr,
172  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
173  * 	parameter interface index is also compared.
174  *
175  * void ipcl_walk(func, arg, ip_stack)
176  *
177  * 	Apply 'func' to every connection available. The 'func' is called as
178  *	(*func)(connp, arg). The walk is non-atomic so connections may be
179  *	created and destroyed during the walk. The CONN_CONDEMNED and
180  *	CONN_INCIPIENT flags ensure that connections which are newly created
181  *	or being destroyed are not selected by the walker.
182  *
183  * Table Updates
184  * -------------
185  *
186  * int ipcl_conn_insert(connp);
187  * int ipcl_conn_insert_v4(connp);
188  * int ipcl_conn_insert_v6(connp);
189  *
190  *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
192  *		connp		conn_t to be inserted
193  *
194  *	Return value :
195  *		0		if connp was inserted
196  *		EADDRINUSE	if the connection with the same tuple
197  *				already exists.
198  *
199  * int ipcl_bind_insert(connp);
200  * int ipcl_bind_insert_v4(connp);
201  * int ipcl_bind_insert_v6(connp);
202  *
203  * 	Insert 'connp' in ipcl_bind_fanout.
 * 	Arguments :
205  * 		connp		conn_t to be inserted
206  *
207  *
208  * void ipcl_hash_remove(connp);
209  *
210  * 	Removes the 'connp' from the connection fanout table.
211  *
212  * Connection Creation/Destruction
213  * -------------------------------
214  *
215  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
216  *
217  * 	Creates a new conn based on the type flag, inserts it into
218  * 	globalhash table.
219  *
220  *	type:	This flag determines the type of conn_t which needs to be
221  *		created i.e., which kmem_cache it comes from.
222  *		IPCL_TCPCONN	indicates a TCP connection
223  *		IPCL_SCTPCONN	indicates a SCTP connection
224  *		IPCL_UDPCONN	indicates a UDP conn_t.
225  *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
226  *		IPCL_RTSCONN	indicates a RTS conn_t.
227  *		IPCL_IPCCONN	indicates all other connections.
228  *
229  * void ipcl_conn_destroy(connp)
230  *
231  * 	Destroys the connection state, removes it from the global
232  * 	connection hash table and frees its memory.
233  */
234 
235 #include <sys/types.h>
236 #include <sys/stream.h>
237 #include <sys/stropts.h>
238 #include <sys/sysmacros.h>
239 #include <sys/strsubr.h>
240 #include <sys/strsun.h>
241 #define	_SUN_TPI_VERSION 2
242 #include <sys/ddi.h>
243 #include <sys/cmn_err.h>
244 #include <sys/debug.h>
245 
246 #include <sys/systm.h>
247 #include <sys/param.h>
248 #include <sys/kmem.h>
249 #include <sys/isa_defs.h>
250 #include <inet/common.h>
251 #include <netinet/ip6.h>
252 #include <netinet/icmp6.h>
253 
254 #include <inet/ip.h>
255 #include <inet/ip_if.h>
256 #include <inet/ip_ire.h>
257 #include <inet/ip6.h>
258 #include <inet/ip_ndp.h>
259 #include <inet/ip_impl.h>
260 #include <inet/udp_impl.h>
261 #include <inet/sctp_ip.h>
262 #include <inet/sctp/sctp_impl.h>
263 #include <inet/rawip_impl.h>
264 #include <inet/rts_impl.h>
265 #include <inet/iptun/iptun_impl.h>
266 
267 #include <sys/cpuvar.h>
268 
269 #include <inet/ipclassifier.h>
270 #include <inet/tcp.h>
271 #include <inet/ipsec_impl.h>
272 
273 #include <sys/tsol/tnet.h>
274 #include <sys/sockio.h>
275 
/* Old value for compatibility. Settable in /etc/system */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically.  Settable in /etc/system.
 *
 * ipcl_init() uses ipcl_conn_hash_size if it is non-zero, otherwise
 * tcp_conn_hash_size if that is non-zero.  When both are zero the requested
 * size is derived from free memory: free bytes divided by
 * ipcl_conn_hash_memfactor, capped at ipcl_conn_hash_maxsize.
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;
297 
/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The initializer is indexed by N.  Entries 0-2 and the final entry are
 * zero; ipcl_init() treats a zero entry as "out of range" and falls back to
 * the entry at index 16.
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}
307 
/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
 *
 * The protocol-specific conn caches created in ipcl_g_init() size their
 * objects as sizeof (itc_t) plus the size of the protocol structure.
 *
 * NOTE(review): CACHE_ALIGN() is defined outside this file; presumably it
 * rounds sizeof (struct conn_s) up to a multiple of the cache line size --
 * confirm against its definition.
 */
typedef union itc_s {
	conn_t	itc_conn;
	char	itcu_filler[CACHE_ALIGN(conn_s)];
} itc_t;
316 
/*
 * kmem caches that conn_t structures are allocated from.  The caches defined
 * here are created in ipcl_g_init() and destroyed in ipcl_g_destroy();
 * sctp_conn_cache and tcp_sack_info_cache are defined outside this file.
 */
struct kmem_cache  *tcp_conn_cache;
struct kmem_cache  *ip_conn_cache;
extern struct kmem_cache  *sctp_conn_cache;
extern struct kmem_cache  *tcp_sack_info_cache;
struct kmem_cache  *udp_conn_cache;
struct kmem_cache  *rawip_conn_cache;
struct kmem_cache  *rts_conn_cache;

/* TCP timer mblk helpers, defined outside this file. */
extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/*
 * Constructor/destructor pairs, one pair per cache, passed to
 * kmem_cache_create() in ipcl_g_init().
 */
static int	ip_conn_constructor(void *, void *, int);
static void	ip_conn_destructor(void *, void *);

static int	tcp_conn_constructor(void *, void *, int);
static void	tcp_conn_destructor(void *, void *);

static int	udp_conn_constructor(void *, void *, int);
static void	udp_conn_destructor(void *, void *);

static int	rawip_conn_constructor(void *, void *, int);
static void	rawip_conn_destructor(void *, void *);

static int	rts_conn_constructor(void *, void *, int);
static void	rts_conn_destructor(void *, void *);
342 
343 /*
344  * Global (for all stack instances) init routine
345  */
346 void
347 ipcl_g_init(void)
348 {
349 	ip_conn_cache = kmem_cache_create("ip_conn_cache",
350 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
351 	    ip_conn_constructor, ip_conn_destructor,
352 	    NULL, NULL, NULL, 0);
353 
354 	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
355 	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
356 	    tcp_conn_constructor, tcp_conn_destructor,
357 	    tcp_conn_reclaim, NULL, NULL, 0);
358 
359 	udp_conn_cache = kmem_cache_create("udp_conn_cache",
360 	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
361 	    udp_conn_constructor, udp_conn_destructor,
362 	    NULL, NULL, NULL, 0);
363 
364 	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
365 	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
366 	    rawip_conn_constructor, rawip_conn_destructor,
367 	    NULL, NULL, NULL, 0);
368 
369 	rts_conn_cache = kmem_cache_create("rts_conn_cache",
370 	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
371 	    rts_conn_constructor, rts_conn_destructor,
372 	    NULL, NULL, NULL, 0);
373 }
374 
375 /*
376  * ipclassifier intialization routine, sets up hash tables.
377  */
378 void
379 ipcl_init(ip_stack_t *ipst)
380 {
381 	int i;
382 	int sizes[] = P2Ps();
383 
384 	/*
385 	 * Calculate size of conn fanout table from /etc/system settings
386 	 */
387 	if (ipcl_conn_hash_size != 0) {
388 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
389 	} else if (tcp_conn_hash_size != 0) {
390 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
391 	} else {
392 		extern pgcnt_t freemem;
393 
394 		ipst->ips_ipcl_conn_fanout_size =
395 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
396 
397 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
398 			ipst->ips_ipcl_conn_fanout_size =
399 			    ipcl_conn_hash_maxsize;
400 		}
401 	}
402 
403 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
404 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
405 			break;
406 		}
407 	}
408 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
409 		/* Out of range, use the 2^16 value */
410 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
411 	}
412 
413 	/* Take values from /etc/system */
414 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
415 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
416 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
417 	ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
418 
419 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
420 
421 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
422 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
423 
424 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
425 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
426 		    MUTEX_DEFAULT, NULL);
427 	}
428 
429 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
430 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
431 
432 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
433 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
434 		    MUTEX_DEFAULT, NULL);
435 	}
436 
437 	ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
438 	    sizeof (connf_t), KM_SLEEP);
439 	for (i = 0; i < IPPROTO_MAX; i++) {
440 		mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
441 		    MUTEX_DEFAULT, NULL);
442 	}
443 
444 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
445 	    sizeof (connf_t), KM_SLEEP);
446 	for (i = 0; i < IPPROTO_MAX; i++) {
447 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
448 		    MUTEX_DEFAULT, NULL);
449 	}
450 
451 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
452 	mutex_init(&ipst->ips_rts_clients->connf_lock,
453 	    NULL, MUTEX_DEFAULT, NULL);
454 
455 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
456 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
457 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
458 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
459 		    MUTEX_DEFAULT, NULL);
460 	}
461 
462 	ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
463 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
464 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
465 		mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
466 		    MUTEX_DEFAULT, NULL);
467 	}
468 
469 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
470 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
471 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
472 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
473 		    MUTEX_DEFAULT, NULL);
474 	}
475 
476 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
477 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
478 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
479 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
480 		    NULL, MUTEX_DEFAULT, NULL);
481 	}
482 }
483 
/*
 * Global (for all stack instances) teardown: destroy the five conn kmem
 * caches that ipcl_g_init() created.
 */
void
ipcl_g_destroy(void)
{
	kmem_cache_destroy(ip_conn_cache);
	kmem_cache_destroy(tcp_conn_cache);
	kmem_cache_destroy(udp_conn_cache);
	kmem_cache_destroy(rawip_conn_cache);
	kmem_cache_destroy(rts_conn_cache);
}
493 
494 /*
495  * All user-level and kernel use of the stack must be gone
496  * by now.
497  */
498 void
499 ipcl_destroy(ip_stack_t *ipst)
500 {
501 	int i;
502 
503 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
504 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
505 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
506 	}
507 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
508 	    sizeof (connf_t));
509 	ipst->ips_ipcl_conn_fanout = NULL;
510 
511 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
512 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
513 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
514 	}
515 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
516 	    sizeof (connf_t));
517 	ipst->ips_ipcl_bind_fanout = NULL;
518 
519 	for (i = 0; i < IPPROTO_MAX; i++) {
520 		ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
521 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
522 	}
523 	kmem_free(ipst->ips_ipcl_proto_fanout_v4,
524 	    IPPROTO_MAX * sizeof (connf_t));
525 	ipst->ips_ipcl_proto_fanout_v4 = NULL;
526 
527 	for (i = 0; i < IPPROTO_MAX; i++) {
528 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
529 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
530 	}
531 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
532 	    IPPROTO_MAX * sizeof (connf_t));
533 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
534 
535 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
536 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
537 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
538 	}
539 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
540 	    sizeof (connf_t));
541 	ipst->ips_ipcl_udp_fanout = NULL;
542 
543 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
544 		ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
545 		mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
546 	}
547 	kmem_free(ipst->ips_ipcl_iptun_fanout,
548 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
549 	ipst->ips_ipcl_iptun_fanout = NULL;
550 
551 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
552 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
553 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
554 	}
555 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
556 	    sizeof (connf_t));
557 	ipst->ips_ipcl_raw_fanout = NULL;
558 
559 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
560 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
561 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
562 	}
563 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
564 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
565 	ipst->ips_ipcl_globalhash_fanout = NULL;
566 
567 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
568 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
569 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
570 	ipst->ips_rts_clients = NULL;
571 }
572 
573 /*
574  * conn creation routine. initialize the conn, sets the reference
575  * and inserts it in the global hash table.
576  */
577 conn_t *
578 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
579 {
580 	conn_t	*connp;
581 	struct kmem_cache *conn_cache;
582 
583 	switch (type) {
584 	case IPCL_SCTPCONN:
585 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
586 			return (NULL);
587 		sctp_conn_init(connp);
588 		netstack_hold(ns);
589 		connp->conn_netstack = ns;
590 		connp->conn_ixa->ixa_ipst = ns->netstack_ip;
591 		ipcl_globalhash_insert(connp);
592 		return (connp);
593 
594 	case IPCL_TCPCONN:
595 		conn_cache = tcp_conn_cache;
596 		break;
597 
598 	case IPCL_UDPCONN:
599 		conn_cache = udp_conn_cache;
600 		break;
601 
602 	case IPCL_RAWIPCONN:
603 		conn_cache = rawip_conn_cache;
604 		break;
605 
606 	case IPCL_RTSCONN:
607 		conn_cache = rts_conn_cache;
608 		break;
609 
610 	case IPCL_IPCCONN:
611 		conn_cache = ip_conn_cache;
612 		break;
613 
614 	default:
615 		connp = NULL;
616 		ASSERT(0);
617 	}
618 
619 	if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
620 		return (NULL);
621 
622 	connp->conn_ref = 1;
623 	netstack_hold(ns);
624 	connp->conn_netstack = ns;
625 	connp->conn_ixa->ixa_ipst = ns->netstack_ip;
626 	ipcl_globalhash_insert(connp);
627 	return (connp);
628 }
629 
630 void
631 ipcl_conn_destroy(conn_t *connp)
632 {
633 	mblk_t	*mp;
634 	netstack_t	*ns = connp->conn_netstack;
635 
636 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
637 	ASSERT(connp->conn_ref == 0);
638 
639 	DTRACE_PROBE1(conn__destroy, conn_t *, connp);
640 
641 	if (connp->conn_cred != NULL) {
642 		crfree(connp->conn_cred);
643 		connp->conn_cred = NULL;
644 		/* ixa_cred done in ipcl_conn_cleanup below */
645 	}
646 
647 	if (connp->conn_ht_iphc != NULL) {
648 		kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
649 		connp->conn_ht_iphc = NULL;
650 		connp->conn_ht_iphc_allocated = 0;
651 		connp->conn_ht_iphc_len = 0;
652 		connp->conn_ht_ulp = NULL;
653 		connp->conn_ht_ulp_len = 0;
654 	}
655 	ip_pkt_free(&connp->conn_xmit_ipp);
656 
657 	ipcl_globalhash_remove(connp);
658 
659 	if (connp->conn_latch != NULL) {
660 		IPLATCH_REFRELE(connp->conn_latch);
661 		connp->conn_latch = NULL;
662 	}
663 	if (connp->conn_latch_in_policy != NULL) {
664 		IPPOL_REFRELE(connp->conn_latch_in_policy);
665 		connp->conn_latch_in_policy = NULL;
666 	}
667 	if (connp->conn_latch_in_action != NULL) {
668 		IPACT_REFRELE(connp->conn_latch_in_action);
669 		connp->conn_latch_in_action = NULL;
670 	}
671 	if (connp->conn_policy != NULL) {
672 		IPPH_REFRELE(connp->conn_policy, ns);
673 		connp->conn_policy = NULL;
674 	}
675 
676 	if (connp->conn_ipsec_opt_mp != NULL) {
677 		freemsg(connp->conn_ipsec_opt_mp);
678 		connp->conn_ipsec_opt_mp = NULL;
679 	}
680 
681 	if (connp->conn_flags & IPCL_TCPCONN) {
682 		tcp_t *tcp = connp->conn_tcp;
683 
684 		tcp_free(tcp);
685 		mp = tcp->tcp_timercache;
686 
687 		tcp->tcp_tcps = NULL;
688 
689 		if (tcp->tcp_sack_info != NULL) {
690 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
691 			kmem_cache_free(tcp_sack_info_cache,
692 			    tcp->tcp_sack_info);
693 		}
694 
695 		/*
696 		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
697 		 * the mblk.
698 		 */
699 		if (tcp->tcp_rsrv_mp != NULL) {
700 			freeb(tcp->tcp_rsrv_mp);
701 			tcp->tcp_rsrv_mp = NULL;
702 			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
703 		}
704 
705 		ipcl_conn_cleanup(connp);
706 		connp->conn_flags = IPCL_TCPCONN;
707 		if (ns != NULL) {
708 			ASSERT(tcp->tcp_tcps == NULL);
709 			connp->conn_netstack = NULL;
710 			connp->conn_ixa->ixa_ipst = NULL;
711 			netstack_rele(ns);
712 		}
713 
714 		bzero(tcp, sizeof (tcp_t));
715 
716 		tcp->tcp_timercache = mp;
717 		tcp->tcp_connp = connp;
718 		kmem_cache_free(tcp_conn_cache, connp);
719 		return;
720 	}
721 
722 	if (connp->conn_flags & IPCL_SCTPCONN) {
723 		ASSERT(ns != NULL);
724 		sctp_free(connp);
725 		return;
726 	}
727 
728 	ipcl_conn_cleanup(connp);
729 	if (ns != NULL) {
730 		connp->conn_netstack = NULL;
731 		connp->conn_ixa->ixa_ipst = NULL;
732 		netstack_rele(ns);
733 	}
734 
735 	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
736 	if (connp->conn_flags & IPCL_UDPCONN) {
737 		connp->conn_flags = IPCL_UDPCONN;
738 		kmem_cache_free(udp_conn_cache, connp);
739 	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
740 		connp->conn_flags = IPCL_RAWIPCONN;
741 		connp->conn_proto = IPPROTO_ICMP;
742 		connp->conn_ixa->ixa_protocol = connp->conn_proto;
743 		kmem_cache_free(rawip_conn_cache, connp);
744 	} else if (connp->conn_flags & IPCL_RTSCONN) {
745 		connp->conn_flags = IPCL_RTSCONN;
746 		kmem_cache_free(rts_conn_cache, connp);
747 	} else {
748 		connp->conn_flags = IPCL_IPCCONN;
749 		ASSERT(connp->conn_flags & IPCL_IPCCONN);
750 		ASSERT(connp->conn_priv == NULL);
751 		kmem_cache_free(ip_conn_cache, connp);
752 	}
753 }
754 
755 /*
756  * Running in cluster mode - deregister listener information
757  */
758 static void
759 ipcl_conn_unlisten(conn_t *connp)
760 {
761 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
762 	ASSERT(connp->conn_lport != 0);
763 
764 	if (cl_inet_unlisten != NULL) {
765 		sa_family_t	addr_family;
766 		uint8_t		*laddrp;
767 
768 		if (connp->conn_ipversion == IPV6_VERSION) {
769 			addr_family = AF_INET6;
770 			laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
771 		} else {
772 			addr_family = AF_INET;
773 			laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
774 		}
775 		(*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
776 		    IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
777 	}
778 	connp->conn_flags &= ~IPCL_CL_LISTENER;
779 }
780 
/*
 * Unlink 'connp' from the fanout bucket it is currently on (if any) and
 * drop the reference that bucket held on it.  The caller must not hold
 * conn_lock.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 *
 * The bucket lock covers only the list manipulation and the flag update.
 * It is dropped before ipcl_conn_unlisten() and CONN_DEC_REF() run, so that
 * neither the cluster unlisten hook nor any conn teardown triggered by
 * releasing the bucket's reference executes with a fanout bucket mutex held.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		mutex_exit(&connfp->connf_lock);			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
	}								\
}
809 
810 void
811 ipcl_hash_remove(conn_t *connp)
812 {
813 	uint8_t		protocol = connp->conn_proto;
814 
815 	IPCL_HASH_REMOVE(connp);
816 	if (protocol == IPPROTO_RSVP)
817 		ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
818 }
819 
820 /*
821  * The whole purpose of this function is allow removal of
822  * a conn_t from the connected hash for timewait reclaim.
823  * This is essentially a TW reclaim fastpath where timewait
824  * collector checks under fanout lock (so no one else can
825  * get access to the conn_t) that refcnt is 2 i.e. one for
826  * TCP and one for the classifier hash list. If ref count
827  * is indeed 2, we can just remove the conn under lock and
828  * avoid cleaning up the conn under squeue. This gives us
829  * improved performance.
830  */
831 void
832 ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
833 {
834 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
835 	ASSERT(MUTEX_HELD(&connp->conn_lock));
836 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
837 
838 	if ((connp)->conn_next != NULL) {
839 		(connp)->conn_next->conn_prev = (connp)->conn_prev;
840 	}
841 	if ((connp)->conn_prev != NULL) {
842 		(connp)->conn_prev->conn_next = (connp)->conn_next;
843 	} else {
844 		connfp->connf_head = (connp)->conn_next;
845 	}
846 	(connp)->conn_fanout = NULL;
847 	(connp)->conn_next = NULL;
848 	(connp)->conn_prev = NULL;
849 	(connp)->conn_flags |= IPCL_REMOVED;
850 	ASSERT((connp)->conn_ref == 2);
851 	(connp)->conn_ref--;
852 }
853 
/*
 * Link 'connp' at the head of bucket 'connfp', clear IPCL_REMOVED, set
 * IPCL_CONNECTED and take a reference on the conn for the bucket.
 *
 * The conn must not currently be on any fanout list (asserted).  The macro
 * does not take the bucket lock itself; as the name indicates, the caller is
 * expected to hold connfp->connf_lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_CONNECTED;						\
	CONN_INC_REF(connp);						\
}
868 
/*
 * Move 'connp' into bucket 'connfp' as a connected conn: first unlink it
 * from whatever bucket it is on now, then insert it at the head of 'connfp'
 * under that bucket's lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
875 
/*
 * Move 'connp' into bucket 'connfp' as a bound conn.
 *
 * The conn is first unlinked from whatever bucket it is on now.  Under the
 * lock of 'connfp' it is then linked in immediately before the first
 * existing entry whose conn_laddr_v6 satisfies _IPCL_V4_MATCH_ANY(), or at
 * the tail of the list if there is no such entry.  IPCL_REMOVED is cleared,
 * IPCL_BOUND is set and a reference is taken for the bucket.
 *
 * NOTE(review): _IPCL_V4_MATCH_ANY() is defined outside this file;
 * presumably it tests for a wildcard local address, which would keep
 * specific-address binds ahead of wildcard ones -- confirm.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {		\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
902 
/*
 * Insert connp into connfp and flag it IPCL_BOUND (clearing IPCL_REMOVED).
 *
 * After IPCL_HASH_REMOVE, the chain is walked under connf_lock through
 * "list", which always points at the link (connf_head or some conn_next)
 * that will be made to point at connp:
 *
 *  - If connp's local address is IPv4-mapped, it is linked in front of the
 *    first entry in the same zone whose local address is the unspecified
 *    IPv6 address.
 *  - Otherwise, or when no such entry exists, the walk runs off the end and
 *    connp is appended at the tail.
 *
 * "prev" tracks the entry preceding the insertion point.  The reassignment
 * from next->conn_prev inside the match branch yields the same node as long
 * as the chain's back links are consistent; it is retained as written.
 *
 * Every use of the connp argument is parenthesized so that the macro stays
 * correct when it is handed an expression rather than a plain identifier.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
932 
933 void
934 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
935 {
936 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
937 }
938 
939 /*
940  * Because the classifier is used to classify inbound packets, the destination
941  * address is meant to be our local tunnel address (tunnel source), and the
942  * source the remote tunnel address (tunnel destination).
943  *
944  * Note that conn_proto can't be used for fanout since the upper protocol
945  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
946  */
947 conn_t *
948 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
949 {
950 	connf_t	*connfp;
951 	conn_t	*connp;
952 
953 	/* first look for IPv4 tunnel links */
954 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
955 	mutex_enter(&connfp->connf_lock);
956 	for (connp = connfp->connf_head; connp != NULL;
957 	    connp = connp->conn_next) {
958 		if (IPCL_IPTUN_MATCH(connp, *dst, *src))
959 			break;
960 	}
961 	if (connp != NULL)
962 		goto done;
963 
964 	mutex_exit(&connfp->connf_lock);
965 
966 	/* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
967 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
968 	    INADDR_ANY)];
969 	mutex_enter(&connfp->connf_lock);
970 	for (connp = connfp->connf_head; connp != NULL;
971 	    connp = connp->conn_next) {
972 		if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
973 			break;
974 	}
975 done:
976 	if (connp != NULL)
977 		CONN_INC_REF(connp);
978 	mutex_exit(&connfp->connf_lock);
979 	return (connp);
980 }
981 
982 conn_t *
983 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
984 {
985 	connf_t	*connfp;
986 	conn_t	*connp;
987 
988 	/* Look for an IPv6 tunnel link */
989 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
990 	mutex_enter(&connfp->connf_lock);
991 	for (connp = connfp->connf_head; connp != NULL;
992 	    connp = connp->conn_next) {
993 		if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
994 			CONN_INC_REF(connp);
995 			break;
996 		}
997 	}
998 	mutex_exit(&connfp->connf_lock);
999 	return (connp);
1000 }
1001 
1002 /*
1003  * This function is used only for inserting SCTP raw socket now.
1004  * This may change later.
1005  *
1006  * Note that only one raw socket can be bound to a port.  The param
1007  * lport is in network byte order.
1008  */
1009 static int
1010 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1011 {
1012 	connf_t	*connfp;
1013 	conn_t	*oconnp;
1014 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1015 
1016 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1017 
1018 	/* Check for existing raw socket already bound to the port. */
1019 	mutex_enter(&connfp->connf_lock);
1020 	for (oconnp = connfp->connf_head; oconnp != NULL;
1021 	    oconnp = oconnp->conn_next) {
1022 		if (oconnp->conn_lport == lport &&
1023 		    oconnp->conn_zoneid == connp->conn_zoneid &&
1024 		    oconnp->conn_family == connp->conn_family &&
1025 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1026 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1027 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1028 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1029 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1030 		    &connp->conn_laddr_v6))) {
1031 			break;
1032 		}
1033 	}
1034 	mutex_exit(&connfp->connf_lock);
1035 	if (oconnp != NULL)
1036 		return (EADDRNOTAVAIL);
1037 
1038 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1039 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1040 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1041 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1042 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1043 		} else {
1044 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1045 		}
1046 	} else {
1047 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1048 	}
1049 	return (0);
1050 }
1051 
1052 static int
1053 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1054 {
1055 	connf_t	*connfp;
1056 	conn_t	*tconnp;
1057 	ipaddr_t laddr = connp->conn_laddr_v4;
1058 	ipaddr_t faddr = connp->conn_faddr_v4;
1059 
1060 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1061 	mutex_enter(&connfp->connf_lock);
1062 	for (tconnp = connfp->connf_head; tconnp != NULL;
1063 	    tconnp = tconnp->conn_next) {
1064 		if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1065 			/* A tunnel is already bound to these addresses. */
1066 			mutex_exit(&connfp->connf_lock);
1067 			return (EADDRINUSE);
1068 		}
1069 	}
1070 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1071 	mutex_exit(&connfp->connf_lock);
1072 	return (0);
1073 }
1074 
1075 static int
1076 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1077 {
1078 	connf_t	*connfp;
1079 	conn_t	*tconnp;
1080 	in6_addr_t *laddr = &connp->conn_laddr_v6;
1081 	in6_addr_t *faddr = &connp->conn_faddr_v6;
1082 
1083 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1084 	mutex_enter(&connfp->connf_lock);
1085 	for (tconnp = connfp->connf_head; tconnp != NULL;
1086 	    tconnp = tconnp->conn_next) {
1087 		if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1088 			/* A tunnel is already bound to these addresses. */
1089 			mutex_exit(&connfp->connf_lock);
1090 			return (EADDRINUSE);
1091 		}
1092 	}
1093 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1094 	mutex_exit(&connfp->connf_lock);
1095 	return (0);
1096 }
1097 
1098 /*
1099  * Check for a MAC exemption conflict on a labeled system.  Note that for
1100  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1101  * transport layer.  This check is for binding all other protocols.
1102  *
1103  * Returns true if there's a conflict.
1104  */
1105 static boolean_t
1106 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1107 {
1108 	connf_t	*connfp;
1109 	conn_t *tconn;
1110 
1111 	connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1112 	mutex_enter(&connfp->connf_lock);
1113 	for (tconn = connfp->connf_head; tconn != NULL;
1114 	    tconn = tconn->conn_next) {
1115 		/* We don't allow v4 fallback for v6 raw socket */
1116 		if (connp->conn_family != tconn->conn_family)
1117 			continue;
1118 		/* If neither is exempt, then there's no conflict */
1119 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1120 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1121 			continue;
1122 		/* We are only concerned about sockets for a different zone */
1123 		if (connp->conn_zoneid == tconn->conn_zoneid)
1124 			continue;
1125 		/* If both are bound to different specific addrs, ok */
1126 		if (connp->conn_laddr_v4 != INADDR_ANY &&
1127 		    tconn->conn_laddr_v4 != INADDR_ANY &&
1128 		    connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1129 			continue;
1130 		/* These two conflict; fail */
1131 		break;
1132 	}
1133 	mutex_exit(&connfp->connf_lock);
1134 	return (tconn != NULL);
1135 }
1136 
1137 static boolean_t
1138 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1139 {
1140 	connf_t	*connfp;
1141 	conn_t *tconn;
1142 
1143 	connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1144 	mutex_enter(&connfp->connf_lock);
1145 	for (tconn = connfp->connf_head; tconn != NULL;
1146 	    tconn = tconn->conn_next) {
1147 		/* We don't allow v4 fallback for v6 raw socket */
1148 		if (connp->conn_family != tconn->conn_family)
1149 			continue;
1150 		/* If neither is exempt, then there's no conflict */
1151 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1152 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1153 			continue;
1154 		/* We are only concerned about sockets for a different zone */
1155 		if (connp->conn_zoneid == tconn->conn_zoneid)
1156 			continue;
1157 		/* If both are bound to different addrs, ok */
1158 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1159 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1160 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1161 		    &tconn->conn_laddr_v6))
1162 			continue;
1163 		/* These two conflict; fail */
1164 		break;
1165 	}
1166 	mutex_exit(&connfp->connf_lock);
1167 	return (tconn != NULL);
1168 }
1169 
1170 /*
1171  * (v4, v6) bind hash insertion routines
1172  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1173  */
1174 
1175 int
1176 ipcl_bind_insert(conn_t *connp)
1177 {
1178 	if (connp->conn_ipversion == IPV6_VERSION)
1179 		return (ipcl_bind_insert_v6(connp));
1180 	else
1181 		return (ipcl_bind_insert_v4(connp));
1182 }
1183 
1184 int
1185 ipcl_bind_insert_v4(conn_t *connp)
1186 {
1187 	connf_t	*connfp;
1188 	int	ret = 0;
1189 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1190 	uint16_t	lport = connp->conn_lport;
1191 	uint8_t		protocol = connp->conn_proto;
1192 
1193 	if (IPCL_IS_IPTUN(connp))
1194 		return (ipcl_iptun_hash_insert(connp, ipst));
1195 
1196 	switch (protocol) {
1197 	default:
1198 		if (is_system_labeled() &&
1199 		    check_exempt_conflict_v4(connp, ipst))
1200 			return (EADDRINUSE);
1201 		/* FALLTHROUGH */
1202 	case IPPROTO_UDP:
1203 		if (protocol == IPPROTO_UDP) {
1204 			connfp = &ipst->ips_ipcl_udp_fanout[
1205 			    IPCL_UDP_HASH(lport, ipst)];
1206 		} else {
1207 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1208 		}
1209 
1210 		if (connp->conn_faddr_v4 != INADDR_ANY) {
1211 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1212 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
1213 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1214 		} else {
1215 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1216 		}
1217 		if (protocol == IPPROTO_RSVP)
1218 			ill_set_inputfn_all(ipst);
1219 		break;
1220 
1221 	case IPPROTO_TCP:
1222 		/* Insert it in the Bind Hash */
1223 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1224 		connfp = &ipst->ips_ipcl_bind_fanout[
1225 		    IPCL_BIND_HASH(lport, ipst)];
1226 		if (connp->conn_laddr_v4 != INADDR_ANY) {
1227 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1228 		} else {
1229 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1230 		}
1231 		if (cl_inet_listen != NULL) {
1232 			ASSERT(connp->conn_ipversion == IPV4_VERSION);
1233 			connp->conn_flags |= IPCL_CL_LISTENER;
1234 			(*cl_inet_listen)(
1235 			    connp->conn_netstack->netstack_stackid,
1236 			    IPPROTO_TCP, AF_INET,
1237 			    (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1238 		}
1239 		break;
1240 
1241 	case IPPROTO_SCTP:
1242 		ret = ipcl_sctp_hash_insert(connp, lport);
1243 		break;
1244 	}
1245 
1246 	return (ret);
1247 }
1248 
1249 int
1250 ipcl_bind_insert_v6(conn_t *connp)
1251 {
1252 	connf_t		*connfp;
1253 	int		ret = 0;
1254 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1255 	uint16_t	lport = connp->conn_lport;
1256 	uint8_t		protocol = connp->conn_proto;
1257 
1258 	if (IPCL_IS_IPTUN(connp)) {
1259 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
1260 	}
1261 
1262 	switch (protocol) {
1263 	default:
1264 		if (is_system_labeled() &&
1265 		    check_exempt_conflict_v6(connp, ipst))
1266 			return (EADDRINUSE);
1267 		/* FALLTHROUGH */
1268 	case IPPROTO_UDP:
1269 		if (protocol == IPPROTO_UDP) {
1270 			connfp = &ipst->ips_ipcl_udp_fanout[
1271 			    IPCL_UDP_HASH(lport, ipst)];
1272 		} else {
1273 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1274 		}
1275 
1276 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1277 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1278 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1279 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1280 		} else {
1281 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1282 		}
1283 		break;
1284 
1285 	case IPPROTO_TCP:
1286 		/* Insert it in the Bind Hash */
1287 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1288 		connfp = &ipst->ips_ipcl_bind_fanout[
1289 		    IPCL_BIND_HASH(lport, ipst)];
1290 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1291 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1292 		} else {
1293 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1294 		}
1295 		if (cl_inet_listen != NULL) {
1296 			sa_family_t	addr_family;
1297 			uint8_t		*laddrp;
1298 
1299 			if (connp->conn_ipversion == IPV6_VERSION) {
1300 				addr_family = AF_INET6;
1301 				laddrp =
1302 				    (uint8_t *)&connp->conn_bound_addr_v6;
1303 			} else {
1304 				addr_family = AF_INET;
1305 				laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1306 			}
1307 			connp->conn_flags |= IPCL_CL_LISTENER;
1308 			(*cl_inet_listen)(
1309 			    connp->conn_netstack->netstack_stackid,
1310 			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1311 		}
1312 		break;
1313 
1314 	case IPPROTO_SCTP:
1315 		ret = ipcl_sctp_hash_insert(connp, lport);
1316 		break;
1317 	}
1318 
1319 	return (ret);
1320 }
1321 
1322 /*
1323  * ipcl_conn_hash insertion routines.
1324  * The caller has already set conn_proto and the addresses/ports in the conn_t.
1325  */
1326 
1327 int
1328 ipcl_conn_insert(conn_t *connp)
1329 {
1330 	if (connp->conn_ipversion == IPV6_VERSION)
1331 		return (ipcl_conn_insert_v6(connp));
1332 	else
1333 		return (ipcl_conn_insert_v4(connp));
1334 }
1335 
1336 int
1337 ipcl_conn_insert_v4(conn_t *connp)
1338 {
1339 	connf_t		*connfp;
1340 	conn_t		*tconnp;
1341 	int		ret = 0;
1342 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1343 	uint16_t	lport = connp->conn_lport;
1344 	uint8_t		protocol = connp->conn_proto;
1345 
1346 	if (IPCL_IS_IPTUN(connp))
1347 		return (ipcl_iptun_hash_insert(connp, ipst));
1348 
1349 	switch (protocol) {
1350 	case IPPROTO_TCP:
1351 		/*
1352 		 * For TCP, we check whether the connection tuple already
1353 		 * exists before allowing the connection to proceed.  We
1354 		 * also allow indexing on the zoneid. This is to allow
1355 		 * multiple shared stack zones to have the same tcp
1356 		 * connection tuple. In practice this only happens for
1357 		 * INADDR_LOOPBACK as it's the only local address which
1358 		 * doesn't have to be unique.
1359 		 */
1360 		connfp = &ipst->ips_ipcl_conn_fanout[
1361 		    IPCL_CONN_HASH(connp->conn_faddr_v4,
1362 		    connp->conn_ports, ipst)];
1363 		mutex_enter(&connfp->connf_lock);
1364 		for (tconnp = connfp->connf_head; tconnp != NULL;
1365 		    tconnp = tconnp->conn_next) {
1366 			if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1367 			    connp->conn_faddr_v4, connp->conn_laddr_v4,
1368 			    connp->conn_ports) &&
1369 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1370 				/* Already have a conn. bail out */
1371 				mutex_exit(&connfp->connf_lock);
1372 				return (EADDRINUSE);
1373 			}
1374 		}
1375 		if (connp->conn_fanout != NULL) {
1376 			/*
1377 			 * Probably a XTI/TLI application trying to do a
1378 			 * rebind. Let it happen.
1379 			 */
1380 			mutex_exit(&connfp->connf_lock);
1381 			IPCL_HASH_REMOVE(connp);
1382 			mutex_enter(&connfp->connf_lock);
1383 		}
1384 
1385 		ASSERT(connp->conn_recv != NULL);
1386 		ASSERT(connp->conn_recvicmp != NULL);
1387 
1388 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1389 		mutex_exit(&connfp->connf_lock);
1390 		break;
1391 
1392 	case IPPROTO_SCTP:
1393 		/*
1394 		 * The raw socket may have already been bound, remove it
1395 		 * from the hash first.
1396 		 */
1397 		IPCL_HASH_REMOVE(connp);
1398 		ret = ipcl_sctp_hash_insert(connp, lport);
1399 		break;
1400 
1401 	default:
1402 		/*
1403 		 * Check for conflicts among MAC exempt bindings.  For
1404 		 * transports with port numbers, this is done by the upper
1405 		 * level per-transport binding logic.  For all others, it's
1406 		 * done here.
1407 		 */
1408 		if (is_system_labeled() &&
1409 		    check_exempt_conflict_v4(connp, ipst))
1410 			return (EADDRINUSE);
1411 		/* FALLTHROUGH */
1412 
1413 	case IPPROTO_UDP:
1414 		if (protocol == IPPROTO_UDP) {
1415 			connfp = &ipst->ips_ipcl_udp_fanout[
1416 			    IPCL_UDP_HASH(lport, ipst)];
1417 		} else {
1418 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1419 		}
1420 
1421 		if (connp->conn_faddr_v4 != INADDR_ANY) {
1422 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1423 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
1424 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1425 		} else {
1426 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1427 		}
1428 		break;
1429 	}
1430 
1431 	return (ret);
1432 }
1433 
1434 int
1435 ipcl_conn_insert_v6(conn_t *connp)
1436 {
1437 	connf_t		*connfp;
1438 	conn_t		*tconnp;
1439 	int		ret = 0;
1440 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1441 	uint16_t	lport = connp->conn_lport;
1442 	uint8_t		protocol = connp->conn_proto;
1443 	uint_t		ifindex = connp->conn_bound_if;
1444 
1445 	if (IPCL_IS_IPTUN(connp))
1446 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
1447 
1448 	switch (protocol) {
1449 	case IPPROTO_TCP:
1450 
1451 		/*
1452 		 * For tcp, we check whether the connection tuple already
1453 		 * exists before allowing the connection to proceed.  We
1454 		 * also allow indexing on the zoneid. This is to allow
1455 		 * multiple shared stack zones to have the same tcp
1456 		 * connection tuple. In practice this only happens for
1457 		 * ipv6_loopback as it's the only local address which
1458 		 * doesn't have to be unique.
1459 		 */
1460 		connfp = &ipst->ips_ipcl_conn_fanout[
1461 		    IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1462 		    ipst)];
1463 		mutex_enter(&connfp->connf_lock);
1464 		for (tconnp = connfp->connf_head; tconnp != NULL;
1465 		    tconnp = tconnp->conn_next) {
1466 			/* NOTE: need to match zoneid. Bug in onnv-gate */
1467 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1468 			    connp->conn_faddr_v6, connp->conn_laddr_v6,
1469 			    connp->conn_ports) &&
1470 			    (tconnp->conn_bound_if == 0 ||
1471 			    tconnp->conn_bound_if == ifindex) &&
1472 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1473 				/* Already have a conn. bail out */
1474 				mutex_exit(&connfp->connf_lock);
1475 				return (EADDRINUSE);
1476 			}
1477 		}
1478 		if (connp->conn_fanout != NULL) {
1479 			/*
1480 			 * Probably a XTI/TLI application trying to do a
1481 			 * rebind. Let it happen.
1482 			 */
1483 			mutex_exit(&connfp->connf_lock);
1484 			IPCL_HASH_REMOVE(connp);
1485 			mutex_enter(&connfp->connf_lock);
1486 		}
1487 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1488 		mutex_exit(&connfp->connf_lock);
1489 		break;
1490 
1491 	case IPPROTO_SCTP:
1492 		IPCL_HASH_REMOVE(connp);
1493 		ret = ipcl_sctp_hash_insert(connp, lport);
1494 		break;
1495 
1496 	default:
1497 		if (is_system_labeled() &&
1498 		    check_exempt_conflict_v6(connp, ipst))
1499 			return (EADDRINUSE);
1500 		/* FALLTHROUGH */
1501 	case IPPROTO_UDP:
1502 		if (protocol == IPPROTO_UDP) {
1503 			connfp = &ipst->ips_ipcl_udp_fanout[
1504 			    IPCL_UDP_HASH(lport, ipst)];
1505 		} else {
1506 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1507 		}
1508 
1509 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1510 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1511 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1512 			IPCL_HASH_INSERT_BOUND(connfp, connp);
1513 		} else {
1514 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1515 		}
1516 		break;
1517 	}
1518 
1519 	return (ret);
1520 }
1521 
1522 /*
1523  * v4 packet classifying function. looks up the fanout table to
1524  * find the conn, the packet belongs to. returns the conn with
1525  * the reference held, null otherwise.
1526  *
1527  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1528  * Lookup" comment block are applied.  Labels are also checked as described
1529  * above.  If the packet is from the inside (looped back), and is from the same
1530  * zone, then label checks are omitted.
1531  */
1532 conn_t *
1533 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1534     ip_recv_attr_t *ira, ip_stack_t *ipst)
1535 {
1536 	ipha_t	*ipha;
1537 	connf_t	*connfp, *bind_connfp;
1538 	uint16_t lport;
1539 	uint16_t fport;
1540 	uint32_t ports;
1541 	conn_t	*connp;
1542 	uint16_t  *up;
1543 	zoneid_t	zoneid = ira->ira_zoneid;
1544 
1545 	ipha = (ipha_t *)mp->b_rptr;
1546 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1547 
1548 	switch (protocol) {
1549 	case IPPROTO_TCP:
1550 		ports = *(uint32_t *)up;
1551 		connfp =
1552 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1553 		    ports, ipst)];
1554 		mutex_enter(&connfp->connf_lock);
1555 		for (connp = connfp->connf_head; connp != NULL;
1556 		    connp = connp->conn_next) {
1557 			if (IPCL_CONN_MATCH(connp, protocol,
1558 			    ipha->ipha_src, ipha->ipha_dst, ports) &&
1559 			    (connp->conn_zoneid == zoneid ||
1560 			    connp->conn_allzones ||
1561 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1562 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1563 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1564 				break;
1565 		}
1566 
1567 		if (connp != NULL) {
1568 			/*
1569 			 * We have a fully-bound TCP connection.
1570 			 *
1571 			 * For labeled systems, there's no need to check the
1572 			 * label here.  It's known to be good as we checked
1573 			 * before allowing the connection to become bound.
1574 			 */
1575 			CONN_INC_REF(connp);
1576 			mutex_exit(&connfp->connf_lock);
1577 			return (connp);
1578 		}
1579 
1580 		mutex_exit(&connfp->connf_lock);
1581 		lport = up[1];
1582 		bind_connfp =
1583 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1584 		mutex_enter(&bind_connfp->connf_lock);
1585 		for (connp = bind_connfp->connf_head; connp != NULL;
1586 		    connp = connp->conn_next) {
1587 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1588 			    lport) &&
1589 			    (connp->conn_zoneid == zoneid ||
1590 			    connp->conn_allzones ||
1591 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1592 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1593 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1594 				break;
1595 		}
1596 
1597 		/*
1598 		 * If the matching connection is SLP on a private address, then
1599 		 * the label on the packet must match the local zone's label.
1600 		 * Otherwise, it must be in the label range defined by tnrh.
1601 		 * This is ensured by tsol_receive_local.
1602 		 *
1603 		 * Note that we don't check tsol_receive_local for
1604 		 * the connected case.
1605 		 */
1606 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1607 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1608 		    ira, connp)) {
1609 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1610 			    char *, "connp(1) could not receive mp(2)",
1611 			    conn_t *, connp, mblk_t *, mp);
1612 			connp = NULL;
1613 		}
1614 
1615 		if (connp != NULL) {
1616 			/* Have a listener at least */
1617 			CONN_INC_REF(connp);
1618 			mutex_exit(&bind_connfp->connf_lock);
1619 			return (connp);
1620 		}
1621 
1622 		mutex_exit(&bind_connfp->connf_lock);
1623 		break;
1624 
1625 	case IPPROTO_UDP:
1626 		lport = up[1];
1627 		fport = up[0];
1628 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1629 		mutex_enter(&connfp->connf_lock);
1630 		for (connp = connfp->connf_head; connp != NULL;
1631 		    connp = connp->conn_next) {
1632 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1633 			    fport, ipha->ipha_src) &&
1634 			    (connp->conn_zoneid == zoneid ||
1635 			    connp->conn_allzones ||
1636 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1637 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1638 				break;
1639 		}
1640 
1641 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1642 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1643 		    ira, connp)) {
1644 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1645 			    char *, "connp(1) could not receive mp(2)",
1646 			    conn_t *, connp, mblk_t *, mp);
1647 			connp = NULL;
1648 		}
1649 
1650 		if (connp != NULL) {
1651 			CONN_INC_REF(connp);
1652 			mutex_exit(&connfp->connf_lock);
1653 			return (connp);
1654 		}
1655 
1656 		/*
1657 		 * We shouldn't come here for multicast/broadcast packets
1658 		 */
1659 		mutex_exit(&connfp->connf_lock);
1660 
1661 		break;
1662 
1663 	case IPPROTO_ENCAP:
1664 	case IPPROTO_IPV6:
1665 		return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1666 		    &ipha->ipha_dst, ipst));
1667 	}
1668 
1669 	return (NULL);
1670 }
1671 
1672 conn_t *
1673 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1674     ip_recv_attr_t *ira, ip_stack_t *ipst)
1675 {
1676 	ip6_t		*ip6h;
1677 	connf_t		*connfp, *bind_connfp;
1678 	uint16_t	lport;
1679 	uint16_t	fport;
1680 	tcpha_t		*tcpha;
1681 	uint32_t	ports;
1682 	conn_t		*connp;
1683 	uint16_t	*up;
1684 	zoneid_t	zoneid = ira->ira_zoneid;
1685 
1686 	ip6h = (ip6_t *)mp->b_rptr;
1687 
1688 	switch (protocol) {
1689 	case IPPROTO_TCP:
1690 		tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1691 		up = &tcpha->tha_lport;
1692 		ports = *(uint32_t *)up;
1693 
1694 		connfp =
1695 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1696 		    ports, ipst)];
1697 		mutex_enter(&connfp->connf_lock);
1698 		for (connp = connfp->connf_head; connp != NULL;
1699 		    connp = connp->conn_next) {
1700 			if (IPCL_CONN_MATCH_V6(connp, protocol,
1701 			    ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1702 			    (connp->conn_zoneid == zoneid ||
1703 			    connp->conn_allzones ||
1704 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1705 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1706 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1707 				break;
1708 		}
1709 
1710 		if (connp != NULL) {
1711 			/*
1712 			 * We have a fully-bound TCP connection.
1713 			 *
1714 			 * For labeled systems, there's no need to check the
1715 			 * label here.  It's known to be good as we checked
1716 			 * before allowing the connection to become bound.
1717 			 */
1718 			CONN_INC_REF(connp);
1719 			mutex_exit(&connfp->connf_lock);
1720 			return (connp);
1721 		}
1722 
1723 		mutex_exit(&connfp->connf_lock);
1724 
1725 		lport = up[1];
1726 		bind_connfp =
1727 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1728 		mutex_enter(&bind_connfp->connf_lock);
1729 		for (connp = bind_connfp->connf_head; connp != NULL;
1730 		    connp = connp->conn_next) {
1731 			if (IPCL_BIND_MATCH_V6(connp, protocol,
1732 			    ip6h->ip6_dst, lport) &&
1733 			    (connp->conn_zoneid == zoneid ||
1734 			    connp->conn_allzones ||
1735 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1736 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1737 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1738 				break;
1739 		}
1740 
1741 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1742 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1743 		    ira, connp)) {
1744 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1745 			    char *, "connp(1) could not receive mp(2)",
1746 			    conn_t *, connp, mblk_t *, mp);
1747 			connp = NULL;
1748 		}
1749 
1750 		if (connp != NULL) {
1751 			/* Have a listner at least */
1752 			CONN_INC_REF(connp);
1753 			mutex_exit(&bind_connfp->connf_lock);
1754 			return (connp);
1755 		}
1756 
1757 		mutex_exit(&bind_connfp->connf_lock);
1758 		break;
1759 
1760 	case IPPROTO_UDP:
1761 		up = (uint16_t *)&mp->b_rptr[hdr_len];
1762 		lport = up[1];
1763 		fport = up[0];
1764 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1765 		mutex_enter(&connfp->connf_lock);
1766 		for (connp = connfp->connf_head; connp != NULL;
1767 		    connp = connp->conn_next) {
1768 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1769 			    fport, ip6h->ip6_src) &&
1770 			    (connp->conn_zoneid == zoneid ||
1771 			    connp->conn_allzones ||
1772 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1773 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1774 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1775 				break;
1776 		}
1777 
1778 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1779 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1780 		    ira, connp)) {
1781 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1782 			    char *, "connp(1) could not receive mp(2)",
1783 			    conn_t *, connp, mblk_t *, mp);
1784 			connp = NULL;
1785 		}
1786 
1787 		if (connp != NULL) {
1788 			CONN_INC_REF(connp);
1789 			mutex_exit(&connfp->connf_lock);
1790 			return (connp);
1791 		}
1792 
1793 		/*
1794 		 * We shouldn't come here for multicast/broadcast packets
1795 		 */
1796 		mutex_exit(&connfp->connf_lock);
1797 		break;
1798 	case IPPROTO_ENCAP:
1799 	case IPPROTO_IPV6:
1800 		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1801 		    &ip6h->ip6_dst, ipst));
1802 	}
1803 
1804 	return (NULL);
1805 }
1806 
1807 /*
1808  * wrapper around ipcl_classify_(v4,v6) routines.
1809  */
1810 conn_t *
1811 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1812 {
1813 	if (ira->ira_flags & IRAF_IS_IPV4) {
1814 		return (ipcl_classify_v4(mp, ira->ira_protocol,
1815 		    ira->ira_ip_hdr_length, ira, ipst));
1816 	} else {
1817 		return (ipcl_classify_v6(mp, ira->ira_protocol,
1818 		    ira->ira_ip_hdr_length, ira, ipst));
1819 	}
1820 }
1821 
1822 /*
1823  * Only used to classify SCTP RAW sockets
1824  */
1825 conn_t *
1826 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1827     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1828 {
1829 	connf_t		*connfp;
1830 	conn_t		*connp;
1831 	in_port_t	lport;
1832 	int		ipversion;
1833 	const void	*dst;
1834 	zoneid_t	zoneid = ira->ira_zoneid;
1835 
1836 	lport = ((uint16_t *)&ports)[1];
1837 	if (ira->ira_flags & IRAF_IS_IPV4) {
1838 		dst = (const void *)&ipha->ipha_dst;
1839 		ipversion = IPV4_VERSION;
1840 	} else {
1841 		dst = (const void *)&ip6h->ip6_dst;
1842 		ipversion = IPV6_VERSION;
1843 	}
1844 
1845 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1846 	mutex_enter(&connfp->connf_lock);
1847 	for (connp = connfp->connf_head; connp != NULL;
1848 	    connp = connp->conn_next) {
1849 		/* We don't allow v4 fallback for v6 raw socket. */
1850 		if (ipversion != connp->conn_ipversion)
1851 			continue;
1852 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1853 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1854 			if (ipversion == IPV4_VERSION) {
1855 				if (!IPCL_CONN_MATCH(connp, protocol,
1856 				    ipha->ipha_src, ipha->ipha_dst, ports))
1857 					continue;
1858 			} else {
1859 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
1860 				    ip6h->ip6_src, ip6h->ip6_dst, ports))
1861 					continue;
1862 			}
1863 		} else {
1864 			if (ipversion == IPV4_VERSION) {
1865 				if (!IPCL_BIND_MATCH(connp, protocol,
1866 				    ipha->ipha_dst, lport))
1867 					continue;
1868 			} else {
1869 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
1870 				    ip6h->ip6_dst, lport))
1871 					continue;
1872 			}
1873 		}
1874 
1875 		if (connp->conn_zoneid == zoneid ||
1876 		    connp->conn_allzones ||
1877 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1878 		    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1879 		    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1880 			break;
1881 	}
1882 
1883 	if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1884 	    !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1885 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1886 		    char *, "connp(1) could not receive mp(2)",
1887 		    conn_t *, connp, mblk_t *, mp);
1888 		connp = NULL;
1889 	}
1890 
1891 	if (connp != NULL)
1892 		goto found;
1893 	mutex_exit(&connfp->connf_lock);
1894 
1895 	/* Try to look for a wildcard SCTP RAW socket match. */
1896 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1897 	mutex_enter(&connfp->connf_lock);
1898 	for (connp = connfp->connf_head; connp != NULL;
1899 	    connp = connp->conn_next) {
1900 		/* We don't allow v4 fallback for v6 raw socket. */
1901 		if (ipversion != connp->conn_ipversion)
1902 			continue;
1903 		if (!IPCL_ZONE_MATCH(connp, zoneid))
1904 			continue;
1905 
1906 		if (ipversion == IPV4_VERSION) {
1907 			if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1908 				break;
1909 		} else {
1910 			if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1911 				break;
1912 			}
1913 		}
1914 	}
1915 
1916 	if (connp != NULL)
1917 		goto found;
1918 
1919 	mutex_exit(&connfp->connf_lock);
1920 	return (NULL);
1921 
1922 found:
1923 	ASSERT(connp != NULL);
1924 	CONN_INC_REF(connp);
1925 	mutex_exit(&connfp->connf_lock);
1926 	return (connp);
1927 }
1928 
1929 /* ARGSUSED */
1930 static int
1931 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1932 {
1933 	itc_t	*itc = (itc_t *)buf;
1934 	conn_t 	*connp = &itc->itc_conn;
1935 	tcp_t	*tcp = (tcp_t *)&itc[1];
1936 
1937 	bzero(connp, sizeof (conn_t));
1938 	bzero(tcp, sizeof (tcp_t));
1939 
1940 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1941 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1942 	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1943 	tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1944 	if (tcp->tcp_timercache == NULL)
1945 		return (ENOMEM);
1946 	connp->conn_tcp = tcp;
1947 	connp->conn_flags = IPCL_TCPCONN;
1948 	connp->conn_proto = IPPROTO_TCP;
1949 	tcp->tcp_connp = connp;
1950 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1951 
1952 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1953 	if (connp->conn_ixa == NULL) {
1954 		tcp_timermp_free(tcp);
1955 		return (ENOMEM);
1956 	}
1957 	connp->conn_ixa->ixa_refcnt = 1;
1958 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
1959 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1960 	return (0);
1961 }
1962 
1963 /* ARGSUSED */
1964 static void
1965 tcp_conn_destructor(void *buf, void *cdrarg)
1966 {
1967 	itc_t	*itc = (itc_t *)buf;
1968 	conn_t 	*connp = &itc->itc_conn;
1969 	tcp_t	*tcp = (tcp_t *)&itc[1];
1970 
1971 	ASSERT(connp->conn_flags & IPCL_TCPCONN);
1972 	ASSERT(tcp->tcp_connp == connp);
1973 	ASSERT(connp->conn_tcp == tcp);
1974 	tcp_timermp_free(tcp);
1975 	mutex_destroy(&connp->conn_lock);
1976 	cv_destroy(&connp->conn_cv);
1977 	cv_destroy(&connp->conn_sq_cv);
1978 	rw_destroy(&connp->conn_ilg_lock);
1979 
1980 	/* Can be NULL if constructor failed */
1981 	if (connp->conn_ixa != NULL) {
1982 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1983 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
1984 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
1985 		ixa_refrele(connp->conn_ixa);
1986 	}
1987 }
1988 
1989 /* ARGSUSED */
1990 static int
1991 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1992 {
1993 	itc_t	*itc = (itc_t *)buf;
1994 	conn_t 	*connp = &itc->itc_conn;
1995 
1996 	bzero(connp, sizeof (conn_t));
1997 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1998 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1999 	connp->conn_flags = IPCL_IPCCONN;
2000 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2001 
2002 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2003 	if (connp->conn_ixa == NULL)
2004 		return (ENOMEM);
2005 	connp->conn_ixa->ixa_refcnt = 1;
2006 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2007 	return (0);
2008 }
2009 
2010 /* ARGSUSED */
2011 static void
2012 ip_conn_destructor(void *buf, void *cdrarg)
2013 {
2014 	itc_t	*itc = (itc_t *)buf;
2015 	conn_t 	*connp = &itc->itc_conn;
2016 
2017 	ASSERT(connp->conn_flags & IPCL_IPCCONN);
2018 	ASSERT(connp->conn_priv == NULL);
2019 	mutex_destroy(&connp->conn_lock);
2020 	cv_destroy(&connp->conn_cv);
2021 	rw_destroy(&connp->conn_ilg_lock);
2022 
2023 	/* Can be NULL if constructor failed */
2024 	if (connp->conn_ixa != NULL) {
2025 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2026 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2027 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2028 		ixa_refrele(connp->conn_ixa);
2029 	}
2030 }
2031 
2032 /* ARGSUSED */
2033 static int
2034 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2035 {
2036 	itc_t	*itc = (itc_t *)buf;
2037 	conn_t 	*connp = &itc->itc_conn;
2038 	udp_t	*udp = (udp_t *)&itc[1];
2039 
2040 	bzero(connp, sizeof (conn_t));
2041 	bzero(udp, sizeof (udp_t));
2042 
2043 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2044 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2045 	connp->conn_udp = udp;
2046 	connp->conn_flags = IPCL_UDPCONN;
2047 	connp->conn_proto = IPPROTO_UDP;
2048 	udp->udp_connp = connp;
2049 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2050 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2051 	if (connp->conn_ixa == NULL)
2052 		return (ENOMEM);
2053 	connp->conn_ixa->ixa_refcnt = 1;
2054 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
2055 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2056 	return (0);
2057 }
2058 
2059 /* ARGSUSED */
2060 static void
2061 udp_conn_destructor(void *buf, void *cdrarg)
2062 {
2063 	itc_t	*itc = (itc_t *)buf;
2064 	conn_t 	*connp = &itc->itc_conn;
2065 	udp_t	*udp = (udp_t *)&itc[1];
2066 
2067 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2068 	ASSERT(udp->udp_connp == connp);
2069 	ASSERT(connp->conn_udp == udp);
2070 	mutex_destroy(&connp->conn_lock);
2071 	cv_destroy(&connp->conn_cv);
2072 	rw_destroy(&connp->conn_ilg_lock);
2073 
2074 	/* Can be NULL if constructor failed */
2075 	if (connp->conn_ixa != NULL) {
2076 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2077 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2078 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2079 		ixa_refrele(connp->conn_ixa);
2080 	}
2081 }
2082 
2083 /* ARGSUSED */
2084 static int
2085 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2086 {
2087 	itc_t	*itc = (itc_t *)buf;
2088 	conn_t 	*connp = &itc->itc_conn;
2089 	icmp_t	*icmp = (icmp_t *)&itc[1];
2090 
2091 	bzero(connp, sizeof (conn_t));
2092 	bzero(icmp, sizeof (icmp_t));
2093 
2094 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2095 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2096 	connp->conn_icmp = icmp;
2097 	connp->conn_flags = IPCL_RAWIPCONN;
2098 	connp->conn_proto = IPPROTO_ICMP;
2099 	icmp->icmp_connp = connp;
2100 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2101 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2102 	if (connp->conn_ixa == NULL)
2103 		return (ENOMEM);
2104 	connp->conn_ixa->ixa_refcnt = 1;
2105 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
2106 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2107 	return (0);
2108 }
2109 
2110 /* ARGSUSED */
2111 static void
2112 rawip_conn_destructor(void *buf, void *cdrarg)
2113 {
2114 	itc_t	*itc = (itc_t *)buf;
2115 	conn_t 	*connp = &itc->itc_conn;
2116 	icmp_t	*icmp = (icmp_t *)&itc[1];
2117 
2118 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2119 	ASSERT(icmp->icmp_connp == connp);
2120 	ASSERT(connp->conn_icmp == icmp);
2121 	mutex_destroy(&connp->conn_lock);
2122 	cv_destroy(&connp->conn_cv);
2123 	rw_destroy(&connp->conn_ilg_lock);
2124 
2125 	/* Can be NULL if constructor failed */
2126 	if (connp->conn_ixa != NULL) {
2127 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2128 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2129 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2130 		ixa_refrele(connp->conn_ixa);
2131 	}
2132 }
2133 
2134 /* ARGSUSED */
2135 static int
2136 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2137 {
2138 	itc_t	*itc = (itc_t *)buf;
2139 	conn_t 	*connp = &itc->itc_conn;
2140 	rts_t	*rts = (rts_t *)&itc[1];
2141 
2142 	bzero(connp, sizeof (conn_t));
2143 	bzero(rts, sizeof (rts_t));
2144 
2145 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2146 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2147 	connp->conn_rts = rts;
2148 	connp->conn_flags = IPCL_RTSCONN;
2149 	rts->rts_connp = connp;
2150 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2151 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2152 	if (connp->conn_ixa == NULL)
2153 		return (ENOMEM);
2154 	connp->conn_ixa->ixa_refcnt = 1;
2155 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2156 	return (0);
2157 }
2158 
2159 /* ARGSUSED */
2160 static void
2161 rts_conn_destructor(void *buf, void *cdrarg)
2162 {
2163 	itc_t	*itc = (itc_t *)buf;
2164 	conn_t 	*connp = &itc->itc_conn;
2165 	rts_t	*rts = (rts_t *)&itc[1];
2166 
2167 	ASSERT(connp->conn_flags & IPCL_RTSCONN);
2168 	ASSERT(rts->rts_connp == connp);
2169 	ASSERT(connp->conn_rts == rts);
2170 	mutex_destroy(&connp->conn_lock);
2171 	cv_destroy(&connp->conn_cv);
2172 	rw_destroy(&connp->conn_ilg_lock);
2173 
2174 	/* Can be NULL if constructor failed */
2175 	if (connp->conn_ixa != NULL) {
2176 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2177 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2178 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2179 		ixa_refrele(connp->conn_ixa);
2180 	}
2181 }
2182 
2183 /*
2184  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2185  * in the conn_t.
2186  *
2187  * Below we list all the pointers in the conn_t as a documentation aid.
2188  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2189  * If you add any pointers to the conn_t please add an ASSERT here
2190  * and #ifdef it out if it can't be actually asserted to be NULL.
2191  * In any case, we bzero most of the conn_t at the end of the function.
2192  */
2193 void
2194 ipcl_conn_cleanup(conn_t *connp)
2195 {
2196 	ip_xmit_attr_t	*ixa;
2197 
2198 	ASSERT(connp->conn_latch == NULL);
2199 	ASSERT(connp->conn_latch_in_policy == NULL);
2200 	ASSERT(connp->conn_latch_in_action == NULL);
2201 #ifdef notdef
2202 	ASSERT(connp->conn_rq == NULL);
2203 	ASSERT(connp->conn_wq == NULL);
2204 #endif
2205 	ASSERT(connp->conn_cred == NULL);
2206 	ASSERT(connp->conn_g_fanout == NULL);
2207 	ASSERT(connp->conn_g_next == NULL);
2208 	ASSERT(connp->conn_g_prev == NULL);
2209 	ASSERT(connp->conn_policy == NULL);
2210 	ASSERT(connp->conn_fanout == NULL);
2211 	ASSERT(connp->conn_next == NULL);
2212 	ASSERT(connp->conn_prev == NULL);
2213 	ASSERT(connp->conn_oper_pending_ill == NULL);
2214 	ASSERT(connp->conn_ilg == NULL);
2215 	ASSERT(connp->conn_drain_next == NULL);
2216 	ASSERT(connp->conn_drain_prev == NULL);
2217 #ifdef notdef
2218 	/* conn_idl is not cleared when removed from idl list */
2219 	ASSERT(connp->conn_idl == NULL);
2220 #endif
2221 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
2222 #ifdef notdef
2223 	/* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2224 	ASSERT(connp->conn_netstack == NULL);
2225 #endif
2226 
2227 	ASSERT(connp->conn_helper_info == NULL);
2228 	ASSERT(connp->conn_ixa != NULL);
2229 	ixa = connp->conn_ixa;
2230 	ASSERT(ixa->ixa_refcnt == 1);
2231 	/* Need to preserve ixa_protocol */
2232 	ixa_cleanup(ixa);
2233 	ixa->ixa_flags = 0;
2234 
2235 	/* Clear out the conn_t fields that are not preserved */
2236 	bzero(&connp->conn_start_clr,
2237 	    sizeof (conn_t) -
2238 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2239 }
2240 
2241 /*
2242  * All conns are inserted in a global multi-list for the benefit of
2243  * walkers. The walk is guaranteed to walk all open conns at the time
2244  * of the start of the walk exactly once. This property is needed to
2245  * achieve some cleanups during unplumb of interfaces. This is achieved
2246  * as follows.
2247  *
2248  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2249  * call the insert and delete functions below at creation and deletion
2250  * time respectively. The conn never moves or changes its position in this
2251  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2252  * won't increase due to walkers, once the conn deletion has started. Note
2253  * that we can't remove the conn from the global list and then wait for
2254  * the refcnt to drop to zero, since walkers would then see a truncated
2255  * list. CONN_INCIPIENT ensures that walkers don't start looking at
2256  * conns until ip_open is ready to make them globally visible.
2257  * The global round robin multi-list locks are held only to get the
2258  * next member/insertion/deletion and contention should be negligible
2259  * if the multi-list is much greater than the number of cpus.
2260  */
2261 void
2262 ipcl_globalhash_insert(conn_t *connp)
2263 {
2264 	int	index;
2265 	struct connf_s	*connfp;
2266 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
2267 
2268 	/*
2269 	 * No need for atomic here. Approximate even distribution
2270 	 * in the global lists is sufficient.
2271 	 */
2272 	ipst->ips_conn_g_index++;
2273 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2274 
2275 	connp->conn_g_prev = NULL;
2276 	/*
2277 	 * Mark as INCIPIENT, so that walkers will ignore this
2278 	 * for now, till ip_open is ready to make it visible globally.
2279 	 */
2280 	connp->conn_state_flags |= CONN_INCIPIENT;
2281 
2282 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2283 	/* Insert at the head of the list */
2284 	mutex_enter(&connfp->connf_lock);
2285 	connp->conn_g_next = connfp->connf_head;
2286 	if (connp->conn_g_next != NULL)
2287 		connp->conn_g_next->conn_g_prev = connp;
2288 	connfp->connf_head = connp;
2289 
2290 	/* The fanout bucket this conn points to */
2291 	connp->conn_g_fanout = connfp;
2292 
2293 	mutex_exit(&connfp->connf_lock);
2294 }
2295 
2296 void
2297 ipcl_globalhash_remove(conn_t *connp)
2298 {
2299 	struct connf_s	*connfp;
2300 
2301 	/*
2302 	 * We were never inserted in the global multi list.
2303 	 * IPCL_NONE variety is never inserted in the global multilist
2304 	 * since it is presumed to not need any cleanup and is transient.
2305 	 */
2306 	if (connp->conn_g_fanout == NULL)
2307 		return;
2308 
2309 	connfp = connp->conn_g_fanout;
2310 	mutex_enter(&connfp->connf_lock);
2311 	if (connp->conn_g_prev != NULL)
2312 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2313 	else
2314 		connfp->connf_head = connp->conn_g_next;
2315 	if (connp->conn_g_next != NULL)
2316 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2317 	mutex_exit(&connfp->connf_lock);
2318 
2319 	/* Better to stumble on a null pointer than to corrupt memory */
2320 	connp->conn_g_next = NULL;
2321 	connp->conn_g_prev = NULL;
2322 	connp->conn_g_fanout = NULL;
2323 }
2324 
2325 /*
2326  * Walk the list of all conn_t's in the system, calling the function provided
2327  * With the specified argument for each.
2328  * Applies to both IPv4 and IPv6.
2329  *
2330  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2331  * conn_oper_pending_ill). To guard against stale pointers
2332  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2333  * unplumbed or removed. New conn_t's that are created while we are walking
2334  * may be missed by this walk, because they are not necessarily inserted
2335  * at the tail of the list. They are new conn_t's and thus don't have any
2336  * stale pointers. The CONN_CLOSING flag ensures that no new reference
2337  * is created to the struct that is going away.
2338  */
2339 void
2340 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2341 {
2342 	int	i;
2343 	conn_t	*connp;
2344 	conn_t	*prev_connp;
2345 
2346 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2347 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2348 		prev_connp = NULL;
2349 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2350 		while (connp != NULL) {
2351 			mutex_enter(&connp->conn_lock);
2352 			if (connp->conn_state_flags &
2353 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
2354 				mutex_exit(&connp->conn_lock);
2355 				connp = connp->conn_g_next;
2356 				continue;
2357 			}
2358 			CONN_INC_REF_LOCKED(connp);
2359 			mutex_exit(&connp->conn_lock);
2360 			mutex_exit(
2361 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2362 			(*func)(connp, arg);
2363 			if (prev_connp != NULL)
2364 				CONN_DEC_REF(prev_connp);
2365 			mutex_enter(
2366 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2367 			prev_connp = connp;
2368 			connp = connp->conn_g_next;
2369 		}
2370 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2371 		if (prev_connp != NULL)
2372 			CONN_DEC_REF(prev_connp);
2373 	}
2374 }
2375 
2376 /*
2377  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2378  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2379  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2380  * (peer tcp in ESTABLISHED state).
2381  */
2382 conn_t *
2383 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2384     ip_stack_t *ipst)
2385 {
2386 	uint32_t ports;
2387 	uint16_t *pports = (uint16_t *)&ports;
2388 	connf_t	*connfp;
2389 	conn_t	*tconnp;
2390 	boolean_t zone_chk;
2391 
2392 	/*
2393 	 * If either the source of destination address is loopback, then
2394 	 * both endpoints must be in the same Zone.  Otherwise, both of
2395 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
2396 	 * state) and the endpoints may reside in different Zones.
2397 	 */
2398 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2399 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2400 
2401 	pports[0] = tcpha->tha_fport;
2402 	pports[1] = tcpha->tha_lport;
2403 
2404 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2405 	    ports, ipst)];
2406 
2407 	mutex_enter(&connfp->connf_lock);
2408 	for (tconnp = connfp->connf_head; tconnp != NULL;
2409 	    tconnp = tconnp->conn_next) {
2410 
2411 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2412 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2413 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2414 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2415 
2416 			ASSERT(tconnp != connp);
2417 			CONN_INC_REF(tconnp);
2418 			mutex_exit(&connfp->connf_lock);
2419 			return (tconnp);
2420 		}
2421 	}
2422 	mutex_exit(&connfp->connf_lock);
2423 	return (NULL);
2424 }
2425 
2426 /*
2427  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2428  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2429  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2430  * (peer tcp in ESTABLISHED state).
2431  */
2432 conn_t *
2433 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2434     ip_stack_t *ipst)
2435 {
2436 	uint32_t ports;
2437 	uint16_t *pports = (uint16_t *)&ports;
2438 	connf_t	*connfp;
2439 	conn_t	*tconnp;
2440 	boolean_t zone_chk;
2441 
2442 	/*
2443 	 * If either the source of destination address is loopback, then
2444 	 * both endpoints must be in the same Zone.  Otherwise, both of
2445 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
2446 	 * state) and the endpoints may reside in different Zones.  We
2447 	 * don't do Zone check for link local address(es) because the
2448 	 * current Zone implementation treats each link local address as
2449 	 * being unique per system node, i.e. they belong to global Zone.
2450 	 */
2451 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2452 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2453 
2454 	pports[0] = tcpha->tha_fport;
2455 	pports[1] = tcpha->tha_lport;
2456 
2457 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2458 	    ports, ipst)];
2459 
2460 	mutex_enter(&connfp->connf_lock);
2461 	for (tconnp = connfp->connf_head; tconnp != NULL;
2462 	    tconnp = tconnp->conn_next) {
2463 
2464 		/* We skip conn_bound_if check here as this is loopback tcp */
2465 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2466 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2467 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2468 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2469 
2470 			ASSERT(tconnp != connp);
2471 			CONN_INC_REF(tconnp);
2472 			mutex_exit(&connfp->connf_lock);
2473 			return (tconnp);
2474 		}
2475 	}
2476 	mutex_exit(&connfp->connf_lock);
2477 	return (NULL);
2478 }
2479 
2480 /*
2481  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2482  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2483  * Only checks for connected entries i.e. no INADDR_ANY checks.
2484  */
2485 conn_t *
2486 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2487     ip_stack_t *ipst)
2488 {
2489 	uint32_t ports;
2490 	uint16_t *pports;
2491 	connf_t	*connfp;
2492 	conn_t	*tconnp;
2493 
2494 	pports = (uint16_t *)&ports;
2495 	pports[0] = tcpha->tha_fport;
2496 	pports[1] = tcpha->tha_lport;
2497 
2498 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2499 	    ports, ipst)];
2500 
2501 	mutex_enter(&connfp->connf_lock);
2502 	for (tconnp = connfp->connf_head; tconnp != NULL;
2503 	    tconnp = tconnp->conn_next) {
2504 
2505 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2506 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2507 		    tconnp->conn_tcp->tcp_state >= min_state) {
2508 
2509 			CONN_INC_REF(tconnp);
2510 			mutex_exit(&connfp->connf_lock);
2511 			return (tconnp);
2512 		}
2513 	}
2514 	mutex_exit(&connfp->connf_lock);
2515 	return (NULL);
2516 }
2517 
2518 /*
2519  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2520  * Returns with conn reference held. Caller must call CONN_DEC_REF.
2521  * Only checks for connected entries i.e. no INADDR_ANY checks.
2522  * Match on ifindex in addition to addresses.
2523  */
2524 conn_t *
2525 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2526     uint_t ifindex, ip_stack_t *ipst)
2527 {
2528 	tcp_t	*tcp;
2529 	uint32_t ports;
2530 	uint16_t *pports;
2531 	connf_t	*connfp;
2532 	conn_t	*tconnp;
2533 
2534 	pports = (uint16_t *)&ports;
2535 	pports[0] = tcpha->tha_fport;
2536 	pports[1] = tcpha->tha_lport;
2537 
2538 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2539 	    ports, ipst)];
2540 
2541 	mutex_enter(&connfp->connf_lock);
2542 	for (tconnp = connfp->connf_head; tconnp != NULL;
2543 	    tconnp = tconnp->conn_next) {
2544 
2545 		tcp = tconnp->conn_tcp;
2546 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2547 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2548 		    tcp->tcp_state >= min_state &&
2549 		    (tconnp->conn_bound_if == 0 ||
2550 		    tconnp->conn_bound_if == ifindex)) {
2551 
2552 			CONN_INC_REF(tconnp);
2553 			mutex_exit(&connfp->connf_lock);
2554 			return (tconnp);
2555 		}
2556 	}
2557 	mutex_exit(&connfp->connf_lock);
2558 	return (NULL);
2559 }
2560 
2561 /*
2562  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2563  * a listener when changing state.
2564  */
2565 conn_t *
2566 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2567     ip_stack_t *ipst)
2568 {
2569 	connf_t		*bind_connfp;
2570 	conn_t		*connp;
2571 	tcp_t		*tcp;
2572 
2573 	/*
2574 	 * Avoid false matches for packets sent to an IP destination of
2575 	 * all zeros.
2576 	 */
2577 	if (laddr == 0)
2578 		return (NULL);
2579 
2580 	ASSERT(zoneid != ALL_ZONES);
2581 
2582 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2583 	mutex_enter(&bind_connfp->connf_lock);
2584 	for (connp = bind_connfp->connf_head; connp != NULL;
2585 	    connp = connp->conn_next) {
2586 		tcp = connp->conn_tcp;
2587 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2588 		    IPCL_ZONE_MATCH(connp, zoneid) &&
2589 		    (tcp->tcp_listener == NULL)) {
2590 			CONN_INC_REF(connp);
2591 			mutex_exit(&bind_connfp->connf_lock);
2592 			return (connp);
2593 		}
2594 	}
2595 	mutex_exit(&bind_connfp->connf_lock);
2596 	return (NULL);
2597 }
2598 
2599 /*
2600  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2601  * a listener when changing state.
2602  */
2603 conn_t *
2604 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2605     zoneid_t zoneid, ip_stack_t *ipst)
2606 {
2607 	connf_t		*bind_connfp;
2608 	conn_t		*connp = NULL;
2609 	tcp_t		*tcp;
2610 
2611 	/*
2612 	 * Avoid false matches for packets sent to an IP destination of
2613 	 * all zeros.
2614 	 */
2615 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2616 		return (NULL);
2617 
2618 	ASSERT(zoneid != ALL_ZONES);
2619 
2620 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2621 	mutex_enter(&bind_connfp->connf_lock);
2622 	for (connp = bind_connfp->connf_head; connp != NULL;
2623 	    connp = connp->conn_next) {
2624 		tcp = connp->conn_tcp;
2625 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2626 		    IPCL_ZONE_MATCH(connp, zoneid) &&
2627 		    (connp->conn_bound_if == 0 ||
2628 		    connp->conn_bound_if == ifindex) &&
2629 		    tcp->tcp_listener == NULL) {
2630 			CONN_INC_REF(connp);
2631 			mutex_exit(&bind_connfp->connf_lock);
2632 			return (connp);
2633 		}
2634 	}
2635 	mutex_exit(&bind_connfp->connf_lock);
2636 	return (NULL);
2637 }
2638 
2639 /*
2640  * ipcl_get_next_conn
2641  *	get the next entry in the conn global list
2642  *	and put a reference on the next_conn.
2643  *	decrement the reference on the current conn.
2644  *
2645  * This is an iterator based walker function that also provides for
2646  * some selection by the caller. It walks through the conn_hash bucket
2647  * searching for the next valid connp in the list, and selects connections
2648  * that are neither closed nor condemned. It also REFHOLDS the conn
2649  * thus ensuring that the conn exists when the caller uses the conn.
2650  */
2651 conn_t *
2652 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2653 {
2654 	conn_t	*next_connp;
2655 
2656 	if (connfp == NULL)
2657 		return (NULL);
2658 
2659 	mutex_enter(&connfp->connf_lock);
2660 
2661 	next_connp = (connp == NULL) ?
2662 	    connfp->connf_head : connp->conn_g_next;
2663 
2664 	while (next_connp != NULL) {
2665 		mutex_enter(&next_connp->conn_lock);
2666 		if (!(next_connp->conn_flags & conn_flags) ||
2667 		    (next_connp->conn_state_flags &
2668 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2669 			/*
2670 			 * This conn has been condemned or
2671 			 * is closing, or the flags don't match
2672 			 */
2673 			mutex_exit(&next_connp->conn_lock);
2674 			next_connp = next_connp->conn_g_next;
2675 			continue;
2676 		}
2677 		CONN_INC_REF_LOCKED(next_connp);
2678 		mutex_exit(&next_connp->conn_lock);
2679 		break;
2680 	}
2681 
2682 	mutex_exit(&connfp->connf_lock);
2683 
2684 	if (connp != NULL)
2685 		CONN_DEC_REF(connp);
2686 
2687 	return (next_connp);
2688 }
2689 
2690 #ifdef CONN_DEBUG
2691 /*
2692  * Trace of the last NBUF refhold/refrele
2693  */
2694 int
2695 conn_trace_ref(conn_t *connp)
2696 {
2697 	int	last;
2698 	conn_trace_t	*ctb;
2699 
2700 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2701 	last = connp->conn_trace_last;
2702 	last++;
2703 	if (last == CONN_TRACE_MAX)
2704 		last = 0;
2705 
2706 	ctb = &connp->conn_trace_buf[last];
2707 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2708 	connp->conn_trace_last = last;
2709 	return (1);
2710 }
2711 
2712 int
2713 conn_untrace_ref(conn_t *connp)
2714 {
2715 	int	last;
2716 	conn_trace_t	*ctb;
2717 
2718 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2719 	last = connp->conn_trace_last;
2720 	last++;
2721 	if (last == CONN_TRACE_MAX)
2722 		last = 0;
2723 
2724 	ctb = &connp->conn_trace_buf[last];
2725 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2726 	connp->conn_trace_last = last;
2727 	return (1);
2728 }
2729 #endif
2730