1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IP PACKET CLASSIFIER 28 * 29 * The IP packet classifier provides mapping between IP packets and persistent 30 * connection state for connection-oriented protocols. It also provides 31 * interface for managing connection states. 32 * 33 * The connection state is kept in conn_t data structure and contains, among 34 * other things: 35 * 36 * o local/remote address and ports 37 * o Transport protocol 38 * o squeue for the connection (for TCP only) 39 * o reference counter 40 * o Connection state 41 * o hash table linkage 42 * o interface/ire information 43 * o credentials 44 * o ipsec policy 45 * o send and receive functions. 46 * o mutex lock. 47 * 48 * Connections use a reference counting scheme. They are freed when the 49 * reference counter drops to zero. 
 * A reference is incremented when a connection
 * is placed in a list or table, when an incoming packet for the connection
 * arrives and when the connection is processed via squeue (squeue processing
 * may be asynchronous and the reference protects the connection from being
 * destroyed before its processing is finished).
 *
 * send and receive functions are currently used for TCP only. The send function
 * determines the IP entry point for the packet once it leaves TCP to be sent to
 * the destination address. The receive function is used by IP when the packet
 * should be passed for TCP processing. When a new connection is created these
 * are set to ip_output() and tcp_input() respectively. During the lifetime of
 * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
 * tcp_conn_request().  This allows incoming SYNs to go directly into the
 * listener SYN processing function without going to tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_iptun_fanout:	contains all IP tunnel connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any). This reference should be dropped
 * when the caller has finished processing the connection.
 *
 *
 * INTERFACES:
 * ===========
 *
 * Connection Lookup:
 * ------------------
 *
 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack)
 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack)
 *
 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
 * it can't find any associated connection. If the connection is found, its
 * reference counter is incremented.
 *
 *	mp:	mblock, containing packet header. The full header should fit
 *		into a single mblock. It should also contain at least full IP
 *		and TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
 *		 the packet.
 *
 *	zoneid: The zone in which the returned connection must be; the zoneid
 *		corresponding to the ire_zoneid on the IRE located for the
 *		packet's destination address.
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment.  This is used only in a
 * labeled environment.
 * The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone.  Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked.  For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet.  For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set.  This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary.  (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
 *					 zoneid, ip_stack);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *					 zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created i.e., which kmem_cache it comes from.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_SCTPCONN	indicates a SCTP connection
 *		IPCL_UDPCONN	indicates a UDP conn_t.
 *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
 *		IPCL_RTSCONN	indicates a RTS conn_t.
 *		IPCL_IPCCONN	indicates all other connections.
 *
 * void ipcl_conn_destroy(connp)
 *
 *	Destroys the connection state, removes it from the global
 *	connection hash table and frees its memory.
242 */ 243 244 #include <sys/types.h> 245 #include <sys/stream.h> 246 #include <sys/stropts.h> 247 #include <sys/sysmacros.h> 248 #include <sys/strsubr.h> 249 #include <sys/strsun.h> 250 #define _SUN_TPI_VERSION 2 251 #include <sys/ddi.h> 252 #include <sys/cmn_err.h> 253 #include <sys/debug.h> 254 255 #include <sys/systm.h> 256 #include <sys/param.h> 257 #include <sys/kmem.h> 258 #include <sys/isa_defs.h> 259 #include <inet/common.h> 260 #include <netinet/ip6.h> 261 #include <netinet/icmp6.h> 262 263 #include <inet/ip.h> 264 #include <inet/ip6.h> 265 #include <inet/ip_ndp.h> 266 #include <inet/ip_impl.h> 267 #include <inet/udp_impl.h> 268 #include <inet/sctp_ip.h> 269 #include <inet/sctp/sctp_impl.h> 270 #include <inet/rawip_impl.h> 271 #include <inet/rts_impl.h> 272 #include <inet/iptun/iptun_impl.h> 273 274 #include <sys/cpuvar.h> 275 276 #include <inet/ipclassifier.h> 277 #include <inet/tcp.h> 278 #include <inet/ipsec_impl.h> 279 280 #include <sys/tsol/tnet.h> 281 #include <sys/sockio.h> 282 283 #ifdef DEBUG 284 #define IPCL_DEBUG 285 #else 286 #undef IPCL_DEBUG 287 #endif 288 289 #ifdef IPCL_DEBUG 290 int ipcl_debug_level = 0; 291 #define IPCL_DEBUG_LVL(level, args) \ 292 if (ipcl_debug_level & level) { printf args; } 293 #else 294 #define IPCL_DEBUG_LVL(level, args) {; } 295 #endif 296 /* Old value for compatibility. Setable in /etc/system */ 297 uint_t tcp_conn_hash_size = 0; 298 299 /* New value. Zero means choose automatically. Setable in /etc/system */ 300 uint_t ipcl_conn_hash_size = 0; 301 uint_t ipcl_conn_hash_memfactor = 8192; 302 uint_t ipcl_conn_hash_maxsize = 82500; 303 304 /* bind/udp fanout table size */ 305 uint_t ipcl_bind_fanout_size = 512; 306 uint_t ipcl_udp_fanout_size = 16384; 307 308 /* Raw socket fanout size. Must be a power of 2. */ 309 uint_t ipcl_raw_fanout_size = 256; 310 311 /* 312 * The IPCL_IPTUN_HASH() function works best with a prime table size. 
We 313 * expect that most large deployments would have hundreds of tunnels, and 314 * thousands in the extreme case. 315 */ 316 uint_t ipcl_iptun_fanout_size = 6143; 317 318 /* 319 * Power of 2^N Primes useful for hashing for N of 0-28, 320 * these primes are the nearest prime <= 2^N - 2^(N-2). 321 */ 322 323 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 324 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 325 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 326 50331599, 100663291, 201326557, 0} 327 328 /* 329 * wrapper structure to ensure that conn and what follows it (tcp_t, etc) 330 * are aligned on cache lines. 331 */ 332 typedef union itc_s { 333 conn_t itc_conn; 334 char itcu_filler[CACHE_ALIGN(conn_s)]; 335 } itc_t; 336 337 struct kmem_cache *tcp_conn_cache; 338 struct kmem_cache *ip_conn_cache; 339 struct kmem_cache *ip_helper_stream_cache; 340 extern struct kmem_cache *sctp_conn_cache; 341 extern struct kmem_cache *tcp_sack_info_cache; 342 extern struct kmem_cache *tcp_iphc_cache; 343 struct kmem_cache *udp_conn_cache; 344 struct kmem_cache *rawip_conn_cache; 345 struct kmem_cache *rts_conn_cache; 346 347 extern void tcp_timermp_free(tcp_t *); 348 extern mblk_t *tcp_timermp_alloc(int); 349 350 static int ip_conn_constructor(void *, void *, int); 351 static void ip_conn_destructor(void *, void *); 352 353 static int tcp_conn_constructor(void *, void *, int); 354 static void tcp_conn_destructor(void *, void *); 355 356 static int udp_conn_constructor(void *, void *, int); 357 static void udp_conn_destructor(void *, void *); 358 359 static int rawip_conn_constructor(void *, void *, int); 360 static void rawip_conn_destructor(void *, void *); 361 362 static int rts_conn_constructor(void *, void *, int); 363 static void rts_conn_destructor(void *, void *); 364 365 static int ip_helper_stream_constructor(void *, void *, int); 366 static void ip_helper_stream_destructor(void *, void *); 367 368 boolean_t ip_use_helper_cache 
= B_TRUE; 369 370 /* 371 * Hook functions to enable cluster networking 372 * On non-clustered systems these vectors must always be NULL. 373 */ 374 extern void (*cl_inet_listen)(netstackid_t, uint8_t, sa_family_t, 375 uint8_t *, in_port_t, void *); 376 extern void (*cl_inet_unlisten)(netstackid_t, uint8_t, sa_family_t, 377 uint8_t *, in_port_t, void *); 378 379 #ifdef IPCL_DEBUG 380 #define INET_NTOA_BUFSIZE 18 381 382 static char * 383 inet_ntoa_r(uint32_t in, char *b) 384 { 385 unsigned char *p; 386 387 p = (unsigned char *)∈ 388 (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 389 return (b); 390 } 391 #endif 392 393 /* 394 * Global (for all stack instances) init routine 395 */ 396 void 397 ipcl_g_init(void) 398 { 399 ip_conn_cache = kmem_cache_create("ip_conn_cache", 400 sizeof (conn_t), CACHE_ALIGN_SIZE, 401 ip_conn_constructor, ip_conn_destructor, 402 NULL, NULL, NULL, 0); 403 404 tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 405 sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, 406 tcp_conn_constructor, tcp_conn_destructor, 407 NULL, NULL, NULL, 0); 408 409 udp_conn_cache = kmem_cache_create("udp_conn_cache", 410 sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 411 udp_conn_constructor, udp_conn_destructor, 412 NULL, NULL, NULL, 0); 413 414 rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 415 sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 416 rawip_conn_constructor, rawip_conn_destructor, 417 NULL, NULL, NULL, 0); 418 419 rts_conn_cache = kmem_cache_create("rts_conn_cache", 420 sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 421 rts_conn_constructor, rts_conn_destructor, 422 NULL, NULL, NULL, 0); 423 424 if (ip_use_helper_cache) { 425 ip_helper_stream_cache = kmem_cache_create 426 ("ip_helper_stream_cache", sizeof (ip_helper_stream_info_t), 427 CACHE_ALIGN_SIZE, ip_helper_stream_constructor, 428 ip_helper_stream_destructor, NULL, NULL, NULL, 0); 429 } else { 430 ip_helper_stream_cache = NULL; 431 } 432 } 433 434 /* 435 * 
ipclassifier intialization routine, sets up hash tables. 436 */ 437 void 438 ipcl_init(ip_stack_t *ipst) 439 { 440 int i; 441 int sizes[] = P2Ps(); 442 443 /* 444 * Calculate size of conn fanout table from /etc/system settings 445 */ 446 if (ipcl_conn_hash_size != 0) { 447 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 448 } else if (tcp_conn_hash_size != 0) { 449 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 450 } else { 451 extern pgcnt_t freemem; 452 453 ipst->ips_ipcl_conn_fanout_size = 454 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 455 456 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 457 ipst->ips_ipcl_conn_fanout_size = 458 ipcl_conn_hash_maxsize; 459 } 460 } 461 462 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 463 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 464 break; 465 } 466 } 467 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 468 /* Out of range, use the 2^16 value */ 469 ipst->ips_ipcl_conn_fanout_size = sizes[16]; 470 } 471 472 /* Take values from /etc/system */ 473 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 474 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 475 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 476 ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size; 477 478 ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 479 480 ipst->ips_ipcl_conn_fanout = kmem_zalloc( 481 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 482 483 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 484 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 485 MUTEX_DEFAULT, NULL); 486 } 487 488 ipst->ips_ipcl_bind_fanout = kmem_zalloc( 489 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 490 491 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 492 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 493 MUTEX_DEFAULT, NULL); 494 } 495 496 ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * 497 sizeof (connf_t), 
KM_SLEEP); 498 for (i = 0; i < IPPROTO_MAX; i++) { 499 mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, 500 MUTEX_DEFAULT, NULL); 501 } 502 503 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 504 sizeof (connf_t), KM_SLEEP); 505 for (i = 0; i < IPPROTO_MAX; i++) { 506 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 507 MUTEX_DEFAULT, NULL); 508 } 509 510 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 511 mutex_init(&ipst->ips_rts_clients->connf_lock, 512 NULL, MUTEX_DEFAULT, NULL); 513 514 ipst->ips_ipcl_udp_fanout = kmem_zalloc( 515 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 516 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 517 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 518 MUTEX_DEFAULT, NULL); 519 } 520 521 ipst->ips_ipcl_iptun_fanout = kmem_zalloc( 522 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP); 523 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 524 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL, 525 MUTEX_DEFAULT, NULL); 526 } 527 528 ipst->ips_ipcl_raw_fanout = kmem_zalloc( 529 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 530 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 531 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 532 MUTEX_DEFAULT, NULL); 533 } 534 535 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 536 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 537 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 538 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 539 NULL, MUTEX_DEFAULT, NULL); 540 } 541 } 542 543 void 544 ipcl_g_destroy(void) 545 { 546 kmem_cache_destroy(ip_conn_cache); 547 kmem_cache_destroy(tcp_conn_cache); 548 kmem_cache_destroy(udp_conn_cache); 549 kmem_cache_destroy(rawip_conn_cache); 550 kmem_cache_destroy(rts_conn_cache); 551 } 552 553 /* 554 * All user-level and kernel use of the stack must be gone 555 * by now. 
556 */ 557 void 558 ipcl_destroy(ip_stack_t *ipst) 559 { 560 int i; 561 562 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 563 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 564 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 565 } 566 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 567 sizeof (connf_t)); 568 ipst->ips_ipcl_conn_fanout = NULL; 569 570 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 571 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 572 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 573 } 574 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 575 sizeof (connf_t)); 576 ipst->ips_ipcl_bind_fanout = NULL; 577 578 for (i = 0; i < IPPROTO_MAX; i++) { 579 ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); 580 mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); 581 } 582 kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); 583 ipst->ips_ipcl_proto_fanout = NULL; 584 585 for (i = 0; i < IPPROTO_MAX; i++) { 586 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 587 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 588 } 589 kmem_free(ipst->ips_ipcl_proto_fanout_v6, 590 IPPROTO_MAX * sizeof (connf_t)); 591 ipst->ips_ipcl_proto_fanout_v6 = NULL; 592 593 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 594 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 595 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 596 } 597 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 598 sizeof (connf_t)); 599 ipst->ips_ipcl_udp_fanout = NULL; 600 601 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 602 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL); 603 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock); 604 } 605 kmem_free(ipst->ips_ipcl_iptun_fanout, 606 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t)); 607 ipst->ips_ipcl_iptun_fanout = NULL; 608 
609 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 610 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 611 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 612 } 613 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 614 sizeof (connf_t)); 615 ipst->ips_ipcl_raw_fanout = NULL; 616 617 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 618 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 619 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 620 } 621 kmem_free(ipst->ips_ipcl_globalhash_fanout, 622 sizeof (connf_t) * CONN_G_HASH_SIZE); 623 ipst->ips_ipcl_globalhash_fanout = NULL; 624 625 ASSERT(ipst->ips_rts_clients->connf_head == NULL); 626 mutex_destroy(&ipst->ips_rts_clients->connf_lock); 627 kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 628 ipst->ips_rts_clients = NULL; 629 } 630 631 /* 632 * conn creation routine. initialize the conn, sets the reference 633 * and inserts it in the global hash table. 634 */ 635 conn_t * 636 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 637 { 638 conn_t *connp; 639 sctp_stack_t *sctps; 640 struct kmem_cache *conn_cache; 641 642 switch (type) { 643 case IPCL_SCTPCONN: 644 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 645 return (NULL); 646 sctp_conn_init(connp); 647 sctps = ns->netstack_sctp; 648 SCTP_G_Q_REFHOLD(sctps); 649 netstack_hold(ns); 650 connp->conn_netstack = ns; 651 return (connp); 652 653 case IPCL_TCPCONN: 654 conn_cache = tcp_conn_cache; 655 break; 656 657 case IPCL_UDPCONN: 658 conn_cache = udp_conn_cache; 659 break; 660 661 case IPCL_RAWIPCONN: 662 conn_cache = rawip_conn_cache; 663 break; 664 665 case IPCL_RTSCONN: 666 conn_cache = rts_conn_cache; 667 break; 668 669 case IPCL_IPCCONN: 670 conn_cache = ip_conn_cache; 671 break; 672 673 default: 674 connp = NULL; 675 ASSERT(0); 676 } 677 678 if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 679 return (NULL); 680 681 connp->conn_ref = 1; 682 
netstack_hold(ns); 683 connp->conn_netstack = ns; 684 ipcl_globalhash_insert(connp); 685 return (connp); 686 } 687 688 void 689 ipcl_conn_destroy(conn_t *connp) 690 { 691 mblk_t *mp; 692 netstack_t *ns = connp->conn_netstack; 693 694 ASSERT(!MUTEX_HELD(&connp->conn_lock)); 695 ASSERT(connp->conn_ref == 0); 696 ASSERT(connp->conn_ire_cache == NULL); 697 698 DTRACE_PROBE1(conn__destroy, conn_t *, connp); 699 700 if (connp->conn_effective_cred != NULL) { 701 crfree(connp->conn_effective_cred); 702 connp->conn_effective_cred = NULL; 703 } 704 705 if (connp->conn_cred != NULL) { 706 crfree(connp->conn_cred); 707 connp->conn_cred = NULL; 708 } 709 710 ipcl_globalhash_remove(connp); 711 712 /* FIXME: add separate tcp_conn_free()? */ 713 if (connp->conn_flags & IPCL_TCPCONN) { 714 tcp_t *tcp = connp->conn_tcp; 715 tcp_stack_t *tcps; 716 717 ASSERT(tcp != NULL); 718 tcps = tcp->tcp_tcps; 719 if (tcps != NULL) { 720 if (connp->conn_latch != NULL) { 721 IPLATCH_REFRELE(connp->conn_latch, ns); 722 connp->conn_latch = NULL; 723 } 724 if (connp->conn_policy != NULL) { 725 IPPH_REFRELE(connp->conn_policy, ns); 726 connp->conn_policy = NULL; 727 } 728 tcp->tcp_tcps = NULL; 729 TCPS_REFRELE(tcps); 730 } 731 732 tcp_free(tcp); 733 mp = tcp->tcp_timercache; 734 tcp->tcp_cred = NULL; 735 736 if (tcp->tcp_sack_info != NULL) { 737 bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 738 kmem_cache_free(tcp_sack_info_cache, 739 tcp->tcp_sack_info); 740 } 741 if (tcp->tcp_iphc != NULL) { 742 if (tcp->tcp_hdr_grown) { 743 kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 744 } else { 745 bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 746 kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 747 } 748 tcp->tcp_iphc_len = 0; 749 } 750 ASSERT(tcp->tcp_iphc_len == 0); 751 752 /* 753 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 754 * the mblk. 
755 */ 756 if (tcp->tcp_rsrv_mp != NULL) { 757 freeb(tcp->tcp_rsrv_mp); 758 tcp->tcp_rsrv_mp = NULL; 759 mutex_destroy(&tcp->tcp_rsrv_mp_lock); 760 } 761 762 ASSERT(connp->conn_latch == NULL); 763 ASSERT(connp->conn_policy == NULL); 764 765 if (ns != NULL) { 766 ASSERT(tcp->tcp_tcps == NULL); 767 connp->conn_netstack = NULL; 768 netstack_rele(ns); 769 } 770 771 ipcl_conn_cleanup(connp); 772 connp->conn_flags = IPCL_TCPCONN; 773 bzero(tcp, sizeof (tcp_t)); 774 775 tcp->tcp_timercache = mp; 776 tcp->tcp_connp = connp; 777 kmem_cache_free(tcp_conn_cache, connp); 778 return; 779 } 780 if (connp->conn_latch != NULL) { 781 IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); 782 connp->conn_latch = NULL; 783 } 784 if (connp->conn_policy != NULL) { 785 IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 786 connp->conn_policy = NULL; 787 } 788 if (connp->conn_ipsec_opt_mp != NULL) { 789 freemsg(connp->conn_ipsec_opt_mp); 790 connp->conn_ipsec_opt_mp = NULL; 791 } 792 793 if (connp->conn_flags & IPCL_SCTPCONN) { 794 ASSERT(ns != NULL); 795 sctp_free(connp); 796 return; 797 } 798 799 if (ns != NULL) { 800 connp->conn_netstack = NULL; 801 netstack_rele(ns); 802 } 803 804 ipcl_conn_cleanup(connp); 805 806 /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 807 if (connp->conn_flags & IPCL_UDPCONN) { 808 connp->conn_flags = IPCL_UDPCONN; 809 kmem_cache_free(udp_conn_cache, connp); 810 } else if (connp->conn_flags & IPCL_RAWIPCONN) { 811 812 connp->conn_flags = IPCL_RAWIPCONN; 813 connp->conn_ulp = IPPROTO_ICMP; 814 kmem_cache_free(rawip_conn_cache, connp); 815 } else if (connp->conn_flags & IPCL_RTSCONN) { 816 connp->conn_flags = IPCL_RTSCONN; 817 kmem_cache_free(rts_conn_cache, connp); 818 } else { 819 connp->conn_flags = IPCL_IPCCONN; 820 ASSERT(connp->conn_flags & IPCL_IPCCONN); 821 ASSERT(connp->conn_priv == NULL); 822 kmem_cache_free(ip_conn_cache, connp); 823 } 824 } 825 826 /* 827 * Running in cluster mode - deregister listener information 828 */ 829 830 static void 831 ipcl_conn_unlisten(conn_t *connp) 832 { 833 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 834 ASSERT(connp->conn_lport != 0); 835 836 if (cl_inet_unlisten != NULL) { 837 sa_family_t addr_family; 838 uint8_t *laddrp; 839 840 if (connp->conn_pkt_isv6) { 841 addr_family = AF_INET6; 842 laddrp = (uint8_t *)&connp->conn_bound_source_v6; 843 } else { 844 addr_family = AF_INET; 845 laddrp = (uint8_t *)&connp->conn_bound_source; 846 } 847 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, 848 IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); 849 } 850 connp->conn_flags &= ~IPCL_CL_LISTENER; 851 } 852 853 /* 854 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 855 * which table the conn belonged to). So for debugging we can see which hash 856 * table this connection was in. 
*/
/*
 * IPCL_HASH_REMOVE(connp): unlink 'connp' from the fanout bucket recorded
 * in conn_fanout, if any.  Under the bucket lock it fixes up the
 * neighbours (or the bucket head), clears the conn's linkage, sets
 * IPCL_REMOVED, deregisters a cluster listener when IPCL_CL_LISTENER is
 * set, and drops one reference with CONN_DEC_REF().  The caller must not
 * hold conn_lock (asserted).  Expands to a brace-enclosed block that
 * declares a local named 'connfp'.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}

/*
 * Function wrapper around IPCL_HASH_REMOVE(): removes 'connp' from the
 * fanout table it is currently on, if any.
 */
void
ipcl_hash_remove(conn_t *connp)
{
	IPCL_HASH_REMOVE(connp);
}

/*
 * The whole purpose of this function is to allow removal of
 * a conn_t from the connected hash for timewait reclaim.
 * This is essentially a TW reclaim fastpath where timewait
 * collector checks under fanout lock (so no one else can
 * get access to the conn_t) that refcnt is 2 i.e. one for
 * TCP and one for the classifier hash list. If ref count
 * is indeed 2, we can just remove the conn under lock and
 * avoid cleaning up the conn under squeue. This gives us
 * improved performance.
900 */ 901 void 902 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 903 { 904 ASSERT(MUTEX_HELD(&connfp->connf_lock)); 905 ASSERT(MUTEX_HELD(&connp->conn_lock)); 906 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 907 908 if ((connp)->conn_next != NULL) { 909 (connp)->conn_next->conn_prev = (connp)->conn_prev; 910 } 911 if ((connp)->conn_prev != NULL) { 912 (connp)->conn_prev->conn_next = (connp)->conn_next; 913 } else { 914 connfp->connf_head = (connp)->conn_next; 915 } 916 (connp)->conn_fanout = NULL; 917 (connp)->conn_next = NULL; 918 (connp)->conn_prev = NULL; 919 (connp)->conn_flags |= IPCL_REMOVED; 920 ASSERT((connp)->conn_ref == 2); 921 (connp)->conn_ref--; 922 } 923 924 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 925 ASSERT((connp)->conn_fanout == NULL); \ 926 ASSERT((connp)->conn_next == NULL); \ 927 ASSERT((connp)->conn_prev == NULL); \ 928 if ((connfp)->connf_head != NULL) { \ 929 (connfp)->connf_head->conn_prev = (connp); \ 930 (connp)->conn_next = (connfp)->connf_head; \ 931 } \ 932 (connp)->conn_fanout = (connfp); \ 933 (connfp)->connf_head = (connp); \ 934 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 935 IPCL_CONNECTED; \ 936 CONN_INC_REF(connp); \ 937 } 938 939 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 940 IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 941 "connp %p", (void *)(connfp), (void *)(connp))); \ 942 IPCL_HASH_REMOVE((connp)); \ 943 mutex_enter(&(connfp)->connf_lock); \ 944 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 945 mutex_exit(&(connfp)->connf_lock); \ 946 } 947 948 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 949 conn_t *pconnp = NULL, *nconnp; \ 950 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 951 "connp %p", (void *)connfp, (void *)(connp))); \ 952 IPCL_HASH_REMOVE((connp)); \ 953 mutex_enter(&(connfp)->connf_lock); \ 954 nconnp = (connfp)->connf_head; \ 955 while (nconnp != NULL && \ 956 !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { 
\ 957 pconnp = nconnp; \ 958 nconnp = nconnp->conn_next; \ 959 } \ 960 if (pconnp != NULL) { \ 961 pconnp->conn_next = (connp); \ 962 (connp)->conn_prev = pconnp; \ 963 } else { \ 964 (connfp)->connf_head = (connp); \ 965 } \ 966 if (nconnp != NULL) { \ 967 (connp)->conn_next = nconnp; \ 968 nconnp->conn_prev = (connp); \ 969 } \ 970 (connp)->conn_fanout = (connfp); \ 971 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 972 IPCL_BOUND; \ 973 CONN_INC_REF(connp); \ 974 mutex_exit(&(connfp)->connf_lock); \ 975 } 976 977 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 978 conn_t **list, *prev, *next; \ 979 boolean_t isv4mapped = \ 980 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 981 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 982 "connp %p", (void *)(connfp), (void *)(connp))); \ 983 IPCL_HASH_REMOVE((connp)); \ 984 mutex_enter(&(connfp)->connf_lock); \ 985 list = &(connfp)->connf_head; \ 986 prev = NULL; \ 987 while ((next = *list) != NULL) { \ 988 if (isv4mapped && \ 989 IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 990 connp->conn_zoneid == next->conn_zoneid) { \ 991 (connp)->conn_next = next; \ 992 if (prev != NULL) \ 993 prev = next->conn_prev; \ 994 next->conn_prev = (connp); \ 995 break; \ 996 } \ 997 list = &next->conn_next; \ 998 prev = next; \ 999 } \ 1000 (connp)->conn_prev = prev; \ 1001 *list = (connp); \ 1002 (connp)->conn_fanout = (connfp); \ 1003 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 1004 IPCL_BOUND; \ 1005 CONN_INC_REF((connp)); \ 1006 mutex_exit(&(connfp)->connf_lock); \ 1007 } 1008 1009 void 1010 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 1011 { 1012 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1013 } 1014 1015 void 1016 ipcl_proto_insert(conn_t *connp, uint8_t protocol) 1017 { 1018 connf_t *connfp; 1019 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1020 1021 ASSERT(connp != NULL); 1022 ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 1023 protocol == 
IPPROTO_ESP); 1024 1025 connp->conn_ulp = protocol; 1026 1027 /* Insert it in the protocol hash */ 1028 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 1029 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1030 } 1031 1032 void 1033 ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 1034 { 1035 connf_t *connfp; 1036 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1037 1038 ASSERT(connp != NULL); 1039 ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 1040 protocol == IPPROTO_ESP); 1041 1042 connp->conn_ulp = protocol; 1043 1044 /* Insert it in the Bind Hash */ 1045 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 1046 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1047 } 1048 1049 /* 1050 * Because the classifier is used to classify inbound packets, the destination 1051 * address is meant to be our local tunnel address (tunnel source), and the 1052 * source the remote tunnel address (tunnel destination). 1053 */ 1054 conn_t * 1055 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst) 1056 { 1057 connf_t *connfp; 1058 conn_t *connp; 1059 1060 /* first look for IPv4 tunnel links */ 1061 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)]; 1062 mutex_enter(&connfp->connf_lock); 1063 for (connp = connfp->connf_head; connp != NULL; 1064 connp = connp->conn_next) { 1065 if (IPCL_IPTUN_MATCH(connp, *dst, *src)) 1066 break; 1067 } 1068 if (connp != NULL) 1069 goto done; 1070 1071 mutex_exit(&connfp->connf_lock); 1072 1073 /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */ 1074 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, 1075 INADDR_ANY)]; 1076 mutex_enter(&connfp->connf_lock); 1077 for (connp = connfp->connf_head; connp != NULL; 1078 connp = connp->conn_next) { 1079 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY)) 1080 break; 1081 } 1082 done: 1083 if (connp != NULL) 1084 CONN_INC_REF(connp); 1085 mutex_exit(&connfp->connf_lock); 1086 return (connp); 1087 } 1088 1089 conn_t * 1090 ipcl_iptun_classify_v6(in6_addr_t 
*src, in6_addr_t *dst, ip_stack_t *ipst) 1091 { 1092 connf_t *connfp; 1093 conn_t *connp; 1094 1095 /* Look for an IPv6 tunnel link */ 1096 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)]; 1097 mutex_enter(&connfp->connf_lock); 1098 for (connp = connfp->connf_head; connp != NULL; 1099 connp = connp->conn_next) { 1100 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) { 1101 CONN_INC_REF(connp); 1102 break; 1103 } 1104 } 1105 mutex_exit(&connfp->connf_lock); 1106 return (connp); 1107 } 1108 1109 /* 1110 * This function is used only for inserting SCTP raw socket now. 1111 * This may change later. 1112 * 1113 * Note that only one raw socket can be bound to a port. The param 1114 * lport is in network byte order. 1115 */ 1116 static int 1117 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 1118 { 1119 connf_t *connfp; 1120 conn_t *oconnp; 1121 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1122 1123 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 1124 1125 /* Check for existing raw socket already bound to the port. 
*/ 1126 mutex_enter(&connfp->connf_lock); 1127 for (oconnp = connfp->connf_head; oconnp != NULL; 1128 oconnp = oconnp->conn_next) { 1129 if (oconnp->conn_lport == lport && 1130 oconnp->conn_zoneid == connp->conn_zoneid && 1131 oconnp->conn_af_isv6 == connp->conn_af_isv6 && 1132 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 1133 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 1134 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 1135 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 1136 IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 1137 &connp->conn_srcv6))) { 1138 break; 1139 } 1140 } 1141 mutex_exit(&connfp->connf_lock); 1142 if (oconnp != NULL) 1143 return (EADDRNOTAVAIL); 1144 1145 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 1146 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 1147 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 1148 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 1149 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1150 } else { 1151 IPCL_HASH_INSERT_BOUND(connfp, connp); 1152 } 1153 } else { 1154 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1155 } 1156 return (0); 1157 } 1158 1159 static int 1160 ipcl_iptun_hash_insert(conn_t *connp, ipaddr_t src, ipaddr_t dst, 1161 ip_stack_t *ipst) 1162 { 1163 connf_t *connfp; 1164 conn_t *tconnp; 1165 1166 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(src, dst)]; 1167 mutex_enter(&connfp->connf_lock); 1168 for (tconnp = connfp->connf_head; tconnp != NULL; 1169 tconnp = tconnp->conn_next) { 1170 if (IPCL_IPTUN_MATCH(tconnp, src, dst)) { 1171 /* A tunnel is already bound to these addresses. 
*/ 1172 mutex_exit(&connfp->connf_lock); 1173 return (EADDRINUSE); 1174 } 1175 } 1176 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1177 mutex_exit(&connfp->connf_lock); 1178 return (0); 1179 } 1180 1181 static int 1182 ipcl_iptun_hash_insert_v6(conn_t *connp, const in6_addr_t *src, 1183 const in6_addr_t *dst, ip_stack_t *ipst) 1184 { 1185 connf_t *connfp; 1186 conn_t *tconnp; 1187 1188 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(src, dst)]; 1189 mutex_enter(&connfp->connf_lock); 1190 for (tconnp = connfp->connf_head; tconnp != NULL; 1191 tconnp = tconnp->conn_next) { 1192 if (IPCL_IPTUN_MATCH_V6(tconnp, src, dst)) { 1193 /* A tunnel is already bound to these addresses. */ 1194 mutex_exit(&connfp->connf_lock); 1195 return (EADDRINUSE); 1196 } 1197 } 1198 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1199 mutex_exit(&connfp->connf_lock); 1200 return (0); 1201 } 1202 1203 /* 1204 * Check for a MAC exemption conflict on a labeled system. Note that for 1205 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 1206 * transport layer. This check is for binding all other protocols. 1207 * 1208 * Returns true if there's a conflict. 
1209 */ 1210 static boolean_t 1211 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 1212 { 1213 connf_t *connfp; 1214 conn_t *tconn; 1215 1216 connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 1217 mutex_enter(&connfp->connf_lock); 1218 for (tconn = connfp->connf_head; tconn != NULL; 1219 tconn = tconn->conn_next) { 1220 /* We don't allow v4 fallback for v6 raw socket */ 1221 if (connp->conn_af_isv6 != tconn->conn_af_isv6) 1222 continue; 1223 /* If neither is exempt, then there's no conflict */ 1224 if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 1225 continue; 1226 /* We are only concerned about sockets for a different zone */ 1227 if (connp->conn_zoneid == tconn->conn_zoneid) 1228 continue; 1229 /* If both are bound to different specific addrs, ok */ 1230 if (connp->conn_src != INADDR_ANY && 1231 tconn->conn_src != INADDR_ANY && 1232 connp->conn_src != tconn->conn_src) 1233 continue; 1234 /* These two conflict; fail */ 1235 break; 1236 } 1237 mutex_exit(&connfp->connf_lock); 1238 return (tconn != NULL); 1239 } 1240 1241 static boolean_t 1242 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 1243 { 1244 connf_t *connfp; 1245 conn_t *tconn; 1246 1247 connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 1248 mutex_enter(&connfp->connf_lock); 1249 for (tconn = connfp->connf_head; tconn != NULL; 1250 tconn = tconn->conn_next) { 1251 /* We don't allow v4 fallback for v6 raw socket */ 1252 if (connp->conn_af_isv6 != tconn->conn_af_isv6) 1253 continue; 1254 /* If neither is exempt, then there's no conflict */ 1255 if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 1256 continue; 1257 /* We are only concerned about sockets for a different zone */ 1258 if (connp->conn_zoneid == tconn->conn_zoneid) 1259 continue; 1260 /* If both are bound to different addrs, ok */ 1261 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) && 1262 !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) && 1263 !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, 
&tconn->conn_srcv6)) 1264 continue; 1265 /* These two conflict; fail */ 1266 break; 1267 } 1268 mutex_exit(&connfp->connf_lock); 1269 return (tconn != NULL); 1270 } 1271 1272 /* 1273 * (v4, v6) bind hash insertion routines 1274 */ 1275 int 1276 ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 1277 { 1278 connf_t *connfp; 1279 #ifdef IPCL_DEBUG 1280 char buf[INET_NTOA_BUFSIZE]; 1281 #endif 1282 int ret = 0; 1283 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1284 1285 ASSERT(connp); 1286 1287 IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 1288 "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 1289 1290 connp->conn_ulp = protocol; 1291 IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 1292 connp->conn_lport = lport; 1293 1294 if (IPCL_IS_IPTUN(connp)) 1295 return (ipcl_iptun_hash_insert(connp, src, INADDR_ANY, ipst)); 1296 1297 switch (protocol) { 1298 default: 1299 if (is_system_labeled() && 1300 check_exempt_conflict_v4(connp, ipst)) 1301 return (EADDRINUSE); 1302 /* FALLTHROUGH */ 1303 case IPPROTO_UDP: 1304 if (protocol == IPPROTO_UDP) { 1305 IPCL_DEBUG_LVL(64, 1306 ("ipcl_bind_insert: connp %p - udp\n", 1307 (void *)connp)); 1308 connfp = &ipst->ips_ipcl_udp_fanout[ 1309 IPCL_UDP_HASH(lport, ipst)]; 1310 } else { 1311 IPCL_DEBUG_LVL(64, 1312 ("ipcl_bind_insert: connp %p - protocol\n", 1313 (void *)connp)); 1314 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 1315 } 1316 1317 if (connp->conn_rem != INADDR_ANY) { 1318 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1319 } else if (connp->conn_src != INADDR_ANY) { 1320 IPCL_HASH_INSERT_BOUND(connfp, connp); 1321 } else { 1322 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1323 } 1324 break; 1325 1326 case IPPROTO_TCP: 1327 1328 /* Insert it in the Bind Hash */ 1329 ASSERT(connp->conn_zoneid != ALL_ZONES); 1330 connfp = &ipst->ips_ipcl_bind_fanout[ 1331 IPCL_BIND_HASH(lport, ipst)]; 1332 if (connp->conn_src != INADDR_ANY) { 1333 IPCL_HASH_INSERT_BOUND(connfp, 
connp); 1334 } else { 1335 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1336 } 1337 if (cl_inet_listen != NULL) { 1338 ASSERT(!connp->conn_pkt_isv6); 1339 connp->conn_flags |= IPCL_CL_LISTENER; 1340 (*cl_inet_listen)( 1341 connp->conn_netstack->netstack_stackid, 1342 IPPROTO_TCP, AF_INET, 1343 (uint8_t *)&connp->conn_bound_source, lport, NULL); 1344 } 1345 break; 1346 1347 case IPPROTO_SCTP: 1348 ret = ipcl_sctp_hash_insert(connp, lport); 1349 break; 1350 } 1351 1352 return (ret); 1353 } 1354 1355 int 1356 ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 1357 uint16_t lport) 1358 { 1359 connf_t *connfp; 1360 int ret = 0; 1361 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1362 1363 ASSERT(connp != NULL); connp->conn_ulp = protocol; 1364 connp->conn_srcv6 = *src; 1365 connp->conn_lport = lport; 1366 1367 if (IPCL_IS_IPTUN(connp)) { 1368 return (ipcl_iptun_hash_insert_v6(connp, src, &ipv6_all_zeros, 1369 ipst)); 1370 } 1371 1372 switch (protocol) { 1373 default: 1374 if (is_system_labeled() && 1375 check_exempt_conflict_v6(connp, ipst)) 1376 return (EADDRINUSE); 1377 /* FALLTHROUGH */ 1378 case IPPROTO_UDP: 1379 if (protocol == IPPROTO_UDP) { 1380 IPCL_DEBUG_LVL(128, 1381 ("ipcl_bind_insert_v6: connp %p - udp\n", 1382 (void *)connp)); 1383 connfp = &ipst->ips_ipcl_udp_fanout[ 1384 IPCL_UDP_HASH(lport, ipst)]; 1385 } else { 1386 IPCL_DEBUG_LVL(128, 1387 ("ipcl_bind_insert_v6: connp %p - protocol\n", 1388 (void *)connp)); 1389 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 1390 } 1391 1392 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 1393 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1394 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 1395 IPCL_HASH_INSERT_BOUND(connfp, connp); 1396 } else { 1397 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1398 } 1399 break; 1400 1401 case IPPROTO_TCP: 1402 /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? 
*/ 1403 1404 /* Insert it in the Bind Hash */ 1405 ASSERT(connp->conn_zoneid != ALL_ZONES); 1406 connfp = &ipst->ips_ipcl_bind_fanout[ 1407 IPCL_BIND_HASH(lport, ipst)]; 1408 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 1409 IPCL_HASH_INSERT_BOUND(connfp, connp); 1410 } else { 1411 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1412 } 1413 if (cl_inet_listen != NULL) { 1414 sa_family_t addr_family; 1415 uint8_t *laddrp; 1416 1417 if (connp->conn_pkt_isv6) { 1418 addr_family = AF_INET6; 1419 laddrp = 1420 (uint8_t *)&connp->conn_bound_source_v6; 1421 } else { 1422 addr_family = AF_INET; 1423 laddrp = (uint8_t *)&connp->conn_bound_source; 1424 } 1425 connp->conn_flags |= IPCL_CL_LISTENER; 1426 (*cl_inet_listen)( 1427 connp->conn_netstack->netstack_stackid, 1428 IPPROTO_TCP, addr_family, laddrp, lport, NULL); 1429 } 1430 break; 1431 1432 case IPPROTO_SCTP: 1433 ret = ipcl_sctp_hash_insert(connp, lport); 1434 break; 1435 } 1436 1437 return (ret); 1438 } 1439 1440 /* 1441 * ipcl_conn_hash insertion routines. 
*/

/*
 * Insert an IPv4 conn into the table that matches its kind:
 *   - IP tunnel conns go to the tunnel fanout;
 *   - TCP goes to the connected fanout after a duplicate-tuple check;
 *   - SCTP is removed from its current hash and re-inserted through
 *     ipcl_sctp_hash_insert();
 *   - UDP goes to the UDP fanout, everything else to the protocol fanout,
 *     after a MAC-exempt conflict check on labeled systems.
 *
 * `ports' packs both ports; the local port is read from its second 16-bit
 * half (up[1]) -- presumably network byte order with the foreign port first;
 * confirm against callers.
 *
 * Returns 0 on success or an errno value (EADDRINUSE, or whatever the
 * tunnel/SCTP insert helpers return).
 */
int
ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
    ipaddr_t rem, uint32_t ports)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
#ifdef	IPCL_DEBUG
	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
#endif
	in_port_t	lport;
	int		ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
	    ports, protocol));

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert(connp, src, rem, ipst));

	switch (protocol) {
	case IPPROTO_TCP:
		if (!(connp->conn_flags & IPCL_EAGER)) {
			/*
			 * Only non-eager conns are initialized here.  For an
			 * eager connection, i.e. one which has just been
			 * created, the initialization was already done in ip
			 * at conn creation time, so it is skipped.
			 */
			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		}

		/*
		 * For tcp, we check whether the connection tuple already
		 * exists before allowing the connection to proceed.  We
		 * also allow indexing on the zoneid. This is to allow
		 * multiple shared stack zones to have the same tcp
		 * connection tuple. In practice this only happens for
		 * INADDR_LOOPBACK as it's the only local address which
		 * doesn't have to be unique.
		 */
		connfp = &ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH(connp->conn_rem,
		    connp->conn_ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if ((IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
			    connp->conn_rem, connp->conn_src,
			    connp->conn_ports)) &&
			    (IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid))) {

				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 *
			 * The bucket lock is released around the removal and
			 * retaken afterwards; the duplicate scan above is not
			 * repeated once the lock is reacquired.
			 * NOTE(review): confirm nothing can insert the same
			 * tuple in that window.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}

		ASSERT(connp->conn_recv != NULL);

		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/*
		 * The raw socket may have already been bound, remove it
		 * from the hash first.
		 */
		IPCL_HASH_REMOVE(connp);
		/* Extract the low 16 bits of `ports' as the local port. */
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	default:
		/*
		 * Check for conflicts among MAC exempt bindings.  For
		 * transports with port numbers, this is done by the upper
		 * level per-transport binding logic.  For all others, it's
		 * done here.
		 *
		 * NOTE(review): this check reads connp->conn_ulp and
		 * connp->conn_src before IPCL_CONN_INIT() below runs --
		 * presumably they were set by an earlier bind; confirm.
		 */
		if (is_system_labeled() &&
		    check_exempt_conflict_v4(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */

	case IPPROTO_UDP:
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		if (protocol == IPPROTO_UDP) {
			/* The UDP fanout is hashed on the local port. */
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(up[1], ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
		}

		/* Pick the insertion flavor from the addresses now set. */
		if (connp->conn_rem != INADDR_ANY) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (connp->conn_src != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * IPv6 version of ipcl_conn_insert().  For TCP the duplicate-tuple check
 * additionally treats an existing conn as a match only when its
 * tcp_bound_if is 0 or equals `ifindex'.
 *
 * Returns 0 on success or an errno value (EADDRINUSE, or whatever the
 * tunnel/SCTP insert helpers return).
 */
int
ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
    const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
	in_port_t	lport;
	int		ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert_v6(connp, src, rem, ipst));

	switch (protocol) {
	case IPPROTO_TCP:
		/* Just need to insert a conn struct */
		if (!(connp->conn_flags & IPCL_EAGER)) {
			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
		}

		/*
		 * For tcp, we check whether the connection tuple already
		 * exists before allowing the connection to proceed.  We
		 * also allow indexing on the zoneid. This is to allow
		 * multiple shared stack zones to have the same tcp
		 * connection tuple. In practice this only happens for
		 * ipv6_loopback as it's the only local address which
		 * doesn't have to be unique.
		 */
		connfp = &ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports,
		    ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
			    connp->conn_remv6, connp->conn_srcv6,
			    connp->conn_ports) &&
			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
			    tconnp->conn_tcp->tcp_bound_if == ifindex) &&
			    (IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid))) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 *
			 * The bucket lock is released around the removal and
			 * retaken afterwards; the duplicate scan above is not
			 * repeated once the lock is reacquired.
			 * NOTE(review): confirm nothing can insert the same
			 * tuple in that window.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/* Drop any existing hash entry before re-inserting. */
		IPCL_HASH_REMOVE(connp);
		/* Extract the low 16 bits of `ports' as the local port. */
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	default:
		/* MAC-exempt conflict check for port-less protocols. */
		if (is_system_labeled() &&
		    check_exempt_conflict_v6(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
		if (protocol == IPPROTO_UDP) {
			/* The UDP fanout is hashed on the local port. */
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(up[1], ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
		}

		/* Pick the insertion flavor from the addresses now set. */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * v4 packet
classifying function. looks up the fanout table to
 * find the conn, the packet belongs to. returns the conn with
 * the reference held, null otherwise.
 *
 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
 * Lookup" comment block are applied.  Labels are also checked as described
 * above.  If the packet is from the inside (looped back), and is from the same
 * zone, then label checks are omitted.
 *
 * TCP is looked up first in the connected fanout and then in the bind
 * fanout; UDP in the UDP fanout; IPPROTO_ENCAP and IPPROTO_IPV6 in the
 * tunnel fanout.  Any other protocol yields NULL.
 */
conn_t *
ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	ipha_t	*ipha;
	connf_t	*connfp, *bind_connfp;
	uint16_t lport;
	uint16_t fport;
	uint32_t ports;
	conn_t	*connp;
	uint16_t  *up;
	boolean_t shared_addr;
	boolean_t unlabeled;

	ipha = (ipha_t *)mp->b_rptr;
	/* up[0] is the foreign (source) port, up[1] the local one. */
	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);

	switch (protocol) {
	case IPPROTO_TCP:
		/* Both ports as one 32-bit word, as used by the conn hash. */
		ports = *(uint32_t *)up;
		connfp =
		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
		    ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if ((IPCL_CONN_MATCH(connp, protocol,
			    ipha->ipha_src, ipha->ipha_dst, ports)) &&
			    (IPCL_ZONE_MATCH(connp, zoneid))) {
				break;
			}
		}

		if (connp != NULL) {
			/*
			 * We have a fully-bound TCP connection.
			 *
			 * For labeled systems, there's no need to check the
			 * label here.  It's known to be good as we checked
			 * before allowing the connection to become bound.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&connfp->connf_lock);

		/* No connected match; fall back to the bind fanout. */
		lport = up[1];
		unlabeled = B_FALSE;
		/* Cred cannot be null on IPv4 */
		if (is_system_labeled()) {
			cred_t *cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			unlabeled = (crgetlabel(cr)->tsl_flags &
			    TSLF_UNLABELED) != 0;
		}
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid = tsol_mlp_findzone(protocol, lport);
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES.  In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 *
			 * If there is such a zone, we prefer to find a
			 * connection in it.  Otherwise, we look for a
			 * MAC-exempt connection in any zone whose label
			 * dominates the default label on the packet.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			else
				unlabeled = B_FALSE;
		}

		bind_connfp =
		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
		mutex_enter(&bind_connfp->connf_lock);
		for (connp = bind_connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			/*
			 * Accept a zone match, or a MAC-exempt conn when the
			 * packet is unlabeled on a shared address.
			 */
			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
			    lport) && (IPCL_ZONE_MATCH(connp, zoneid) ||
			    (unlabeled && connp->conn_mac_exempt &&
			    shared_addr)))
				break;
		}

		/*
		 * If the matching connection is SLP on a private address, then
		 * the label on the packet must match the local zone's label.
		 * Otherwise, it must be in the label range defined by tnrh.
		 * This is ensured by tsol_receive_local.
		 */
		if (connp != NULL && is_system_labeled() &&
		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
		    shared_addr, connp)) {
			DTRACE_PROBE3(
			    tx__ip__log__info__classify__tcp,
			    char *,
			    "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			/* Have a listener at least */
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&bind_connfp->connf_lock);

		IPCL_DEBUG_LVL(512,
		    ("ipcl_classify: couldn't classify mp = %p\n",
		    (void *)mp));
		break;

	case IPPROTO_UDP:
		lport = up[1];
		unlabeled = B_FALSE;
		/* Cred cannot be null on IPv4 */
		if (is_system_labeled()) {
			cred_t *cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			unlabeled = (crgetlabel(cr)->tsl_flags &
			    TSLF_UNLABELED) != 0;
		}
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid = tsol_mlp_findzone(protocol, lport);
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES.  In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 *
			 * If there is such a zone, we prefer to find a
			 * connection in it.  Otherwise, we look for a
			 * MAC-exempt connection in any zone whose label
			 * dominates the default label on the packet.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			else
				unlabeled = B_FALSE;
		}
		fport = up[0];
		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			/*
			 * Accept a zone match, or a MAC-exempt conn when the
			 * packet is unlabeled on a shared address.
			 */
			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
			    fport, ipha->ipha_src) &&
			    (IPCL_ZONE_MATCH(connp, zoneid) ||
			    (unlabeled && connp->conn_mac_exempt &&
			    shared_addr)))
				break;
		}

		/* On labeled systems the conn must be allowed to receive mp. */
		if (connp != NULL && is_system_labeled() &&
		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
		    shared_addr, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		/*
		 * We shouldn't come here for multicast/broadcast packets
		 */
		mutex_exit(&connfp->connf_lock);
		IPCL_DEBUG_LVL(512,
		    ("ipcl_classify: cant find udp conn_t for ports : %x %x",
		    lport, fport));
		break;

	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		/* Tunneled payloads are classified by tunnel endpoints. */
		return (ipcl_iptun_classify_v4(&ipha->ipha_src,
		    &ipha->ipha_dst, ipst));
	}

	return (NULL);
}

/*
 * v6 packet classifying function; the IPv6 counterpart of
 * ipcl_classify_v4().  `hdr_len' is the offset from mp->b_rptr at which the
 * transport header is read.  Returns the conn with a reference held, NULL
 * otherwise.  Unlike the v4 path, a NULL cred on the message is tolerated.
 */
conn_t *
ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	ip6_t		*ip6h;
	connf_t		*connfp, *bind_connfp;
	uint16_t	lport;
	uint16_t	fport;
	tcph_t		*tcph;
	uint32_t	ports;
	conn_t		*connp;
	uint16_t	*up;
	boolean_t	shared_addr;
	boolean_t	unlabeled;

	ip6h = (ip6_t *)mp->b_rptr;

	switch (protocol) {
	case IPPROTO_TCP:
		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
		/* up[0] is the foreign (source) port, up[1] the local one. */
		up = (uint16_t *)tcph->th_lport;
		ports = *(uint32_t *)up;

		connfp =
		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
		    ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if ((IPCL_CONN_MATCH_V6(connp, protocol,
			    ip6h->ip6_src, ip6h->ip6_dst, ports)) &&
			    (IPCL_ZONE_MATCH(connp, zoneid))) {
				break;
			}
		}

		if (connp != NULL) {
			/*
			 * We have a fully-bound TCP connection.
			 *
			 * For labeled systems, there's no need to check the
			 * label here.  It's known to be good as we checked
			 * before allowing the connection to become bound.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&connfp->connf_lock);

		/* No connected match; fall back to the bind fanout. */
		lport = up[1];
		unlabeled = B_FALSE;
		/* Cred can be null on IPv6 */
		if (is_system_labeled()) {
			cred_t *cr = msg_getcred(mp, NULL);

			unlabeled = (cr != NULL &&
			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
		}
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid = tsol_mlp_findzone(protocol, lport);
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES.  In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 *
			 * If there is such a zone, we prefer to find a
			 * connection in it.  Otherwise, we look for a
			 * MAC-exempt connection in any zone whose label
			 * dominates the default label on the packet.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			else
				unlabeled = B_FALSE;
		}

		bind_connfp =
		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
		mutex_enter(&bind_connfp->connf_lock);
		for (connp = bind_connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			/*
			 * Accept a zone match, or a MAC-exempt conn when the
			 * packet is unlabeled on a shared address.
			 */
			if (IPCL_BIND_MATCH_V6(connp, protocol,
			    ip6h->ip6_dst, lport) &&
			    (IPCL_ZONE_MATCH(connp, zoneid) ||
			    (unlabeled && connp->conn_mac_exempt &&
			    shared_addr)))
				break;
		}

		/* On labeled systems the conn must be allowed to receive mp. */
		if (connp != NULL && is_system_labeled() &&
		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
		    shared_addr, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			/* Have a listener at least */
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			IPCL_DEBUG_LVL(512,
			    ("ipcl_classify_v6: found listner "
			    "connp = %p\n", (void *)connp));

			return (connp);
		}

		mutex_exit(&bind_connfp->connf_lock);

		IPCL_DEBUG_LVL(512,
		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
		    (void *)mp));
		break;

	case IPPROTO_UDP:
		/* up[0] is the foreign (source) port, up[1] the local one. */
		up = (uint16_t *)&mp->b_rptr[hdr_len];
		lport = up[1];
		unlabeled = B_FALSE;
		/* Cred can be null on IPv6 */
		if (is_system_labeled()) {
			cred_t *cr = msg_getcred(mp, NULL);

			unlabeled = (cr != NULL &&
			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
		}
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid = tsol_mlp_findzone(protocol, lport);
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES.  In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 *
			 * If there is such a zone, we prefer to find a
			 * connection in it.  Otherwise, we look for a
			 * MAC-exempt connection in any zone whose label
			 * dominates the default label on the packet.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			else
				unlabeled = B_FALSE;
		}

		fport = up[0];
		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
		    fport));
		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			/*
			 * Accept a zone match, or a MAC-exempt conn when the
			 * packet is unlabeled on a shared address.
			 */
			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
			    fport, ip6h->ip6_src) &&
			    (IPCL_ZONE_MATCH(connp, zoneid) ||
			    (unlabeled && connp->conn_mac_exempt &&
			    shared_addr)))
				break;
		}

		/* On labeled systems the conn must be allowed to receive mp. */
		if (connp != NULL && is_system_labeled() &&
		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
		    shared_addr, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		/*
		 * We shouldn't come here for multicast/broadcast packets
		 */
		mutex_exit(&connfp->connf_lock);
		IPCL_DEBUG_LVL(512,
		    ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x",
		    lport, fport));
		break;
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		/* Tunneled payloads are classified by tunnel endpoints. */
		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
		    &ip6h->ip6_dst, ipst));
	}

	return (NULL);
}

/*
 * wrapper around ipcl_classify_(v4,v6) routines.
2075 */ 2076 conn_t * 2077 ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) 2078 { 2079 uint16_t hdr_len; 2080 ipha_t *ipha; 2081 uint8_t *nexthdrp; 2082 2083 if (MBLKL(mp) < sizeof (ipha_t)) 2084 return (NULL); 2085 2086 switch (IPH_HDR_VERSION(mp->b_rptr)) { 2087 case IPV4_VERSION: 2088 ipha = (ipha_t *)mp->b_rptr; 2089 hdr_len = IPH_HDR_LENGTH(ipha); 2090 return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 2091 zoneid, ipst)); 2092 case IPV6_VERSION: 2093 if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 2094 &hdr_len, &nexthdrp)) 2095 return (NULL); 2096 2097 return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst)); 2098 } 2099 2100 return (NULL); 2101 } 2102 2103 conn_t * 2104 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, 2105 uint32_t ports, ipha_t *hdr, ip_stack_t *ipst) 2106 { 2107 connf_t *connfp; 2108 conn_t *connp; 2109 in_port_t lport; 2110 int af; 2111 boolean_t shared_addr; 2112 boolean_t unlabeled; 2113 const void *dst; 2114 2115 lport = ((uint16_t *)&ports)[1]; 2116 2117 unlabeled = B_FALSE; 2118 /* Cred can be null on IPv6 */ 2119 if (is_system_labeled()) { 2120 cred_t *cr = msg_getcred(mp, NULL); 2121 2122 unlabeled = (cr != NULL && 2123 crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 2124 } 2125 shared_addr = (zoneid == ALL_ZONES); 2126 if (shared_addr) { 2127 /* 2128 * No need to handle exclusive-stack zones since ALL_ZONES 2129 * only applies to the shared stack. 2130 */ 2131 zoneid = tsol_mlp_findzone(protocol, lport); 2132 /* 2133 * If no shared MLP is found, tsol_mlp_findzone returns 2134 * ALL_ZONES. In that case, we assume it's SLP, and search for 2135 * the zone based on the packet label. 2136 * 2137 * If there is such a zone, we prefer to find a connection in 2138 * it. Otherwise, we look for a MAC-exempt connection in any 2139 * zone whose label dominates the default label on the packet. 
2140 */ 2141 if (zoneid == ALL_ZONES) 2142 zoneid = tsol_packet_to_zoneid(mp); 2143 else 2144 unlabeled = B_FALSE; 2145 } 2146 2147 af = IPH_HDR_VERSION(hdr); 2148 dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst : 2149 (const void *)&((ip6_t *)hdr)->ip6_dst; 2150 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 2151 2152 mutex_enter(&connfp->connf_lock); 2153 for (connp = connfp->connf_head; connp != NULL; 2154 connp = connp->conn_next) { 2155 /* We don't allow v4 fallback for v6 raw socket. */ 2156 if (af == (connp->conn_af_isv6 ? IPV4_VERSION : 2157 IPV6_VERSION)) 2158 continue; 2159 if (connp->conn_fully_bound) { 2160 if (af == IPV4_VERSION) { 2161 if (!IPCL_CONN_MATCH(connp, protocol, 2162 hdr->ipha_src, hdr->ipha_dst, ports)) 2163 continue; 2164 } else { 2165 if (!IPCL_CONN_MATCH_V6(connp, protocol, 2166 ((ip6_t *)hdr)->ip6_src, 2167 ((ip6_t *)hdr)->ip6_dst, ports)) 2168 continue; 2169 } 2170 } else { 2171 if (af == IPV4_VERSION) { 2172 if (!IPCL_BIND_MATCH(connp, protocol, 2173 hdr->ipha_dst, lport)) 2174 continue; 2175 } else { 2176 if (!IPCL_BIND_MATCH_V6(connp, protocol, 2177 ((ip6_t *)hdr)->ip6_dst, lport)) 2178 continue; 2179 } 2180 } 2181 2182 if (IPCL_ZONE_MATCH(connp, zoneid) || 2183 (unlabeled && connp->conn_mac_exempt && shared_addr)) 2184 break; 2185 } 2186 /* 2187 * If the connection is fully-bound and connection-oriented (TCP or 2188 * SCTP), then we've already validated the remote system's label. 2189 * There's no need to do it again for every packet. 
2190 */ 2191 if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound || 2192 !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) && 2193 !tsol_receive_local(mp, dst, af, shared_addr, connp)) { 2194 DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 2195 char *, "connp(1) could not receive mp(2)", 2196 conn_t *, connp, mblk_t *, mp); 2197 connp = NULL; 2198 } 2199 2200 if (connp != NULL) 2201 goto found; 2202 mutex_exit(&connfp->connf_lock); 2203 2204 /* Try to look for a wildcard match. */ 2205 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 2206 mutex_enter(&connfp->connf_lock); 2207 for (connp = connfp->connf_head; connp != NULL; 2208 connp = connp->conn_next) { 2209 /* We don't allow v4 fallback for v6 raw socket. */ 2210 if ((af == (connp->conn_af_isv6 ? IPV4_VERSION : 2211 IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) { 2212 continue; 2213 } 2214 if (af == IPV4_VERSION) { 2215 if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 2216 break; 2217 } else { 2218 if (IPCL_RAW_MATCH_V6(connp, protocol, 2219 ((ip6_t *)hdr)->ip6_dst)) { 2220 break; 2221 } 2222 } 2223 } 2224 2225 if (connp != NULL) 2226 goto found; 2227 2228 mutex_exit(&connfp->connf_lock); 2229 return (NULL); 2230 2231 found: 2232 ASSERT(connp != NULL); 2233 CONN_INC_REF(connp); 2234 mutex_exit(&connfp->connf_lock); 2235 return (connp); 2236 } 2237 2238 /* ARGSUSED */ 2239 static int 2240 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2241 { 2242 itc_t *itc = (itc_t *)buf; 2243 conn_t *connp = &itc->itc_conn; 2244 tcp_t *tcp = (tcp_t *)&itc[1]; 2245 2246 bzero(connp, sizeof (conn_t)); 2247 bzero(tcp, sizeof (tcp_t)); 2248 2249 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2250 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2251 cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL); 2252 tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 2253 connp->conn_tcp = tcp; 2254 connp->conn_flags = IPCL_TCPCONN; 2255 connp->conn_ulp = IPPROTO_TCP; 2256 
tcp->tcp_connp = connp; 2257 return (0); 2258 } 2259 2260 /* ARGSUSED */ 2261 static void 2262 tcp_conn_destructor(void *buf, void *cdrarg) 2263 { 2264 itc_t *itc = (itc_t *)buf; 2265 conn_t *connp = &itc->itc_conn; 2266 tcp_t *tcp = (tcp_t *)&itc[1]; 2267 2268 ASSERT(connp->conn_flags & IPCL_TCPCONN); 2269 ASSERT(tcp->tcp_connp == connp); 2270 ASSERT(connp->conn_tcp == tcp); 2271 tcp_timermp_free(tcp); 2272 mutex_destroy(&connp->conn_lock); 2273 cv_destroy(&connp->conn_cv); 2274 cv_destroy(&connp->conn_sq_cv); 2275 } 2276 2277 /* ARGSUSED */ 2278 static int 2279 ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2280 { 2281 itc_t *itc = (itc_t *)buf; 2282 conn_t *connp = &itc->itc_conn; 2283 2284 bzero(connp, sizeof (conn_t)); 2285 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2286 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2287 connp->conn_flags = IPCL_IPCCONN; 2288 2289 return (0); 2290 } 2291 2292 /* ARGSUSED */ 2293 static void 2294 ip_conn_destructor(void *buf, void *cdrarg) 2295 { 2296 itc_t *itc = (itc_t *)buf; 2297 conn_t *connp = &itc->itc_conn; 2298 2299 ASSERT(connp->conn_flags & IPCL_IPCCONN); 2300 ASSERT(connp->conn_priv == NULL); 2301 mutex_destroy(&connp->conn_lock); 2302 cv_destroy(&connp->conn_cv); 2303 } 2304 2305 /* ARGSUSED */ 2306 static int 2307 udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2308 { 2309 itc_t *itc = (itc_t *)buf; 2310 conn_t *connp = &itc->itc_conn; 2311 udp_t *udp = (udp_t *)&itc[1]; 2312 2313 bzero(connp, sizeof (conn_t)); 2314 bzero(udp, sizeof (udp_t)); 2315 2316 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2317 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2318 connp->conn_udp = udp; 2319 connp->conn_flags = IPCL_UDPCONN; 2320 connp->conn_ulp = IPPROTO_UDP; 2321 udp->udp_connp = connp; 2322 return (0); 2323 } 2324 2325 /* ARGSUSED */ 2326 static void 2327 udp_conn_destructor(void *buf, void *cdrarg) 2328 { 2329 itc_t *itc = (itc_t *)buf; 2330 conn_t *connp = 
&itc->itc_conn; 2331 udp_t *udp = (udp_t *)&itc[1]; 2332 2333 ASSERT(connp->conn_flags & IPCL_UDPCONN); 2334 ASSERT(udp->udp_connp == connp); 2335 ASSERT(connp->conn_udp == udp); 2336 mutex_destroy(&connp->conn_lock); 2337 cv_destroy(&connp->conn_cv); 2338 } 2339 2340 /* ARGSUSED */ 2341 static int 2342 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2343 { 2344 itc_t *itc = (itc_t *)buf; 2345 conn_t *connp = &itc->itc_conn; 2346 icmp_t *icmp = (icmp_t *)&itc[1]; 2347 2348 bzero(connp, sizeof (conn_t)); 2349 bzero(icmp, sizeof (icmp_t)); 2350 2351 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2352 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2353 connp->conn_icmp = icmp; 2354 connp->conn_flags = IPCL_RAWIPCONN; 2355 connp->conn_ulp = IPPROTO_ICMP; 2356 icmp->icmp_connp = connp; 2357 return (0); 2358 } 2359 2360 /* ARGSUSED */ 2361 static void 2362 rawip_conn_destructor(void *buf, void *cdrarg) 2363 { 2364 itc_t *itc = (itc_t *)buf; 2365 conn_t *connp = &itc->itc_conn; 2366 icmp_t *icmp = (icmp_t *)&itc[1]; 2367 2368 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 2369 ASSERT(icmp->icmp_connp == connp); 2370 ASSERT(connp->conn_icmp == icmp); 2371 mutex_destroy(&connp->conn_lock); 2372 cv_destroy(&connp->conn_cv); 2373 } 2374 2375 /* ARGSUSED */ 2376 static int 2377 rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 2378 { 2379 itc_t *itc = (itc_t *)buf; 2380 conn_t *connp = &itc->itc_conn; 2381 rts_t *rts = (rts_t *)&itc[1]; 2382 2383 bzero(connp, sizeof (conn_t)); 2384 bzero(rts, sizeof (rts_t)); 2385 2386 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2387 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2388 connp->conn_rts = rts; 2389 connp->conn_flags = IPCL_RTSCONN; 2390 rts->rts_connp = connp; 2391 return (0); 2392 } 2393 2394 /* ARGSUSED */ 2395 static void 2396 rts_conn_destructor(void *buf, void *cdrarg) 2397 { 2398 itc_t *itc = (itc_t *)buf; 2399 conn_t *connp = &itc->itc_conn; 2400 rts_t *rts = (rts_t 
*)&itc[1]; 2401 2402 ASSERT(connp->conn_flags & IPCL_RTSCONN); 2403 ASSERT(rts->rts_connp == connp); 2404 ASSERT(connp->conn_rts == rts); 2405 mutex_destroy(&connp->conn_lock); 2406 cv_destroy(&connp->conn_cv); 2407 } 2408 2409 /* ARGSUSED */ 2410 int 2411 ip_helper_stream_constructor(void *buf, void *cdrarg, int kmflags) 2412 { 2413 int error; 2414 netstack_t *ns; 2415 int ret; 2416 tcp_stack_t *tcps; 2417 ip_helper_stream_info_t *ip_helper_str; 2418 ip_stack_t *ipst; 2419 2420 ns = netstack_find_by_cred(kcred); 2421 ASSERT(ns != NULL); 2422 tcps = ns->netstack_tcp; 2423 ipst = ns->netstack_ip; 2424 ASSERT(tcps != NULL); 2425 ip_helper_str = (ip_helper_stream_info_t *)buf; 2426 2427 do { 2428 error = ldi_open_by_name(DEV_IP, IP_HELPER_STR, kcred, 2429 &ip_helper_str->iphs_handle, ipst->ips_ldi_ident); 2430 } while (error == EINTR); 2431 2432 if (error == 0) { 2433 do { 2434 error = ldi_ioctl( 2435 ip_helper_str->iphs_handle, SIOCSQPTR, 2436 (intptr_t)buf, FKIOCTL, kcred, &ret); 2437 } while (error == EINTR); 2438 2439 if (error != 0) { 2440 (void) ldi_close( 2441 ip_helper_str->iphs_handle, 0, kcred); 2442 } 2443 } 2444 2445 netstack_rele(ipst->ips_netstack); 2446 2447 return (error); 2448 } 2449 2450 /* ARGSUSED */ 2451 static void 2452 ip_helper_stream_destructor(void *buf, void *cdrarg) 2453 { 2454 ip_helper_stream_info_t *ip_helper_str = (ip_helper_stream_info_t *)buf; 2455 2456 ip_helper_str->iphs_rq->q_ptr = 2457 ip_helper_str->iphs_wq->q_ptr = 2458 ip_helper_str->iphs_minfo; 2459 (void) ldi_close(ip_helper_str->iphs_handle, 0, kcred); 2460 } 2461 2462 2463 /* 2464 * Called as part of ipcl_conn_destroy to assert and clear any pointers 2465 * in the conn_t. 
2466 */ 2467 void 2468 ipcl_conn_cleanup(conn_t *connp) 2469 { 2470 ASSERT(connp->conn_ire_cache == NULL); 2471 ASSERT(connp->conn_latch == NULL); 2472 #ifdef notdef 2473 ASSERT(connp->conn_rq == NULL); 2474 ASSERT(connp->conn_wq == NULL); 2475 #endif 2476 ASSERT(connp->conn_cred == NULL); 2477 ASSERT(connp->conn_g_fanout == NULL); 2478 ASSERT(connp->conn_g_next == NULL); 2479 ASSERT(connp->conn_g_prev == NULL); 2480 ASSERT(connp->conn_policy == NULL); 2481 ASSERT(connp->conn_fanout == NULL); 2482 ASSERT(connp->conn_next == NULL); 2483 ASSERT(connp->conn_prev == NULL); 2484 #ifdef notdef 2485 /* 2486 * The ill and ipif pointers are not cleared before the conn_t 2487 * goes away since they do not hold a reference on the ill/ipif. 2488 * We should replace these pointers with ifindex/ipaddr_t to 2489 * make the code less complex. 2490 */ 2491 ASSERT(connp->conn_outgoing_ill == NULL); 2492 ASSERT(connp->conn_incoming_ill == NULL); 2493 ASSERT(connp->conn_multicast_ipif == NULL); 2494 ASSERT(connp->conn_multicast_ill == NULL); 2495 #endif 2496 ASSERT(connp->conn_oper_pending_ill == NULL); 2497 ASSERT(connp->conn_ilg == NULL); 2498 ASSERT(connp->conn_drain_next == NULL); 2499 ASSERT(connp->conn_drain_prev == NULL); 2500 #ifdef notdef 2501 /* conn_idl is not cleared when removed from idl list */ 2502 ASSERT(connp->conn_idl == NULL); 2503 #endif 2504 ASSERT(connp->conn_ipsec_opt_mp == NULL); 2505 ASSERT(connp->conn_effective_cred == NULL); 2506 ASSERT(connp->conn_netstack == NULL); 2507 2508 ASSERT(connp->conn_helper_info == NULL); 2509 /* Clear out the conn_t fields that are not preserved */ 2510 bzero(&connp->conn_start_clr, 2511 sizeof (conn_t) - 2512 ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 2513 } 2514 2515 /* 2516 * All conns are inserted in a global multi-list for the benefit of 2517 * walkers. The walk is guaranteed to walk all open conns at the time 2518 * of the start of the walk exactly once. 
This property is needed to 2519 * achieve some cleanups during unplumb of interfaces. This is achieved 2520 * as follows. 2521 * 2522 * ipcl_conn_create and ipcl_conn_destroy are the only functions that 2523 * call the insert and delete functions below at creation and deletion 2524 * time respectively. The conn never moves or changes its position in this 2525 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 2526 * won't increase due to walkers, once the conn deletion has started. Note 2527 * that we can't remove the conn from the global list and then wait for 2528 * the refcnt to drop to zero, since walkers would then see a truncated 2529 * list. CONN_INCIPIENT ensures that walkers don't start looking at 2530 * conns until ip_open is ready to make them globally visible. 2531 * The global round robin multi-list locks are held only to get the 2532 * next member/insertion/deletion and contention should be negligible 2533 * if the multi-list is much greater than the number of cpus. 2534 */ 2535 void 2536 ipcl_globalhash_insert(conn_t *connp) 2537 { 2538 int index; 2539 struct connf_s *connfp; 2540 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2541 2542 /* 2543 * No need for atomic here. Approximate even distribution 2544 * in the global lists is sufficient. 2545 */ 2546 ipst->ips_conn_g_index++; 2547 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 2548 2549 connp->conn_g_prev = NULL; 2550 /* 2551 * Mark as INCIPIENT, so that walkers will ignore this 2552 * for now, till ip_open is ready to make it visible globally. 
2553 */ 2554 connp->conn_state_flags |= CONN_INCIPIENT; 2555 2556 connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 2557 /* Insert at the head of the list */ 2558 mutex_enter(&connfp->connf_lock); 2559 connp->conn_g_next = connfp->connf_head; 2560 if (connp->conn_g_next != NULL) 2561 connp->conn_g_next->conn_g_prev = connp; 2562 connfp->connf_head = connp; 2563 2564 /* The fanout bucket this conn points to */ 2565 connp->conn_g_fanout = connfp; 2566 2567 mutex_exit(&connfp->connf_lock); 2568 } 2569 2570 void 2571 ipcl_globalhash_remove(conn_t *connp) 2572 { 2573 struct connf_s *connfp; 2574 2575 /* 2576 * We were never inserted in the global multi list. 2577 * IPCL_NONE variety is never inserted in the global multilist 2578 * since it is presumed to not need any cleanup and is transient. 2579 */ 2580 if (connp->conn_g_fanout == NULL) 2581 return; 2582 2583 connfp = connp->conn_g_fanout; 2584 mutex_enter(&connfp->connf_lock); 2585 if (connp->conn_g_prev != NULL) 2586 connp->conn_g_prev->conn_g_next = connp->conn_g_next; 2587 else 2588 connfp->connf_head = connp->conn_g_next; 2589 if (connp->conn_g_next != NULL) 2590 connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2591 mutex_exit(&connfp->connf_lock); 2592 2593 /* Better to stumble on a null pointer than to corrupt memory */ 2594 connp->conn_g_next = NULL; 2595 connp->conn_g_prev = NULL; 2596 connp->conn_g_fanout = NULL; 2597 } 2598 2599 /* 2600 * Walk the list of all conn_t's in the system, calling the function provided 2601 * with the specified argument for each. 2602 * Applies to both IPv4 and IPv6. 2603 * 2604 * IPCs may hold pointers to ipif/ill. To guard against stale pointers 2605 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 2606 * unplumbed or removed. New conn_t's that are created while we are walking 2607 * may be missed by this walk, because they are not necessarily inserted 2608 * at the tail of the list. 
They are new conn_t's and thus don't have any 2609 * stale pointers. The CONN_CLOSING flag ensures that no new reference 2610 * is created to the struct that is going away. 2611 */ 2612 void 2613 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 2614 { 2615 int i; 2616 conn_t *connp; 2617 conn_t *prev_connp; 2618 2619 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2620 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2621 prev_connp = NULL; 2622 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 2623 while (connp != NULL) { 2624 mutex_enter(&connp->conn_lock); 2625 if (connp->conn_state_flags & 2626 (CONN_CONDEMNED | CONN_INCIPIENT)) { 2627 mutex_exit(&connp->conn_lock); 2628 connp = connp->conn_g_next; 2629 continue; 2630 } 2631 CONN_INC_REF_LOCKED(connp); 2632 mutex_exit(&connp->conn_lock); 2633 mutex_exit( 2634 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2635 (*func)(connp, arg); 2636 if (prev_connp != NULL) 2637 CONN_DEC_REF(prev_connp); 2638 mutex_enter( 2639 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2640 prev_connp = connp; 2641 connp = connp->conn_g_next; 2642 } 2643 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2644 if (prev_connp != NULL) 2645 CONN_DEC_REF(prev_connp); 2646 } 2647 } 2648 2649 /* 2650 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 2651 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 2652 * held; caller must call CONN_DEC_REF. Only checks for connected entries 2653 * (peer tcp in ESTABLISHED state). 2654 */ 2655 conn_t * 2656 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph, 2657 ip_stack_t *ipst) 2658 { 2659 uint32_t ports; 2660 uint16_t *pports = (uint16_t *)&ports; 2661 connf_t *connfp; 2662 conn_t *tconnp; 2663 boolean_t zone_chk; 2664 2665 /* 2666 * If either the source of destination address is loopback, then 2667 * both endpoints must be in the same Zone. 
Otherwise, both of 2668 * the addresses are system-wide unique (tcp is in ESTABLISHED 2669 * state) and the endpoints may reside in different Zones. 2670 */ 2671 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 2672 ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 2673 2674 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 2675 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 2676 2677 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2678 ports, ipst)]; 2679 2680 mutex_enter(&connfp->connf_lock); 2681 for (tconnp = connfp->connf_head; tconnp != NULL; 2682 tconnp = tconnp->conn_next) { 2683 2684 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2685 ipha->ipha_dst, ipha->ipha_src, ports) && 2686 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2687 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2688 2689 ASSERT(tconnp != connp); 2690 CONN_INC_REF(tconnp); 2691 mutex_exit(&connfp->connf_lock); 2692 return (tconnp); 2693 } 2694 } 2695 mutex_exit(&connfp->connf_lock); 2696 return (NULL); 2697 } 2698 2699 /* 2700 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 2701 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 2702 * held; caller must call CONN_DEC_REF. Only checks for connected entries 2703 * (peer tcp in ESTABLISHED state). 2704 */ 2705 conn_t * 2706 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph, 2707 ip_stack_t *ipst) 2708 { 2709 uint32_t ports; 2710 uint16_t *pports = (uint16_t *)&ports; 2711 connf_t *connfp; 2712 conn_t *tconnp; 2713 boolean_t zone_chk; 2714 2715 /* 2716 * If either the source of destination address is loopback, then 2717 * both endpoints must be in the same Zone. Otherwise, both of 2718 * the addresses are system-wide unique (tcp is in ESTABLISHED 2719 * state) and the endpoints may reside in different Zones. 
We 2720 * don't do Zone check for link local address(es) because the 2721 * current Zone implementation treats each link local address as 2722 * being unique per system node, i.e. they belong to global Zone. 2723 */ 2724 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 2725 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 2726 2727 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 2728 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 2729 2730 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2731 ports, ipst)]; 2732 2733 mutex_enter(&connfp->connf_lock); 2734 for (tconnp = connfp->connf_head; tconnp != NULL; 2735 tconnp = tconnp->conn_next) { 2736 2737 /* We skip tcp_bound_if check here as this is loopback tcp */ 2738 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2739 ip6h->ip6_dst, ip6h->ip6_src, ports) && 2740 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2741 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2742 2743 ASSERT(tconnp != connp); 2744 CONN_INC_REF(tconnp); 2745 mutex_exit(&connfp->connf_lock); 2746 return (tconnp); 2747 } 2748 } 2749 mutex_exit(&connfp->connf_lock); 2750 return (NULL); 2751 } 2752 2753 /* 2754 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2755 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2756 * Only checks for connected entries i.e. no INADDR_ANY checks. 
2757 */ 2758 conn_t * 2759 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, 2760 ip_stack_t *ipst) 2761 { 2762 uint32_t ports; 2763 uint16_t *pports; 2764 connf_t *connfp; 2765 conn_t *tconnp; 2766 2767 pports = (uint16_t *)&ports; 2768 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 2769 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 2770 2771 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2772 ports, ipst)]; 2773 2774 mutex_enter(&connfp->connf_lock); 2775 for (tconnp = connfp->connf_head; tconnp != NULL; 2776 tconnp = tconnp->conn_next) { 2777 2778 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2779 ipha->ipha_dst, ipha->ipha_src, ports) && 2780 tconnp->conn_tcp->tcp_state >= min_state) { 2781 2782 CONN_INC_REF(tconnp); 2783 mutex_exit(&connfp->connf_lock); 2784 return (tconnp); 2785 } 2786 } 2787 mutex_exit(&connfp->connf_lock); 2788 return (NULL); 2789 } 2790 2791 /* 2792 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2793 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2794 * Only checks for connected entries i.e. no INADDR_ANY checks. 2795 * Match on ifindex in addition to addresses. 
2796 */ 2797 conn_t * 2798 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2799 uint_t ifindex, ip_stack_t *ipst) 2800 { 2801 tcp_t *tcp; 2802 uint32_t ports; 2803 uint16_t *pports; 2804 connf_t *connfp; 2805 conn_t *tconnp; 2806 2807 pports = (uint16_t *)&ports; 2808 pports[0] = tcpha->tha_fport; 2809 pports[1] = tcpha->tha_lport; 2810 2811 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2812 ports, ipst)]; 2813 2814 mutex_enter(&connfp->connf_lock); 2815 for (tconnp = connfp->connf_head; tconnp != NULL; 2816 tconnp = tconnp->conn_next) { 2817 2818 tcp = tconnp->conn_tcp; 2819 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2820 ip6h->ip6_dst, ip6h->ip6_src, ports) && 2821 tcp->tcp_state >= min_state && 2822 (tcp->tcp_bound_if == 0 || 2823 tcp->tcp_bound_if == ifindex)) { 2824 2825 CONN_INC_REF(tconnp); 2826 mutex_exit(&connfp->connf_lock); 2827 return (tconnp); 2828 } 2829 } 2830 mutex_exit(&connfp->connf_lock); 2831 return (NULL); 2832 } 2833 2834 /* 2835 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 2836 * a listener when changing state. 2837 */ 2838 conn_t * 2839 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2840 ip_stack_t *ipst) 2841 { 2842 connf_t *bind_connfp; 2843 conn_t *connp; 2844 tcp_t *tcp; 2845 2846 /* 2847 * Avoid false matches for packets sent to an IP destination of 2848 * all zeros. 
2849 */ 2850 if (laddr == 0) 2851 return (NULL); 2852 2853 ASSERT(zoneid != ALL_ZONES); 2854 2855 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2856 mutex_enter(&bind_connfp->connf_lock); 2857 for (connp = bind_connfp->connf_head; connp != NULL; 2858 connp = connp->conn_next) { 2859 tcp = connp->conn_tcp; 2860 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 2861 IPCL_ZONE_MATCH(connp, zoneid) && 2862 (tcp->tcp_listener == NULL)) { 2863 CONN_INC_REF(connp); 2864 mutex_exit(&bind_connfp->connf_lock); 2865 return (connp); 2866 } 2867 } 2868 mutex_exit(&bind_connfp->connf_lock); 2869 return (NULL); 2870 } 2871 2872 /* 2873 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 2874 * a listener when changing state. 2875 */ 2876 conn_t * 2877 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2878 zoneid_t zoneid, ip_stack_t *ipst) 2879 { 2880 connf_t *bind_connfp; 2881 conn_t *connp = NULL; 2882 tcp_t *tcp; 2883 2884 /* 2885 * Avoid false matches for packets sent to an IP destination of 2886 * all zeros. 2887 */ 2888 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 2889 return (NULL); 2890 2891 ASSERT(zoneid != ALL_ZONES); 2892 2893 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2894 mutex_enter(&bind_connfp->connf_lock); 2895 for (connp = bind_connfp->connf_head; connp != NULL; 2896 connp = connp->conn_next) { 2897 tcp = connp->conn_tcp; 2898 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 2899 IPCL_ZONE_MATCH(connp, zoneid) && 2900 (tcp->tcp_bound_if == 0 || 2901 tcp->tcp_bound_if == ifindex) && 2902 tcp->tcp_listener == NULL) { 2903 CONN_INC_REF(connp); 2904 mutex_exit(&bind_connfp->connf_lock); 2905 return (connp); 2906 } 2907 } 2908 mutex_exit(&bind_connfp->connf_lock); 2909 return (NULL); 2910 } 2911 2912 /* 2913 * ipcl_get_next_conn 2914 * get the next entry in the conn global list 2915 * and put a reference on the next_conn. 
2916 * decrement the reference on the current conn. 2917 * 2918 * This is an iterator based walker function that also provides for 2919 * some selection by the caller. It walks through the conn_hash bucket 2920 * searching for the next valid connp in the list, and selects connections 2921 * that are neither closed nor condemned. It also REFHOLDS the conn 2922 * thus ensuring that the conn exists when the caller uses the conn. 2923 */ 2924 conn_t * 2925 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2926 { 2927 conn_t *next_connp; 2928 2929 if (connfp == NULL) 2930 return (NULL); 2931 2932 mutex_enter(&connfp->connf_lock); 2933 2934 next_connp = (connp == NULL) ? 2935 connfp->connf_head : connp->conn_g_next; 2936 2937 while (next_connp != NULL) { 2938 mutex_enter(&next_connp->conn_lock); 2939 if (!(next_connp->conn_flags & conn_flags) || 2940 (next_connp->conn_state_flags & 2941 (CONN_CONDEMNED | CONN_INCIPIENT))) { 2942 /* 2943 * This conn has been condemned or 2944 * is closing, or the flags don't match 2945 */ 2946 mutex_exit(&next_connp->conn_lock); 2947 next_connp = next_connp->conn_g_next; 2948 continue; 2949 } 2950 CONN_INC_REF_LOCKED(next_connp); 2951 mutex_exit(&next_connp->conn_lock); 2952 break; 2953 } 2954 2955 mutex_exit(&connfp->connf_lock); 2956 2957 if (connp != NULL) 2958 CONN_DEC_REF(connp); 2959 2960 return (next_connp); 2961 } 2962 2963 #ifdef CONN_DEBUG 2964 /* 2965 * Trace of the last NBUF refhold/refrele 2966 */ 2967 int 2968 conn_trace_ref(conn_t *connp) 2969 { 2970 int last; 2971 conn_trace_t *ctb; 2972 2973 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2974 last = connp->conn_trace_last; 2975 last++; 2976 if (last == CONN_TRACE_MAX) 2977 last = 0; 2978 2979 ctb = &connp->conn_trace_buf[last]; 2980 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 2981 connp->conn_trace_last = last; 2982 return (1); 2983 } 2984 2985 int 2986 conn_untrace_ref(conn_t *connp) 2987 { 2988 int last; 2989 conn_trace_t *ctb; 2990 2991 
ASSERT(MUTEX_HELD(&connp->conn_lock)); 2992 last = connp->conn_trace_last; 2993 last++; 2994 if (last == CONN_TRACE_MAX) 2995 last = 0; 2996 2997 ctb = &connp->conn_trace_buf[last]; 2998 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 2999 connp->conn_trace_last = last; 3000 return (1); 3001 } 3002 #endif 3003