1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IP PACKET CLASSIFIER 28 * 29 * The IP packet classifier provides mapping between IP packets and persistent 30 * connection state for connection-oriented protocols. It also provides 31 * interface for managing connection states. 32 * 33 * The connection state is kept in conn_t data structure and contains, among 34 * other things: 35 * 36 * o local/remote address and ports 37 * o Transport protocol 38 * o squeue for the connection (for TCP only) 39 * o reference counter 40 * o Connection state 41 * o hash table linkage 42 * o interface/ire information 43 * o credentials 44 * o ipsec policy 45 * o send and receive functions. 46 * o mutex lock. 47 * 48 * Connections use a reference counting scheme. They are freed when the 49 * reference counter drops to zero. 
A reference is incremented when connection 50 * is placed in a list or table, when incoming packet for the connection arrives 51 * and when connection is processed via squeue (squeue processing may be 52 * asynchronous and the reference protects the connection from being destroyed 53 * before its processing is finished). 54 * 55 * send and receive functions are currently used for TCP only. The send function 56 * determines the IP entry point for the packet once it leaves TCP to be sent to 57 * the destination address. The receive function is used by IP when the packet 58 * should be passed for TCP processing. When a new connection is created these 59 * are set to ip_output() and tcp_input() respectively. During the lifetime of 60 * the connection the send and receive functions may change depending on the 61 * changes in the connection state. For example, Once the connection is bound to 62 * an addresse, the receive function for this connection is set to 63 * tcp_conn_request(). This allows incoming SYNs to go directly into the 64 * listener SYN processing function without going to tcp_input() first. 65 * 66 * Classifier uses several hash tables: 67 * 68 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state 69 * ipcl_bind_fanout: contains all connections in BOUND state 70 * ipcl_proto_fanout: IPv4 protocol fanout 71 * ipcl_proto_fanout_v6: IPv6 protocol fanout 72 * ipcl_udp_fanout: contains all UDP connections 73 * ipcl_iptun_fanout: contains all IP tunnel connections 74 * ipcl_globalhash_fanout: contains all connections 75 * 76 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) 77 * which need to view all existing connections. 78 * 79 * All tables are protected by per-bucket locks. When both per-bucket lock and 80 * connection lock need to be held, the per-bucket lock should be acquired 81 * first, followed by the connection lock. 
82 * 83 * All functions doing search in one of these tables increment a reference 84 * counter on the connection found (if any). This reference should be dropped 85 * when the caller has finished processing the connection. 86 * 87 * 88 * INTERFACES: 89 * =========== 90 * 91 * Connection Lookup: 92 * ------------------ 93 * 94 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack) 95 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack) 96 * 97 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if 98 * it can't find any associated connection. If the connection is found, its 99 * reference counter is incremented. 100 * 101 * mp: mblock, containing packet header. The full header should fit 102 * into a single mblock. It should also contain at least full IP 103 * and TCP or UDP header. 104 * 105 * protocol: Either IPPROTO_TCP or IPPROTO_UDP. 106 * 107 * hdr_len: The size of IP header. It is used to find TCP or UDP header in 108 * the packet. 109 * 110 * zoneid: The zone in which the returned connection must be; the zoneid 111 * corresponding to the ire_zoneid on the IRE located for the 112 * packet's destination address. 113 * 114 * For TCP connections, the lookup order is as follows: 115 * 5-tuple {src, dst, protocol, local port, remote port} 116 * lookup in ipcl_conn_fanout table. 117 * 3-tuple {dst, remote port, protocol} lookup in 118 * ipcl_bind_fanout table. 119 * 120 * For UDP connections, a 5-tuple {src, dst, protocol, local port, 121 * remote port} lookup is done on ipcl_udp_fanout. Note that, 122 * these interfaces do not handle cases where a packets belongs 123 * to multiple UDP clients, which is handled in IP itself. 124 * 125 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must 126 * determine which actual zone gets the segment. This is used only in a 127 * labeled environment. 
The matching rules are: 128 * 129 * - If it's not a multilevel port, then the label on the packet selects 130 * the zone. Unlabeled packets are delivered to the global zone. 131 * 132 * - If it's a multilevel port, then only the zone registered to receive 133 * packets on that port matches. 134 * 135 * Also, in a labeled environment, packet labels need to be checked. For fully 136 * bound TCP connections, we can assume that the packet label was checked 137 * during connection establishment, and doesn't need to be checked on each 138 * packet. For others, though, we need to check for strict equality or, for 139 * multilevel ports, membership in the range or set. This part currently does 140 * a tnrh lookup on each packet, but could be optimized to use cached results 141 * if that were necessary. (SCTP doesn't come through here, but if it did, 142 * we would apply the same rules as TCP.) 143 * 144 * An implication of the above is that fully-bound TCP sockets must always use 145 * distinct 4-tuples; they can't be discriminated by label alone. 146 * 147 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets, 148 * as there's no connection set-up handshake and no shared state. 149 * 150 * Labels on looped-back packets within a single zone do not need to be 151 * checked, as all processes in the same zone have the same label. 152 * 153 * Finally, for unlabeled packets received by a labeled system, special rules 154 * apply. We consider only the MLP if there is one. Otherwise, we prefer a 155 * socket in the zone whose label matches the default label of the sender, if 156 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the 157 * receiver's label must dominate the sender's default label. 
158 * 159 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack); 160 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t, 161 * ip_stack); 162 * 163 * Lookup routine to find a exact match for {src, dst, local port, 164 * remote port) for TCP connections in ipcl_conn_fanout. The address and 165 * ports are read from the IP and TCP header respectively. 166 * 167 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol, 168 * zoneid, ip_stack); 169 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex, 170 * zoneid, ip_stack); 171 * 172 * Lookup routine to find a listener with the tuple {lport, laddr, 173 * protocol} in the ipcl_bind_fanout table. For IPv6, an additional 174 * parameter interface index is also compared. 175 * 176 * void ipcl_walk(func, arg, ip_stack) 177 * 178 * Apply 'func' to every connection available. The 'func' is called as 179 * (*func)(connp, arg). The walk is non-atomic so connections may be 180 * created and destroyed during the walk. The CONN_CONDEMNED and 181 * CONN_INCIPIENT flags ensure that connections which are newly created 182 * or being destroyed are not selected by the walker. 183 * 184 * Table Updates 185 * ------------- 186 * 187 * int ipcl_conn_insert(connp, protocol, src, dst, ports) 188 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex) 189 * 190 * Insert 'connp' in the ipcl_conn_fanout. 191 * Arguements : 192 * connp conn_t to be inserted 193 * protocol connection protocol 194 * src source address 195 * dst destination address 196 * ports local and remote port 197 * ifindex interface index for IPv6 connections 198 * 199 * Return value : 200 * 0 if connp was inserted 201 * EADDRINUSE if the connection with the same tuple 202 * already exists. 203 * 204 * int ipcl_bind_insert(connp, protocol, src, lport); 205 * int ipcl_bind_insert_v6(connp, protocol, src, lport); 206 * 207 * Insert 'connp' in ipcl_bind_fanout. 
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created i.e., which kmem_cache it comes from.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_SCTPCONN	indicates a SCTP connection
 *		IPCL_UDPCONN	indicates a UDP conn_t.
 *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
 *		IPCL_RTSCONN	indicates a RTS conn_t.
 *		IPCL_IPCCONN	indicates all other connections.
 *
 * void ipcl_conn_destroy(connp)
 *
 *	Destroys the connection state, removes it from the global
 *	connection hash table and frees its memory.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#define	_SUN_TPI_VERSION 2
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/isa_defs.h>
#include <inet/common.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>

#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
#include <inet/ip_impl.h>
#include <inet/udp_impl.h>
#include <inet/sctp_ip.h>
#include <inet/sctp/sctp_impl.h>
#include <inet/rawip_impl.h>
#include <inet/rts_impl.h>
#include <inet/iptun/iptun_impl.h>

#include <sys/cpuvar.h>

#include <inet/ipclassifier.h>
#include <inet/tcp.h>
#include <inet/ipsec_impl.h>

#include <sys/tsol/tnet.h>
#include <sys/sockio.h>

/* Classifier debug tracing is only compiled into DEBUG kernels. */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

#ifdef IPCL_DEBUG
/* Bit mask selecting which IPCL_DEBUG_LVL() messages are printed. */
int	ipcl_debug_level = 0;
#define	IPCL_DEBUG_LVL(level, args)	\
	if (ipcl_debug_level & level) { printf args; }
#else
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif

/* Old value for compatibility. Setable in /etc/system */
uint_t tcp_conn_hash_size = 0;

/* New value. Zero means choose automatically.  Setable in /etc/system */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;

/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}

/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
 */
typedef union itc_s {
	conn_t	itc_conn;
	char	itcu_filler[CACHE_ALIGN(conn_s)];
} itc_t;

/*
 * Per-protocol conn_t kmem caches, created in ipcl_g_init().  The TCP,
 * UDP, rawip and RTS caches allocate the transport-specific state
 * immediately after the cache-aligned conn_t (see itc_t above).
 */
struct kmem_cache  *tcp_conn_cache;
struct kmem_cache  *ip_conn_cache;
struct kmem_cache  *ip_helper_stream_cache;
extern struct kmem_cache  *sctp_conn_cache;
extern struct kmem_cache  *tcp_sack_info_cache;
extern struct kmem_cache  *tcp_iphc_cache;
struct kmem_cache  *udp_conn_cache;
struct kmem_cache  *rawip_conn_cache;
struct kmem_cache  *rts_conn_cache;

extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/* kmem cache constructor/destructor pairs, one per conn type. */
static	int	ip_conn_constructor(void *, void *, int);
static	void	ip_conn_destructor(void *, void *);

static	int	tcp_conn_constructor(void *, void *, int);
static	void	tcp_conn_destructor(void *, void *);

static	int	udp_conn_constructor(void *, void *, int);
static	void	udp_conn_destructor(void *, void *);

static	int	rawip_conn_constructor(void *, void *, int);
static	void	rawip_conn_destructor(void *, void *);

static	int	rts_conn_constructor(void *, void *, int);
static	void	rts_conn_destructor(void *, void *);

static	int	ip_helper_stream_constructor(void *, void *, int);
static	void	ip_helper_stream_destructor(void *, void *);

boolean_t ip_use_helper_cache = B_TRUE;

/*
 * Hook functions to enable cluster networking
 * On non-clustered systems these vectors must always be NULL.
 */
extern void	(*cl_inet_listen)(netstackid_t, uint8_t, sa_family_t,
		    uint8_t *, in_port_t, void *);
extern void	(*cl_inet_unlisten)(netstackid_t, uint8_t, sa_family_t,
		    uint8_t *, in_port_t, void *);

#ifdef IPCL_DEBUG
/* "255.255.255.255" plus NUL, rounded up. */
#define	INET_NTOA_BUFSIZE	18

/* Reentrant dotted-quad formatter for debug messages; returns 'b'. */
static char *
inet_ntoa_r(uint32_t in, char *b)
{
	unsigned char	*p;

	p = (unsigned char *)&in;
	(void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
	return (b);
}
#endif

/*
 * Global (for all stack instances) init routine
 *
 * Creates the conn_t kmem caches shared by every IP stack instance.
 * The helper stream cache is optional, controlled by ip_use_helper_cache.
 */
void
ipcl_g_init(void)
{
	ip_conn_cache = kmem_cache_create("ip_conn_cache",
	    sizeof (conn_t), CACHE_ALIGN_SIZE,
	    ip_conn_constructor, ip_conn_destructor,
	    NULL, NULL, NULL, 0);

	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
	    tcp_conn_constructor, tcp_conn_destructor,
	    NULL, NULL, NULL, 0);

	udp_conn_cache = kmem_cache_create("udp_conn_cache",
	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
	    udp_conn_constructor, udp_conn_destructor,
	    NULL, NULL, NULL, 0);

	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
	    rawip_conn_constructor, rawip_conn_destructor,
	    NULL, NULL, NULL, 0);

	rts_conn_cache = kmem_cache_create("rts_conn_cache",
	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
	    rts_conn_constructor, rts_conn_destructor,
	    NULL, NULL, NULL, 0);

	if (ip_use_helper_cache) {
		ip_helper_stream_cache = kmem_cache_create
		    ("ip_helper_stream_cache", sizeof (ip_helper_stream_info_t),
		    CACHE_ALIGN_SIZE, ip_helper_stream_constructor,
		    ip_helper_stream_destructor, NULL, NULL, NULL, 0);
	} else {
		ip_helper_stream_cache = NULL;
	}
}

/*
 *
ipclassifier intialization routine, sets up hash tables. 436 */ 437 void 438 ipcl_init(ip_stack_t *ipst) 439 { 440 int i; 441 int sizes[] = P2Ps(); 442 443 /* 444 * Calculate size of conn fanout table from /etc/system settings 445 */ 446 if (ipcl_conn_hash_size != 0) { 447 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 448 } else if (tcp_conn_hash_size != 0) { 449 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 450 } else { 451 extern pgcnt_t freemem; 452 453 ipst->ips_ipcl_conn_fanout_size = 454 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 455 456 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 457 ipst->ips_ipcl_conn_fanout_size = 458 ipcl_conn_hash_maxsize; 459 } 460 } 461 462 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 463 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 464 break; 465 } 466 } 467 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 468 /* Out of range, use the 2^16 value */ 469 ipst->ips_ipcl_conn_fanout_size = sizes[16]; 470 } 471 472 /* Take values from /etc/system */ 473 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 474 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 475 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 476 ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size; 477 478 ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 479 480 ipst->ips_ipcl_conn_fanout = kmem_zalloc( 481 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 482 483 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 484 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 485 MUTEX_DEFAULT, NULL); 486 } 487 488 ipst->ips_ipcl_bind_fanout = kmem_zalloc( 489 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 490 491 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 492 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 493 MUTEX_DEFAULT, NULL); 494 } 495 496 ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * 497 sizeof (connf_t), 
KM_SLEEP); 498 for (i = 0; i < IPPROTO_MAX; i++) { 499 mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, 500 MUTEX_DEFAULT, NULL); 501 } 502 503 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 504 sizeof (connf_t), KM_SLEEP); 505 for (i = 0; i < IPPROTO_MAX; i++) { 506 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 507 MUTEX_DEFAULT, NULL); 508 } 509 510 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 511 mutex_init(&ipst->ips_rts_clients->connf_lock, 512 NULL, MUTEX_DEFAULT, NULL); 513 514 ipst->ips_ipcl_udp_fanout = kmem_zalloc( 515 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 516 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 517 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 518 MUTEX_DEFAULT, NULL); 519 } 520 521 ipst->ips_ipcl_iptun_fanout = kmem_zalloc( 522 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP); 523 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 524 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL, 525 MUTEX_DEFAULT, NULL); 526 } 527 528 ipst->ips_ipcl_raw_fanout = kmem_zalloc( 529 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 530 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 531 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 532 MUTEX_DEFAULT, NULL); 533 } 534 535 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 536 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 537 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 538 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 539 NULL, MUTEX_DEFAULT, NULL); 540 } 541 } 542 543 void 544 ipcl_g_destroy(void) 545 { 546 kmem_cache_destroy(ip_conn_cache); 547 kmem_cache_destroy(tcp_conn_cache); 548 kmem_cache_destroy(udp_conn_cache); 549 kmem_cache_destroy(rawip_conn_cache); 550 kmem_cache_destroy(rts_conn_cache); 551 } 552 553 /* 554 * All user-level and kernel use of the stack must be gone 555 * by now. 
556 */ 557 void 558 ipcl_destroy(ip_stack_t *ipst) 559 { 560 int i; 561 562 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 563 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 564 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 565 } 566 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 567 sizeof (connf_t)); 568 ipst->ips_ipcl_conn_fanout = NULL; 569 570 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 571 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 572 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 573 } 574 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 575 sizeof (connf_t)); 576 ipst->ips_ipcl_bind_fanout = NULL; 577 578 for (i = 0; i < IPPROTO_MAX; i++) { 579 ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); 580 mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); 581 } 582 kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); 583 ipst->ips_ipcl_proto_fanout = NULL; 584 585 for (i = 0; i < IPPROTO_MAX; i++) { 586 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 587 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 588 } 589 kmem_free(ipst->ips_ipcl_proto_fanout_v6, 590 IPPROTO_MAX * sizeof (connf_t)); 591 ipst->ips_ipcl_proto_fanout_v6 = NULL; 592 593 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 594 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 595 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 596 } 597 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 598 sizeof (connf_t)); 599 ipst->ips_ipcl_udp_fanout = NULL; 600 601 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 602 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL); 603 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock); 604 } 605 kmem_free(ipst->ips_ipcl_iptun_fanout, 606 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t)); 607 ipst->ips_ipcl_iptun_fanout = NULL; 608 
609 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 610 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 611 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 612 } 613 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 614 sizeof (connf_t)); 615 ipst->ips_ipcl_raw_fanout = NULL; 616 617 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 618 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 619 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 620 } 621 kmem_free(ipst->ips_ipcl_globalhash_fanout, 622 sizeof (connf_t) * CONN_G_HASH_SIZE); 623 ipst->ips_ipcl_globalhash_fanout = NULL; 624 625 ASSERT(ipst->ips_rts_clients->connf_head == NULL); 626 mutex_destroy(&ipst->ips_rts_clients->connf_lock); 627 kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 628 ipst->ips_rts_clients = NULL; 629 } 630 631 /* 632 * conn creation routine. initialize the conn, sets the reference 633 * and inserts it in the global hash table. 634 */ 635 conn_t * 636 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 637 { 638 conn_t *connp; 639 sctp_stack_t *sctps; 640 struct kmem_cache *conn_cache; 641 642 switch (type) { 643 case IPCL_SCTPCONN: 644 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 645 return (NULL); 646 sctp_conn_init(connp); 647 sctps = ns->netstack_sctp; 648 SCTP_G_Q_REFHOLD(sctps); 649 netstack_hold(ns); 650 connp->conn_netstack = ns; 651 return (connp); 652 653 case IPCL_TCPCONN: 654 conn_cache = tcp_conn_cache; 655 break; 656 657 case IPCL_UDPCONN: 658 conn_cache = udp_conn_cache; 659 break; 660 661 case IPCL_RAWIPCONN: 662 conn_cache = rawip_conn_cache; 663 break; 664 665 case IPCL_RTSCONN: 666 conn_cache = rts_conn_cache; 667 break; 668 669 case IPCL_IPCCONN: 670 conn_cache = ip_conn_cache; 671 break; 672 673 default: 674 connp = NULL; 675 ASSERT(0); 676 } 677 678 if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 679 return (NULL); 680 681 connp->conn_ref = 1; 682 
netstack_hold(ns); 683 connp->conn_netstack = ns; 684 ipcl_globalhash_insert(connp); 685 return (connp); 686 } 687 688 void 689 ipcl_conn_destroy(conn_t *connp) 690 { 691 mblk_t *mp; 692 netstack_t *ns = connp->conn_netstack; 693 694 ASSERT(!MUTEX_HELD(&connp->conn_lock)); 695 ASSERT(connp->conn_ref == 0); 696 ASSERT(connp->conn_ire_cache == NULL); 697 698 DTRACE_PROBE1(conn__destroy, conn_t *, connp); 699 700 if (connp->conn_effective_cred != NULL) { 701 crfree(connp->conn_effective_cred); 702 connp->conn_effective_cred = NULL; 703 } 704 705 if (connp->conn_cred != NULL) { 706 crfree(connp->conn_cred); 707 connp->conn_cred = NULL; 708 } 709 710 ipcl_globalhash_remove(connp); 711 712 /* FIXME: add separate tcp_conn_free()? */ 713 if (connp->conn_flags & IPCL_TCPCONN) { 714 tcp_t *tcp = connp->conn_tcp; 715 tcp_stack_t *tcps; 716 717 ASSERT(tcp != NULL); 718 tcps = tcp->tcp_tcps; 719 if (tcps != NULL) { 720 if (connp->conn_latch != NULL) { 721 IPLATCH_REFRELE(connp->conn_latch, ns); 722 connp->conn_latch = NULL; 723 } 724 if (connp->conn_policy != NULL) { 725 IPPH_REFRELE(connp->conn_policy, ns); 726 connp->conn_policy = NULL; 727 } 728 tcp->tcp_tcps = NULL; 729 TCPS_REFRELE(tcps); 730 } 731 732 tcp_free(tcp); 733 mp = tcp->tcp_timercache; 734 tcp->tcp_cred = NULL; 735 736 if (tcp->tcp_sack_info != NULL) { 737 bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 738 kmem_cache_free(tcp_sack_info_cache, 739 tcp->tcp_sack_info); 740 } 741 if (tcp->tcp_iphc != NULL) { 742 if (tcp->tcp_hdr_grown) { 743 kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 744 } else { 745 bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 746 kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 747 } 748 tcp->tcp_iphc_len = 0; 749 } 750 ASSERT(tcp->tcp_iphc_len == 0); 751 752 /* 753 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 754 * the mblk. 
755 */ 756 if (tcp->tcp_rsrv_mp != NULL) { 757 freeb(tcp->tcp_rsrv_mp); 758 tcp->tcp_rsrv_mp = NULL; 759 mutex_destroy(&tcp->tcp_rsrv_mp_lock); 760 } 761 762 ASSERT(connp->conn_latch == NULL); 763 ASSERT(connp->conn_policy == NULL); 764 765 if (ns != NULL) { 766 ASSERT(tcp->tcp_tcps == NULL); 767 connp->conn_netstack = NULL; 768 netstack_rele(ns); 769 } 770 771 ipcl_conn_cleanup(connp); 772 connp->conn_flags = IPCL_TCPCONN; 773 bzero(tcp, sizeof (tcp_t)); 774 775 tcp->tcp_timercache = mp; 776 tcp->tcp_connp = connp; 777 kmem_cache_free(tcp_conn_cache, connp); 778 return; 779 } 780 if (connp->conn_latch != NULL) { 781 IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); 782 connp->conn_latch = NULL; 783 } 784 if (connp->conn_policy != NULL) { 785 IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 786 connp->conn_policy = NULL; 787 } 788 if (connp->conn_ipsec_opt_mp != NULL) { 789 freemsg(connp->conn_ipsec_opt_mp); 790 connp->conn_ipsec_opt_mp = NULL; 791 } 792 793 if (connp->conn_flags & IPCL_SCTPCONN) { 794 ASSERT(ns != NULL); 795 sctp_free(connp); 796 return; 797 } 798 799 if (ns != NULL) { 800 connp->conn_netstack = NULL; 801 netstack_rele(ns); 802 } 803 804 ipcl_conn_cleanup(connp); 805 806 /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 807 if (connp->conn_flags & IPCL_UDPCONN) { 808 connp->conn_flags = IPCL_UDPCONN; 809 kmem_cache_free(udp_conn_cache, connp); 810 } else if (connp->conn_flags & IPCL_RAWIPCONN) { 811 812 connp->conn_flags = IPCL_RAWIPCONN; 813 connp->conn_ulp = IPPROTO_ICMP; 814 kmem_cache_free(rawip_conn_cache, connp); 815 } else if (connp->conn_flags & IPCL_RTSCONN) { 816 connp->conn_flags = IPCL_RTSCONN; 817 kmem_cache_free(rts_conn_cache, connp); 818 } else { 819 connp->conn_flags = IPCL_IPCCONN; 820 ASSERT(connp->conn_flags & IPCL_IPCCONN); 821 ASSERT(connp->conn_priv == NULL); 822 kmem_cache_free(ip_conn_cache, connp); 823 } 824 } 825 826 /* 827 * Running in cluster mode - deregister listener information 828 */ 829 830 static void 831 ipcl_conn_unlisten(conn_t *connp) 832 { 833 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 834 ASSERT(connp->conn_lport != 0); 835 836 if (cl_inet_unlisten != NULL) { 837 sa_family_t addr_family; 838 uint8_t *laddrp; 839 840 if (connp->conn_pkt_isv6) { 841 addr_family = AF_INET6; 842 laddrp = (uint8_t *)&connp->conn_bound_source_v6; 843 } else { 844 addr_family = AF_INET; 845 laddrp = (uint8_t *)&connp->conn_bound_source; 846 } 847 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, 848 IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); 849 } 850 connp->conn_flags &= ~IPCL_CL_LISTENER; 851 } 852 853 /* 854 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 855 * which table the conn belonged to). So for debugging we can see which hash 856 * table this connection was in. 
 */
/*
 * Unlink 'connp' from whatever fanout bucket it is in (no-op if it is in
 * none).  Takes the bucket lock; the caller must NOT hold conn_lock
 * (bucket lock before conn lock is the documented lock order).  Drops the
 * reference the table held on the conn.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}

/* Function wrapper around IPCL_HASH_REMOVE() for external callers. */
void
ipcl_hash_remove(conn_t *connp)
{
	IPCL_HASH_REMOVE(connp);
}

/*
 * The whole purpose of this function is allow removal of
 * a conn_t from the connected hash for timewait reclaim.
 * This is essentially a TW reclaim fastpath where timewait
 * collector checks under fanout lock (so no one else can
 * get access to the conn_t) that refcnt is 2 i.e. one for
 * TCP and one for the classifier hash list. If ref count
 * is indeed 2, we can just remove the conn under lock and
 * avoid cleaning up the conn under squeue. This gives us
 * improved performance.
 */
void
ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
{
	/* Caller holds both the bucket lock and the conn lock. */
	ASSERT(MUTEX_HELD(&connfp->connf_lock));
	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);

	if ((connp)->conn_next != NULL) {
		(connp)->conn_next->conn_prev = (connp)->conn_prev;
	}
	if ((connp)->conn_prev != NULL) {
		(connp)->conn_prev->conn_next = (connp)->conn_next;
	} else {
		connfp->connf_head = (connp)->conn_next;
	}
	(connp)->conn_fanout = NULL;
	(connp)->conn_next = NULL;
	(connp)->conn_prev = NULL;
	(connp)->conn_flags |= IPCL_REMOVED;
	/*
	 * Drop the hash table's reference directly instead of via
	 * CONN_DEC_REF(); the TW collector verified conn_ref == 2, so the
	 * conn cannot reach zero here.
	 */
	ASSERT((connp)->conn_ref == 2);
	(connp)->conn_ref--;
}

/*
 * Insert 'connp' at the head of 'connfp'; bucket lock must already be
 * held.  Marks the conn IPCL_CONNECTED and takes a reference for the
 * table.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_CONNECTED;						\
	CONN_INC_REF(connp);						\
}

/* Remove 'connp' from its current bucket, then insert it in 'connfp'. */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}

/*
 * Insert a bound conn into 'connfp' ahead of any wildcard (unspecified
 * source) entries, so fully-specified matches are found first on lookup.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
	    "connp %p", (void *)connfp, (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) {			\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}

/*
 * Insert a wildcard-bound conn into 'connfp'.  A v4-mapped wildcard is
 * placed before any unspecified-address entry of the same zone, so that
 * AF_INET wildcards are matched ahead of AF_INET6 in6addr_any binds;
 * otherwise the conn goes at the tail.
 *
 * NOTE(review): inside the match branch, "prev = next->conn_prev" looks
 * asymmetric with the list relinking done elsewhere — presumably relies on
 * prev == next->conn_prev at that point; confirm against the maintained
 * upstream before changing.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
		    connp->conn_zoneid == next->conn_zoneid) {		\
			(connp)->conn_next = next;			\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}

/* Function wrapper around IPCL_HASH_INSERT_WILDCARD() for callers. */
void
ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
{
	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
}

/*
 * Register 'connp' to receive all inbound IPv4 packets of 'protocol'.
 * Only AH/ESP may do this from a MAC-restricted conn (asserted).
 */
void
ipcl_proto_insert(conn_t *connp, uint8_t protocol)
{
	connf_t	*connfp;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	ASSERT(connp != NULL);
	ASSERT((connp->conn_mac_mode == CONN_MAC_DEFAULT) ||
	    protocol == IPPROTO_AH || protocol == IPPROTO_ESP);

	connp->conn_ulp = protocol;

	/* Insert it in the protocol hash */
	connfp = &ipst->ips_ipcl_proto_fanout[protocol];
	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
}

/*
 * IPv6 counterpart of ipcl_proto_insert(): register 'connp' in the v6
 * protocol fanout.
 */
void
ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol)
{
	connf_t	*connfp;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	ASSERT(connp != NULL);
	ASSERT((connp->conn_mac_mode == CONN_MAC_DEFAULT) ||
	    protocol == IPPROTO_AH || protocol == IPPROTO_ESP);

	connp->conn_ulp = protocol;

	/* Insert it in the Bind Hash */
	connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
}

/*
 * Because the classifier is used to classify inbound packets, the destination
 * address is meant to be our local tunnel address (tunnel source), and the
 * source the remote tunnel address (tunnel destination).
 */
conn_t *
ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*connp;

	/* first look for IPv4 tunnel links */
	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (IPCL_IPTUN_MATCH(connp, *dst, *src))
			break;
	}
	if (connp != NULL)
		goto done;

	mutex_exit(&connfp->connf_lock);

	/* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
	    INADDR_ANY)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
			break;
	}
done:
	/* Reference the match (if any) before dropping the bucket lock. */
	if (connp != NULL)
		CONN_INC_REF(connp);
	mutex_exit(&connfp->connf_lock);
	return (connp);
}

/*
 * IPv6 counterpart of ipcl_iptun_classify_v4(): look up the IPv6 tunnel
 * conn matching (dst, src) and return it with a reference held, or NULL.
 */
conn_t *
ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*connp;

	/* Look for an IPv6 tunnel link */
	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
			/* Reference taken under the bucket lock. */
			CONN_INC_REF(connp);
			break;
		}
	}
	mutex_exit(&connfp->connf_lock);
	return (connp);
}

/*
 * This function is used only for inserting SCTP raw socket now.
 * This may change later.
 *
 * Note that only one raw socket can be bound to a port. The param
 * lport is in network byte order.
 *
 * Returns EADDRNOTAVAIL if another raw socket in the same zone and
 * address family already covers the port, 0 on success.
 */
static int
ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
{
	connf_t	*connfp;
	conn_t	*oconnp;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];

	/* Check for existing raw socket already bound to the port. */
	mutex_enter(&connfp->connf_lock);
	for (oconnp = connfp->connf_head; oconnp != NULL;
	    oconnp = oconnp->conn_next) {
		/*
		 * Conflict if ports, zone and family match and either side
		 * is a wildcard address or the two source addresses are
		 * equal.
		 */
		if (oconnp->conn_lport == lport &&
		    oconnp->conn_zoneid == connp->conn_zoneid &&
		    oconnp->conn_af_isv6 == connp->conn_af_isv6 &&
		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) ||
		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6,
		    &connp->conn_srcv6))) {
			break;
		}
	}
	mutex_exit(&connfp->connf_lock);
	if (oconnp != NULL)
		return (EADDRNOTAVAIL);

	/*
	 * Pick the insertion flavor from how much of the 4-tuple is
	 * specified: connected > bound > wildcard.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) {
		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		} else {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		}
	} else {
		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
	}
	return (0);
}

/*
 * Insert an IPv4 tunnel conn keyed by (src, dst) into the iptun fanout.
 * Returns EADDRINUSE if a tunnel is already bound to those addresses.
 */
static int
ipcl_iptun_hash_insert(conn_t *connp, ipaddr_t src, ipaddr_t dst,
    ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*tconnp;

	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(src, dst)];
	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {
		if (IPCL_IPTUN_MATCH(tconnp, src, dst)) {
			/* A tunnel is already bound to these addresses. */
			mutex_exit(&connfp->connf_lock);
			return (EADDRINUSE);
		}
	}
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
	mutex_exit(&connfp->connf_lock);
	return (0);
}

/*
 * IPv6 counterpart of ipcl_iptun_hash_insert().
 */
static int
ipcl_iptun_hash_insert_v6(conn_t *connp, const in6_addr_t *src,
    const in6_addr_t *dst, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*tconnp;

	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(src, dst)];
	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {
		if (IPCL_IPTUN_MATCH_V6(tconnp, src, dst)) {
			/* A tunnel is already bound to these addresses. */
			mutex_exit(&connfp->connf_lock);
			return (EADDRINUSE);
		}
	}
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
	mutex_exit(&connfp->connf_lock);
	return (0);
}

/*
 * Check for a MAC exemption conflict on a labeled system. Note that for
 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
 * transport layer. This check is for binding all other protocols.
 *
 * Returns true if there's a conflict.
 */
static boolean_t
check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*tconn;

	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
	mutex_enter(&connfp->connf_lock);
	for (tconn = connfp->connf_head; tconn != NULL;
	    tconn = tconn->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket */
		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
			continue;
		/* If neither is exempt, then there's no conflict */
		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
			continue;
		/* We are only concerned about sockets for a different zone */
		if (connp->conn_zoneid == tconn->conn_zoneid)
			continue;
		/* If both are bound to different specific addrs, ok */
		if (connp->conn_src != INADDR_ANY &&
		    tconn->conn_src != INADDR_ANY &&
		    connp->conn_src != tconn->conn_src)
			continue;
		/* These two conflict; fail */
		break;
	}
	mutex_exit(&connfp->connf_lock);
	return (tconn != NULL);
}

/*
 * IPv6 counterpart of check_exempt_conflict_v4().
 */
static boolean_t
check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*tconn;

	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
	mutex_enter(&connfp->connf_lock);
	for (tconn = connfp->connf_head; tconn != NULL;
	    tconn = tconn->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket */
		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
			continue;
		/* If neither is exempt, then there's no conflict */
		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
			continue;
		/* We are only concerned about sockets for a different zone */
		if (connp->conn_zoneid == tconn->conn_zoneid)
			continue;
		/* If both are bound to different addrs, ok */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) &&
		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) &&
		    !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6))
			continue;
		/* These two conflict; fail */
		break;
	}
	mutex_exit(&connfp->connf_lock);
	return (tconn != NULL);
}

/*
 * (v4, v6) bind hash insertion routines
 */

/*
 * Record (protocol, src, lport) on connp and insert it into the fanout
 * appropriate for the protocol (UDP, TCP bind hash, SCTP raw, protocol
 * fanout, or IP tunnel).  lport is in network byte order.  Returns 0 or
 * an errno (e.g. EADDRINUSE on a labeled-system MAC exemption conflict).
 */
int
ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport)
{
	connf_t	*connfp;
#ifdef	IPCL_DEBUG
	char	buf[INET_NTOA_BUFSIZE];
#endif
	int	ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	ASSERT(connp);

	IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, "
	    "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport));

	connp->conn_ulp = protocol;
	IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6);
	connp->conn_lport = lport;

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert(connp, src, INADDR_ANY, ipst));

	switch (protocol) {
	default:
		if (is_system_labeled() &&
		    check_exempt_conflict_v4(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			IPCL_DEBUG_LVL(64,
			    ("ipcl_bind_insert: connp %p - udp\n",
			    (void *)connp));
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			IPCL_DEBUG_LVL(64,
			    ("ipcl_bind_insert: connp %p - protocol\n",
			    (void *)connp));
			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
		}

		/* connected > bound > wildcard, by how much is specified */
		if (connp->conn_rem != INADDR_ANY) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (connp->conn_src != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;

	case IPPROTO_TCP:

		/* Insert it in the Bind Hash */
		ASSERT(connp->conn_zoneid != ALL_ZONES);
		connfp = &ipst->ips_ipcl_bind_fanout[
		    IPCL_BIND_HASH(lport, ipst)];
		if (connp->conn_src != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		/* Notify the clustering hook, if registered, of the listener */
		if (cl_inet_listen != NULL) {
			ASSERT(!connp->conn_pkt_isv6);
			connp->conn_flags |= IPCL_CL_LISTENER;
			(*cl_inet_listen)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_TCP, AF_INET,
			    (uint8_t *)&connp->conn_bound_source, lport, NULL);
		}
		break;

	case IPPROTO_SCTP:
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;
	}

	return (ret);
}

/*
 * IPv6 counterpart of ipcl_bind_insert().  lport is in network byte
 * order.  Returns 0 or an errno.
 */
int
ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
    uint16_t lport)
{
	connf_t	*connfp;
	int	ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	ASSERT(connp != NULL);

	connp->conn_ulp = protocol;
	connp->conn_srcv6 = *src;
	connp->conn_lport = lport;

	if (IPCL_IS_IPTUN(connp)) {
		return (ipcl_iptun_hash_insert_v6(connp, src, &ipv6_all_zeros,
		    ipst));
	}

	switch (protocol) {
	default:
		if (is_system_labeled() &&
		    check_exempt_conflict_v6(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			IPCL_DEBUG_LVL(128,
			    ("ipcl_bind_insert_v6: connp %p - udp\n",
			    (void *)connp));
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			IPCL_DEBUG_LVL(128,
			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
			    (void *)connp));
			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
		}

		/* connected > bound > wildcard, by how much is specified */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;

	case IPPROTO_TCP:
		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */

		/* Insert it in the Bind Hash */
		ASSERT(connp->conn_zoneid != ALL_ZONES);
		connfp = &ipst->ips_ipcl_bind_fanout[
		    IPCL_BIND_HASH(lport, ipst)];
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		/* Notify the clustering hook, if registered, of the listener */
		if (cl_inet_listen != NULL) {
			sa_family_t	addr_family;
			uint8_t		*laddrp;

			if (connp->conn_pkt_isv6) {
				addr_family = AF_INET6;
				laddrp =
				    (uint8_t *)&connp->conn_bound_source_v6;
			} else {
				addr_family = AF_INET;
				laddrp = (uint8_t *)&connp->conn_bound_source;
			}
			connp->conn_flags |= IPCL_CL_LISTENER;
			(*cl_inet_listen)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
		}
		break;

	case IPPROTO_SCTP:
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;
	}

	return (ret);
}

/*
 * ipcl_conn_hash insertion routines.
 */

/*
 * Insert a fully-specified conn (protocol, src, rem, ports) into the
 * appropriate fanout.  For TCP the full 4-tuple (plus zoneid) must be
 * unique; returns EADDRINUSE if the tuple already exists.  ports packs
 * both port numbers; see the up[]/lport extraction below.
 */
int
ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
    ipaddr_t rem, uint32_t ports)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
#ifdef	IPCL_DEBUG
	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
#endif
	in_port_t	lport;
	int		ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
	    ports, protocol));

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert(connp, src, rem, ipst));

	switch (protocol) {
	case IPPROTO_TCP:
		if (!(connp->conn_flags & IPCL_EAGER)) {
			/*
			 * for a eager connection, i.e connections which
			 * have just been created, the initialization is
			 * already done in ip at conn_creation time, so
			 * we can skip the checks here.
			 */
			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		}

		/*
		 * For tcp, we check whether the connection tuple already
		 * exists before allowing the connection to proceed. We
		 * also allow indexing on the zoneid. This is to allow
		 * multiple shared stack zones to have the same tcp
		 * connection tuple. In practice this only happens for
		 * INADDR_LOOPBACK as it's the only local address which
		 * doesn't have to be unique.
		 */
		connfp = &ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH(connp->conn_rem,
		    connp->conn_ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if ((IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
			    connp->conn_rem, connp->conn_src,
			    connp->conn_ports)) &&
			    (IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid))) {

				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}

		ASSERT(connp->conn_recv != NULL);

		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/*
		 * The raw socket may have already been bound, remove it
		 * from the hash first.
		 */
		IPCL_HASH_REMOVE(connp);
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	default:
		/*
		 * Check for conflicts among MAC exempt bindings. For
		 * transports with port numbers, this is done by the upper
		 * level per-transport binding logic. For all others, it's
		 * done here.
		 */
		if (is_system_labeled() &&
		    check_exempt_conflict_v4(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */

	case IPPROTO_UDP:
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(up[1], ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
		}

		/* connected > bound > wildcard, by how much is specified */
		if (connp->conn_rem != INADDR_ANY) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (connp->conn_src != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * IPv6 counterpart of ipcl_conn_insert().  For TCP the duplicate-tuple
 * check additionally honours the interface the existing conn is bound to
 * (tcp_bound_if) against `ifindex'.
 */
int
ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
    const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
	in_port_t	lport;
	int		ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert_v6(connp, src, rem, ipst));

	switch (protocol) {
	case IPPROTO_TCP:
		/* Just need to insert a conn struct */
		if (!(connp->conn_flags & IPCL_EAGER)) {
			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
		}

		/*
		 * For tcp, we check whether the connection tuple already
		 * exists before allowing the connection to proceed. We
		 * also allow indexing on the zoneid. This is to allow
		 * multiple shared stack zones to have the same tcp
		 * connection tuple. In practice this only happens for
		 * ipv6_loopback as it's the only local address which
		 * doesn't have to be unique.
		 */
		connfp = &ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports,
		    ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
			    connp->conn_remv6, connp->conn_srcv6,
			    connp->conn_ports) &&
			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
			    tconnp->conn_tcp->tcp_bound_if == ifindex) &&
			    (IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid))) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/* Raw socket may already be bound; remove it first. */
		IPCL_HASH_REMOVE(connp);
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	default:
		if (is_system_labeled() &&
		    check_exempt_conflict_v6(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(up[1], ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
		}

		/* connected > bound > wildcard, by how much is specified */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * v4 packet classifying function. looks up the fanout table to
 * find the conn, the packet belongs to. returns the conn with
 * the reference held, null otherwise.
 *
 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
 * Lookup" comment block are applied. Labels are also checked as described
 * above. If the packet is from the inside (looped back), and is from the same
 * zone, then label checks are omitted.
 */
conn_t *
ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	ipha_t	*ipha;
	connf_t	*connfp, *bind_connfp;
	uint16_t	lport;
	uint16_t	fport;
	uint32_t	ports;
	conn_t	*connp;
	uint16_t	*up;
	boolean_t shared_addr;
	boolean_t unlabeled;

	ipha = (ipha_t *)mp->b_rptr;
	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);

	switch (protocol) {
	case IPPROTO_TCP:
		/* First try the fully-connected (4-tuple) fanout. */
		ports = *(uint32_t *)up;
		connfp =
		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
		    ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if ((IPCL_CONN_MATCH(connp, protocol,
			    ipha->ipha_src, ipha->ipha_dst, ports)) &&
			    (IPCL_ZONE_MATCH(connp, zoneid))) {
				break;
			}
		}

		if (connp != NULL) {
			/*
			 * We have a fully-bound TCP connection.
			 *
			 * For labeled systems, there's no need to check the
			 * label here. It's known to be good as we checked
			 * before allowing the connection to become bound.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&connfp->connf_lock);

		/* Fall back to the bind (listener) fanout. */
		lport = up[1];
		unlabeled = B_FALSE;
		/* Cred cannot be null on IPv4 */
		if (is_system_labeled()) {
			cred_t *cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			unlabeled = (crgetlabel(cr)->tsl_flags &
			    TSLF_UNLABELED) != 0;
		}
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid = tsol_mlp_findzone(protocol, lport);
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES. In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 *
			 * If there is such a zone, we prefer to find a
			 * connection in it. Otherwise, we look for a
			 * MAC-exempt connection in any zone whose label
			 * dominates the default label on the packet.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			else
				unlabeled = B_FALSE;
		}

		bind_connfp =
		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
		mutex_enter(&bind_connfp->connf_lock);
		for (connp = bind_connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
			    lport) && (IPCL_ZONE_MATCH(connp, zoneid) ||
			    (unlabeled && shared_addr &&
			    (connp->conn_mac_mode != CONN_MAC_DEFAULT))))
				break;
		}

		/*
		 * If the matching connection is SLP on a private address, then
		 * the label on the packet must match the local zone's label.
		 * Otherwise, it must be in the label range defined by tnrh.
		 * This is ensured by tsol_receive_label.
		 */
		if (connp != NULL && is_system_labeled() &&
		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
		    shared_addr, connp)) {
			DTRACE_PROBE3(
			    tx__ip__log__info__classify__tcp,
			    char *,
			    "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			/* Have a listener at least */
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&bind_connfp->connf_lock);

		IPCL_DEBUG_LVL(512,
		    ("ipcl_classify: couldn't classify mp = %p\n",
		    (void *)mp));
		break;

	case IPPROTO_UDP:
		lport = up[1];
		unlabeled = B_FALSE;
		/* Cred cannot be null on IPv4 */
		if (is_system_labeled()) {
			cred_t *cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			unlabeled = (crgetlabel(cr)->tsl_flags &
			    TSLF_UNLABELED) != 0;
		}
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid = tsol_mlp_findzone(protocol, lport);
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES. In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 *
			 * If there is such a zone, we prefer to find a
			 * connection in it. Otherwise, we look for a
			 * MAC-exempt connection in any zone whose label
			 * dominates the default label on the packet.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			else
				unlabeled = B_FALSE;
		}
		fport = up[0];
		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
			    fport, ipha->ipha_src) &&
			    (IPCL_ZONE_MATCH(connp, zoneid) ||
			    (unlabeled && shared_addr &&
			    (connp->conn_mac_mode != CONN_MAC_DEFAULT))))
				break;
		}

		if (connp != NULL && is_system_labeled() &&
		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
		    shared_addr, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		/*
		 * We shouldn't come here for multicast/broadcast packets
		 */
		mutex_exit(&connfp->connf_lock);
		IPCL_DEBUG_LVL(512,
		    ("ipcl_classify: cant find udp conn_t for ports : %x %x",
		    lport, fport));
		break;

	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		return (ipcl_iptun_classify_v4(&ipha->ipha_src,
		    &ipha->ipha_dst, ipst));
	}

	return (NULL);
}

/*
 * IPv6 counterpart of ipcl_classify_v4(): classify an inbound IPv6 packet
 * and return the matching conn with a reference held, or NULL.
 */
conn_t *
ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	ip6_t		*ip6h;
	connf_t		*connfp, *bind_connfp;
	uint16_t	lport;
	uint16_t	fport;
	tcph_t		*tcph;
	uint32_t	ports;
	conn_t		*connp;
	uint16_t	*up;
	boolean_t	shared_addr;
	boolean_t	unlabeled;

	ip6h = (ip6_t *)mp->b_rptr;

	switch (protocol) {
	case IPPROTO_TCP:
		/* First try the fully-connected (4-tuple) fanout. */
		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
		up = (uint16_t *)tcph->th_lport;
		ports = *(uint32_t *)up;

		connfp =
		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
		    ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if ((IPCL_CONN_MATCH_V6(connp, protocol,
			    ip6h->ip6_src, ip6h->ip6_dst, ports)) &&
			    (IPCL_ZONE_MATCH(connp, zoneid))) {
				break;
			}
		}

		if (connp != NULL) {
			/*
			 * We have a fully-bound TCP connection.
			 *
			 * For labeled systems, there's no need to check the
			 * label here. It's known to be good as we checked
			 * before allowing the connection to become bound.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&connfp->connf_lock);

		/* Fall back to the bind (listener) fanout. */
		lport = up[1];
		unlabeled = B_FALSE;
		/* Cred can be null on IPv6 */
		if (is_system_labeled()) {
			cred_t *cr = msg_getcred(mp, NULL);

			unlabeled = (cr != NULL &&
			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
		}
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid = tsol_mlp_findzone(protocol, lport);
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES. In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 *
			 * If there is such a zone, we prefer to find a
			 * connection in it. Otherwise, we look for a
			 * MAC-exempt connection in any zone whose label
			 * dominates the default label on the packet.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			else
				unlabeled = B_FALSE;
		}

		bind_connfp =
		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
		mutex_enter(&bind_connfp->connf_lock);
		for (connp = bind_connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_BIND_MATCH_V6(connp, protocol,
			    ip6h->ip6_dst, lport) &&
			    (IPCL_ZONE_MATCH(connp, zoneid) ||
			    (unlabeled && shared_addr &&
			    (connp->conn_mac_mode != CONN_MAC_DEFAULT))))
				break;
		}

		if (connp != NULL && is_system_labeled() &&
		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
		    shared_addr, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			/* Have a listener at least */
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			IPCL_DEBUG_LVL(512,
			    ("ipcl_classify_v6: found listner "
			    "connp = %p\n", (void *)connp));

			return (connp);
		}

		mutex_exit(&bind_connfp->connf_lock);

		IPCL_DEBUG_LVL(512,
		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
		    (void *)mp));
		break;

	case IPPROTO_UDP:
		up = (uint16_t *)&mp->b_rptr[hdr_len];
		lport = up[1];
		unlabeled = B_FALSE;
		/* Cred can be null on IPv6 */
		if (is_system_labeled()) {
			cred_t *cr = msg_getcred(mp, NULL);

			unlabeled = (cr != NULL &&
			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
		}
		shared_addr = (zoneid == ALL_ZONES);
		if (shared_addr) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid = tsol_mlp_findzone(protocol, lport);
			/*
			 * If no shared MLP is found, tsol_mlp_findzone returns
			 * ALL_ZONES. In that case, we assume it's SLP, and
			 * search for the zone based on the packet label.
			 *
			 * If there is such a zone, we prefer to find a
			 * connection in it. Otherwise, we look for a
			 * MAC-exempt connection in any zone whose label
			 * dominates the default label on the packet.
			 */
			if (zoneid == ALL_ZONES)
				zoneid = tsol_packet_to_zoneid(mp);
			else
				unlabeled = B_FALSE;
		}

		fport = up[0];
		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
		    fport));
		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
			    fport, ip6h->ip6_src) &&
			    (IPCL_ZONE_MATCH(connp, zoneid) ||
			    (unlabeled && shared_addr &&
			    (connp->conn_mac_mode != CONN_MAC_DEFAULT))))
				break;
		}

		if (connp != NULL && is_system_labeled() &&
		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
		    shared_addr, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		/*
		 * We shouldn't come here for multicast/broadcast packets
		 */
		mutex_exit(&connfp->connf_lock);
		IPCL_DEBUG_LVL(512,
		    ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x",
		    lport, fport));
		break;
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
		    &ip6h->ip6_dst, ipst));
	}

	return (NULL);
}

/*
 * wrapper around ipcl_classify_(v4,v6) routines.
 */
conn_t *
ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst)
{
	uint16_t hdr_len;
	ipha_t *ipha;
	uint8_t *nexthdrp;

	/* Need at least an IPv4 header's worth of contiguous data. */
	if (MBLKL(mp) < sizeof (ipha_t))
		return (NULL);

	/* Dispatch on the IP version nibble of the first byte. */
	switch (IPH_HDR_VERSION(mp->b_rptr)) {
	case IPV4_VERSION:
		ipha = (ipha_t *)mp->b_rptr;
		hdr_len = IPH_HDR_LENGTH(ipha);
		return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len,
		    zoneid, ipst));
	case IPV6_VERSION:
		/*
		 * Walk any IPv6 extension headers to find the upper-layer
		 * protocol and total header length; fail if malformed.
		 */
		if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr,
		    &hdr_len, &nexthdrp))
			return (NULL);

		return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst));
	}

	/* Unknown IP version: not classifiable. */
	return (NULL);
}

/*
 * Classify a packet into the raw-socket fanout. First scans the bucket for
 * the packet's local port (matching fully-bound conns on the 4-tuple and
 * bound conns on laddr/lport), then falls back to the wildcard (port 0)
 * bucket. On a labeled system the match is additionally validated with
 * tsol_receive_local() unless the conn is fully-bound TCP/SCTP (already
 * validated at connection setup). Returns the conn_t with a reference
 * held (caller must CONN_DEC_REF), or NULL.
 */
conn_t *
ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid,
    uint32_t ports, ipha_t *hdr, ip_stack_t *ipst)
{
	connf_t *connfp;
	conn_t *connp;
	in_port_t lport;
	int af;
	boolean_t shared_addr;
	boolean_t unlabeled;
	const void *dst;

	/* Local port is the second uint16 of the packed ports word. */
	lport = ((uint16_t *)&ports)[1];

	unlabeled = B_FALSE;
	/* Cred can be null on IPv6 */
	if (is_system_labeled()) {
		cred_t *cr = msg_getcred(mp, NULL);

		unlabeled = (cr != NULL &&
		    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
	}
	shared_addr = (zoneid == ALL_ZONES);
	if (shared_addr) {
		/*
		 * No need to handle exclusive-stack zones since ALL_ZONES
		 * only applies to the shared stack.
		 */
		zoneid = tsol_mlp_findzone(protocol, lport);
		/*
		 * If no shared MLP is found, tsol_mlp_findzone returns
		 * ALL_ZONES. In that case, we assume it's SLP, and search for
		 * the zone based on the packet label.
		 *
		 * If there is such a zone, we prefer to find a connection in
		 * it. Otherwise, we look for a MAC-exempt connection in any
		 * zone whose label dominates the default label on the packet.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = tsol_packet_to_zoneid(mp);
		else
			unlabeled = B_FALSE;
	}

	af = IPH_HDR_VERSION(hdr);
	dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst :
	    (const void *)&((ip6_t *)hdr)->ip6_dst;
	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];

	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket. */
		if (af == (connp->conn_af_isv6 ? IPV4_VERSION :
		    IPV6_VERSION))
			continue;
		if (connp->conn_fully_bound) {
			/* Fully-bound: match the full 4-tuple. */
			if (af == IPV4_VERSION) {
				if (!IPCL_CONN_MATCH(connp, protocol,
				    hdr->ipha_src, hdr->ipha_dst, ports))
					continue;
			} else {
				if (!IPCL_CONN_MATCH_V6(connp, protocol,
				    ((ip6_t *)hdr)->ip6_src,
				    ((ip6_t *)hdr)->ip6_dst, ports))
					continue;
			}
		} else {
			/* Bound only: match on local address/port. */
			if (af == IPV4_VERSION) {
				if (!IPCL_BIND_MATCH(connp, protocol,
				    hdr->ipha_dst, lport))
					continue;
			} else {
				if (!IPCL_BIND_MATCH_V6(connp, protocol,
				    ((ip6_t *)hdr)->ip6_dst, lport))
					continue;
			}
		}

		/* Address matched; now require zone (or MAC-exempt) match. */
		if (IPCL_ZONE_MATCH(connp, zoneid) ||
		    (unlabeled &&
		    (connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
		    shared_addr))
			break;
	}
	/*
	 * If the connection is fully-bound and connection-oriented (TCP or
	 * SCTP), then we've already validated the remote system's label.
	 * There's no need to do it again for every packet.
	 */
	if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound ||
	    !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) &&
	    !tsol_receive_local(mp, dst, af, shared_addr, connp)) {
		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
		    char *, "connp(1) could not receive mp(2)",
		    conn_t *, connp, mblk_t *, mp);
		connp = NULL;
	}

	if (connp != NULL)
		goto found;
	mutex_exit(&connfp->connf_lock);

	/* Try to look for a wildcard match. */
	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket. */
		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
		    IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) {
			continue;
		}
		if (af == IPV4_VERSION) {
			if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst))
				break;
		} else {
			if (IPCL_RAW_MATCH_V6(connp, protocol,
			    ((ip6_t *)hdr)->ip6_dst)) {
				break;
			}
		}
	}

	if (connp != NULL)
		goto found;

	mutex_exit(&connfp->connf_lock);
	return (NULL);

found:
	/* The bucket lock is still held here; drop it after taking the ref. */
	ASSERT(connp != NULL);
	CONN_INC_REF(connp);
	mutex_exit(&connfp->connf_lock);
	return (connp);
}

/*
 * Object-cache constructor for TCP connections: the buffer holds a conn_t
 * immediately followed by its tcp_t (itc_t layout). Initializes the
 * embedded lock/CVs and cross-links the two structures.
 *
 * NOTE(review): kmflags is ignored; the timer mblk is always allocated
 * KM_NOSLEEP (may legitimately be NULL) -- confirm intentional.
 */
/* ARGSUSED */
static int
tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;
	tcp_t *tcp = (tcp_t *)&itc[1];

	bzero(connp, sizeof (conn_t));
	bzero(tcp, sizeof (tcp_t));

	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
	tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
	connp->conn_tcp = tcp;
	connp->conn_flags = IPCL_TCPCONN;
	connp->conn_ulp = IPPROTO_TCP;
	tcp->tcp_connp = connp;
	return (0);
}

/*
 * Destructor counterpart of tcp_conn_constructor: verifies the
 * conn_t/tcp_t cross-links and tears down the timer cache, lock and CVs.
 */
/* ARGSUSED */
static void
tcp_conn_destructor(void *buf, void *cdrarg)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;
	tcp_t *tcp = (tcp_t *)&itc[1];

	ASSERT(connp->conn_flags & IPCL_TCPCONN);
	ASSERT(tcp->tcp_connp == connp);
	ASSERT(connp->conn_tcp == tcp);
	tcp_timermp_free(tcp);
	mutex_destroy(&connp->conn_lock);
	cv_destroy(&connp->conn_cv);
	cv_destroy(&connp->conn_sq_cv);
}

/*
 * Object-cache constructor for plain IP conn_t's (no transport state).
 */
/* ARGSUSED */
static int
ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;

	bzero(connp, sizeof (conn_t));
	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
	connp->conn_flags = IPCL_IPCCONN;

	return (0);
}

/*
 * Destructor counterpart of ip_conn_constructor.
 */
/* ARGSUSED */
static void
ip_conn_destructor(void *buf, void *cdrarg)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;

	ASSERT(connp->conn_flags & IPCL_IPCCONN);
	ASSERT(connp->conn_priv == NULL);
	mutex_destroy(&connp->conn_lock);
	cv_destroy(&connp->conn_cv);
}

/*
 * Object-cache constructor for UDP connections: conn_t followed by udp_t.
 */
/* ARGSUSED */
static int
udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;
	udp_t *udp = (udp_t *)&itc[1];

	bzero(connp, sizeof (conn_t));
	bzero(udp, sizeof (udp_t));

	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
	connp->conn_udp = udp;
	connp->conn_flags = IPCL_UDPCONN;
	connp->conn_ulp = IPPROTO_UDP;
	udp->udp_connp = connp;
	return (0);
}

/*
 * Destructor counterpart of udp_conn_constructor.
 */
/* ARGSUSED */
static void
udp_conn_destructor(void *buf, void *cdrarg)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;
	udp_t *udp = (udp_t *)&itc[1];

	ASSERT(connp->conn_flags & IPCL_UDPCONN);
	ASSERT(udp->udp_connp == connp);
	ASSERT(connp->conn_udp == udp);
	mutex_destroy(&connp->conn_lock);
	cv_destroy(&connp->conn_cv);
}

/*
 * Object-cache constructor for raw IP (ICMP) connections: conn_t followed
 * by icmp_t.
 */
/* ARGSUSED */
static int
rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;
	icmp_t *icmp = (icmp_t *)&itc[1];

	bzero(connp, sizeof (conn_t));
	bzero(icmp, sizeof (icmp_t));

	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
	connp->conn_icmp = icmp;
	connp->conn_flags = IPCL_RAWIPCONN;
	connp->conn_ulp = IPPROTO_ICMP;
	icmp->icmp_connp = connp;
	return (0);
}

/*
 * Destructor counterpart of rawip_conn_constructor.
 */
/* ARGSUSED */
static void
rawip_conn_destructor(void *buf, void *cdrarg)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;
	icmp_t *icmp = (icmp_t *)&itc[1];

	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
	ASSERT(icmp->icmp_connp == connp);
	ASSERT(connp->conn_icmp == icmp);
	mutex_destroy(&connp->conn_lock);
	cv_destroy(&connp->conn_cv);
}

/*
 * Object-cache constructor for routing-socket connections: conn_t followed
 * by rts_t. Note no conn_ulp is assigned here, unlike the transports above.
 */
/* ARGSUSED */
static int
rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;
	rts_t *rts = (rts_t *)&itc[1];

	bzero(connp, sizeof (conn_t));
	bzero(rts, sizeof (rts_t));

	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
	connp->conn_rts = rts;
	connp->conn_flags = IPCL_RTSCONN;
	rts->rts_connp = connp;
	return (0);
}

/*
 * Destructor counterpart of rts_conn_constructor.
 */
/* ARGSUSED */
static void
rts_conn_destructor(void *buf, void *cdrarg)
{
	itc_t *itc = (itc_t *)buf;
	conn_t *connp = &itc->itc_conn;
	rts_t *rts = (rts_t *)&itc[1];

	ASSERT(connp->conn_flags & IPCL_RTSCONN);
	ASSERT(rts->rts_connp == connp);
	ASSERT(connp->conn_rts == rts);
	mutex_destroy(&connp->conn_lock);
	cv_destroy(&connp->conn_cv);
}

/*
 * Constructor for IP helper-stream objects: opens the /dev/ip helper
 * stream via LDI (retrying on EINTR) and associates it with this buffer
 * through the SIOCSQPTR ioctl. On ioctl failure the LDI handle is closed
 * again. Returns 0 on success or the errno from LDI.
 */
/* ARGSUSED */
int
ip_helper_stream_constructor(void *buf, void *cdrarg, int kmflags)
{
	int error;
	netstack_t *ns;
	int ret;
	tcp_stack_t *tcps;
	ip_helper_stream_info_t *ip_helper_str;
	ip_stack_t *ipst;

	/* Hold the caller's netstack for the duration of the open/ioctl. */
	ns = netstack_find_by_cred(kcred);
	ASSERT(ns != NULL);
	tcps = ns->netstack_tcp;
	ipst = ns->netstack_ip;
	ASSERT(tcps != NULL);
	ip_helper_str = (ip_helper_stream_info_t *)buf;

	do {
		error = ldi_open_by_name(DEV_IP, IP_HELPER_STR, kcred,
		    &ip_helper_str->iphs_handle, ipst->ips_ldi_ident);
	} while (error == EINTR);

	if (error == 0) {
		do {
			error = ldi_ioctl(
			    ip_helper_str->iphs_handle, SIOCSQPTR,
			    (intptr_t)buf, FKIOCTL, kcred, &ret);
		} while (error == EINTR);

		if (error != 0) {
			/* Undo the open; leave the buffer unconstructed. */
			(void) ldi_close(
			    ip_helper_str->iphs_handle, 0, kcred);
		}
	}

	/* Release the hold taken by netstack_find_by_cred() above. */
	netstack_rele(ipst->ips_netstack);

	return (error);
}

/*
 * Destructor for IP helper-stream objects: restore the queues' q_ptr to
 * the module info before closing the LDI handle.
 */
/* ARGSUSED */
static void
ip_helper_stream_destructor(void *buf, void *cdrarg)
{
	ip_helper_stream_info_t *ip_helper_str = (ip_helper_stream_info_t *)buf;

	ip_helper_str->iphs_rq->q_ptr =
	    ip_helper_str->iphs_wq->q_ptr =
	    ip_helper_str->iphs_minfo;
	(void) ldi_close(ip_helper_str->iphs_handle, 0, kcred);
}


/*
 * Called as part of ipcl_conn_destroy to assert and clear any pointers
 * in the conn_t.
 */
void
ipcl_conn_cleanup(conn_t *connp)
{
	/*
	 * Everything that holds a reference or linkage must already have
	 * been released before the conn_t can be destroyed.
	 */
	ASSERT(connp->conn_ire_cache == NULL);
	ASSERT(connp->conn_latch == NULL);
#ifdef notdef
	ASSERT(connp->conn_rq == NULL);
	ASSERT(connp->conn_wq == NULL);
#endif
	ASSERT(connp->conn_cred == NULL);
	ASSERT(connp->conn_g_fanout == NULL);
	ASSERT(connp->conn_g_next == NULL);
	ASSERT(connp->conn_g_prev == NULL);
	ASSERT(connp->conn_policy == NULL);
	ASSERT(connp->conn_fanout == NULL);
	ASSERT(connp->conn_next == NULL);
	ASSERT(connp->conn_prev == NULL);
#ifdef notdef
	/*
	 * The ill and ipif pointers are not cleared before the conn_t
	 * goes away since they do not hold a reference on the ill/ipif.
	 * We should replace these pointers with ifindex/ipaddr_t to
	 * make the code less complex.
	 */
	ASSERT(connp->conn_outgoing_ill == NULL);
	ASSERT(connp->conn_incoming_ill == NULL);
	ASSERT(connp->conn_multicast_ipif == NULL);
	ASSERT(connp->conn_multicast_ill == NULL);
#endif
	ASSERT(connp->conn_oper_pending_ill == NULL);
	ASSERT(connp->conn_ilg == NULL);
	ASSERT(connp->conn_drain_next == NULL);
	ASSERT(connp->conn_drain_prev == NULL);
#ifdef notdef
	/* conn_idl is not cleared when removed from idl list */
	ASSERT(connp->conn_idl == NULL);
#endif
	ASSERT(connp->conn_ipsec_opt_mp == NULL);
	ASSERT(connp->conn_effective_cred == NULL);
	ASSERT(connp->conn_netstack == NULL);

	ASSERT(connp->conn_helper_info == NULL);
	/*
	 * Clear out the conn_t fields that are not preserved.
	 * Everything from conn_start_clr to the end of the struct is
	 * zeroed; fields before that marker survive cache reuse.
	 */
	bzero(&connp->conn_start_clr,
	    sizeof (conn_t) -
	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
}

/*
 * All conns are inserted in a global multi-list for the benefit of
 * walkers. The walk is guaranteed to walk all open conns at the time
 * of the start of the walk exactly once.
 * This property is needed to
 * achieve some cleanups during unplumb of interfaces. This is achieved
 * as follows.
 *
 * ipcl_conn_create and ipcl_conn_destroy are the only functions that
 * call the insert and delete functions below at creation and deletion
 * time respectively. The conn never moves or changes its position in this
 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
 * won't increase due to walkers, once the conn deletion has started. Note
 * that we can't remove the conn from the global list and then wait for
 * the refcnt to drop to zero, since walkers would then see a truncated
 * list. CONN_INCIPIENT ensures that walkers don't start looking at
 * conns until ip_open is ready to make them globally visible.
 * The global round robin multi-list locks are held only to get the
 * next member/insertion/deletion and contention should be negligible
 * if the multi-list is much greater than the number of cpus.
 */
void
ipcl_globalhash_insert(conn_t *connp)
{
	int index;
	struct connf_s *connfp;
	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;

	/*
	 * No need for atomic here. Approximate even distribution
	 * in the global lists is sufficient.
	 */
	ipst->ips_conn_g_index++;
	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);

	connp->conn_g_prev = NULL;
	/*
	 * Mark as INCIPIENT, so that walkers will ignore this
	 * for now, till ip_open is ready to make it visible globally.
	 */
	connp->conn_state_flags |= CONN_INCIPIENT;

	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
	/* Insert at the head of the list */
	mutex_enter(&connfp->connf_lock);
	connp->conn_g_next = connfp->connf_head;
	if (connp->conn_g_next != NULL)
		connp->conn_g_next->conn_g_prev = connp;
	connfp->connf_head = connp;

	/* The fanout bucket this conn points to */
	connp->conn_g_fanout = connfp;

	mutex_exit(&connfp->connf_lock);
}

/*
 * Unlink a conn from the global multi-list. Safe to call on a conn that
 * was never inserted (conn_g_fanout == NULL).
 */
void
ipcl_globalhash_remove(conn_t *connp)
{
	struct connf_s *connfp;

	/*
	 * We were never inserted in the global multi list.
	 * IPCL_NONE variety is never inserted in the global multilist
	 * since it is presumed to not need any cleanup and is transient.
	 */
	if (connp->conn_g_fanout == NULL)
		return;

	connfp = connp->conn_g_fanout;
	mutex_enter(&connfp->connf_lock);
	/* Standard doubly-linked-list unlink, head pointer included. */
	if (connp->conn_g_prev != NULL)
		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
	else
		connfp->connf_head = connp->conn_g_next;
	if (connp->conn_g_next != NULL)
		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
	mutex_exit(&connfp->connf_lock);

	/* Better to stumble on a null pointer than to corrupt memory */
	connp->conn_g_next = NULL;
	connp->conn_g_prev = NULL;
	connp->conn_g_fanout = NULL;
}

/*
 * Walk the list of all conn_t's in the system, calling the function provided
 * with the specified argument for each.
 * Applies to both IPv4 and IPv6.
 *
 * IPCs may hold pointers to ipif/ill. To guard against stale pointers
 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
 * unplumbed or removed. New conn_t's that are created while we are walking
 * may be missed by this walk, because they are not necessarily inserted
 * at the tail of the list.
 * They are new conn_t's and thus don't have any
 * stale pointers. The CONN_CLOSING flag ensures that no new reference
 * is created to the struct that is going away.
 */
void
ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
{
	int i;
	conn_t *connp;
	conn_t *prev_connp;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
		prev_connp = NULL;
		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
		while (connp != NULL) {
			mutex_enter(&connp->conn_lock);
			/* Skip conns being born or dying. */
			if (connp->conn_state_flags &
			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
				mutex_exit(&connp->conn_lock);
				connp = connp->conn_g_next;
				continue;
			}
			/*
			 * Take a reference before dropping the bucket lock so
			 * connp (and its conn_g_next linkage) stays valid
			 * while func runs without the lock held. The previous
			 * conn's reference is only dropped after the callback,
			 * per bucket-lock/conn-lock ordering rules.
			 */
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			mutex_exit(
			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
			(*func)(connp, arg);
			if (prev_connp != NULL)
				CONN_DEC_REF(prev_connp);
			mutex_enter(
			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
			prev_connp = connp;
			connp = connp->conn_g_next;
		}
		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
		/* Drop the reference on the last conn visited in the bucket. */
		if (prev_connp != NULL)
			CONN_DEC_REF(prev_connp);
	}
}

/*
 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
 * held; caller must call CONN_DEC_REF. Only checks for connected entries
 * (peer tcp in ESTABLISHED state).
 */
conn_t *
ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph,
    ip_stack_t *ipst)
{
	uint32_t ports;
	uint16_t *pports = (uint16_t *)&ports;
	connf_t *connfp;
	conn_t *tconnp;
	boolean_t zone_chk;

	/*
	 * If either the source or destination address is loopback, then
	 * both endpoints must be in the same Zone. Otherwise, both of
	 * the addresses are system-wide unique (tcp is in ESTABLISHED
	 * state) and the endpoints may reside in different Zones.
	 */
	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));

	/* Swap the ports: the peer's local port is our foreign port. */
	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));

	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
	    ports, ipst)];

	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {

		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
		    ipha->ipha_dst, ipha->ipha_src, ports) &&
		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {

			ASSERT(tconnp != connp);
			CONN_INC_REF(tconnp);
			mutex_exit(&connfp->connf_lock);
			return (tconnp);
		}
	}
	mutex_exit(&connfp->connf_lock);
	return (NULL);
}

/*
 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
 * held; caller must call CONN_DEC_REF. Only checks for connected entries
 * (peer tcp in ESTABLISHED state).
 */
conn_t *
ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph,
    ip_stack_t *ipst)
{
	uint32_t ports;
	uint16_t *pports = (uint16_t *)&ports;
	connf_t *connfp;
	conn_t *tconnp;
	boolean_t zone_chk;

	/*
	 * If either the source or destination address is loopback, then
	 * both endpoints must be in the same Zone. Otherwise, both of
	 * the addresses are system-wide unique (tcp is in ESTABLISHED
	 * state) and the endpoints may reside in different Zones. We
	 * don't do Zone check for link local address(es) because the
	 * current Zone implementation treats each link local address as
	 * being unique per system node, i.e. they belong to global Zone.
	 */
	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));

	/* Swap the ports: the peer's local port is our foreign port. */
	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));

	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
	    ports, ipst)];

	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {

		/* We skip tcp_bound_if check here as this is loopback tcp */
		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {

			ASSERT(tconnp != connp);
			CONN_INC_REF(tconnp);
			mutex_exit(&connfp->connf_lock);
			return (tconnp);
		}
	}
	mutex_exit(&connfp->connf_lock);
	return (NULL);
}

/*
 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
 * Returns with conn reference held. Caller must call CONN_DEC_REF.
 * Only checks for connected entries i.e. no INADDR_ANY checks.
2761 */ 2762 conn_t * 2763 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, 2764 ip_stack_t *ipst) 2765 { 2766 uint32_t ports; 2767 uint16_t *pports; 2768 connf_t *connfp; 2769 conn_t *tconnp; 2770 2771 pports = (uint16_t *)&ports; 2772 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 2773 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 2774 2775 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2776 ports, ipst)]; 2777 2778 mutex_enter(&connfp->connf_lock); 2779 for (tconnp = connfp->connf_head; tconnp != NULL; 2780 tconnp = tconnp->conn_next) { 2781 2782 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2783 ipha->ipha_dst, ipha->ipha_src, ports) && 2784 tconnp->conn_tcp->tcp_state >= min_state) { 2785 2786 CONN_INC_REF(tconnp); 2787 mutex_exit(&connfp->connf_lock); 2788 return (tconnp); 2789 } 2790 } 2791 mutex_exit(&connfp->connf_lock); 2792 return (NULL); 2793 } 2794 2795 /* 2796 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2797 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2798 * Only checks for connected entries i.e. no INADDR_ANY checks. 2799 * Match on ifindex in addition to addresses. 
 */
conn_t *
ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
    uint_t ifindex, ip_stack_t *ipst)
{
	tcp_t *tcp;
	uint32_t ports;
	uint16_t *pports;
	connf_t *connfp;
	conn_t *tconnp;

	/* Swap the ports: the packet's foreign port is our local port. */
	pports = (uint16_t *)&ports;
	pports[0] = tcpha->tha_fport;
	pports[1] = tcpha->tha_lport;

	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
	    ports, ipst)];

	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {

		tcp = tconnp->conn_tcp;
		/* Match 4-tuple, minimum state, and bound interface (if any) */
		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
		    tcp->tcp_state >= min_state &&
		    (tcp->tcp_bound_if == 0 ||
		    tcp->tcp_bound_if == ifindex)) {

			CONN_INC_REF(tconnp);
			mutex_exit(&connfp->connf_lock);
			return (tconnp);
		}
	}
	mutex_exit(&connfp->connf_lock);
	return (NULL);
}

/*
 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
 * a listener when changing state.
 * Returns with conn reference held; caller must call CONN_DEC_REF.
 */
conn_t *
ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
    ip_stack_t *ipst)
{
	connf_t *bind_connfp;
	conn_t *connp;
	tcp_t *tcp;

	/*
	 * Avoid false matches for packets sent to an IP destination of
	 * all zeros.
	 */
	if (laddr == 0)
		return (NULL);

	ASSERT(zoneid != ALL_ZONES);

	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
	mutex_enter(&bind_connfp->connf_lock);
	for (connp = bind_connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		tcp = connp->conn_tcp;
		/* tcp_listener == NULL means this conn is itself a listener */
		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
		    IPCL_ZONE_MATCH(connp, zoneid) &&
		    (tcp->tcp_listener == NULL)) {
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			return (connp);
		}
	}
	mutex_exit(&bind_connfp->connf_lock);
	return (NULL);
}

/*
 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
 * a listener when changing state.
 * Also matches the bound interface (tcp_bound_if) when one is set.
 * Returns with conn reference held; caller must call CONN_DEC_REF.
 */
conn_t *
ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
    zoneid_t zoneid, ip_stack_t *ipst)
{
	connf_t *bind_connfp;
	conn_t *connp = NULL;
	tcp_t *tcp;

	/*
	 * Avoid false matches for packets sent to an IP destination of
	 * all zeros.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
		return (NULL);

	ASSERT(zoneid != ALL_ZONES);

	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
	mutex_enter(&bind_connfp->connf_lock);
	for (connp = bind_connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		tcp = connp->conn_tcp;
		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
		    IPCL_ZONE_MATCH(connp, zoneid) &&
		    (tcp->tcp_bound_if == 0 ||
		    tcp->tcp_bound_if == ifindex) &&
		    tcp->tcp_listener == NULL) {
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			return (connp);
		}
	}
	mutex_exit(&bind_connfp->connf_lock);
	return (NULL);
}

/*
 * ipcl_get_next_conn
 *	get the next entry in the conn global list
 *	and put a reference on the next_conn.
 *	decrement the reference on the current conn.
 *
 * This is an iterator based walker function that also provides for
 * some selection by the caller. It walks through the conn_hash bucket
 * searching for the next valid connp in the list, and selects connections
 * that are neither closed nor condemned. It also REFHOLDS the conn
 * thus ensuring that the conn exists when the caller uses the conn.
 */
conn_t *
ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
{
	conn_t *next_connp;

	if (connfp == NULL)
		return (NULL);

	mutex_enter(&connfp->connf_lock);

	/* NULL connp means start at the head of the bucket. */
	next_connp = (connp == NULL) ?
	    connfp->connf_head : connp->conn_g_next;

	while (next_connp != NULL) {
		mutex_enter(&next_connp->conn_lock);
		if (!(next_connp->conn_flags & conn_flags) ||
		    (next_connp->conn_state_flags &
		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
			/*
			 * This conn has been condemned or
			 * is closing, or the flags don't match
			 */
			mutex_exit(&next_connp->conn_lock);
			next_connp = next_connp->conn_g_next;
			continue;
		}
		/* Hold the conn so it survives after we drop the locks. */
		CONN_INC_REF_LOCKED(next_connp);
		mutex_exit(&next_connp->conn_lock);
		break;
	}

	mutex_exit(&connfp->connf_lock);

	/* Release the reference the caller held on the previous conn. */
	if (connp != NULL)
		CONN_DEC_REF(connp);

	return (next_connp);
}

#ifdef CONN_DEBUG
/*
 * Trace of the last NBUF refhold/refrele
 *
 * Records the caller's stack in the next slot of the conn's circular
 * trace buffer. Must be called with conn_lock held.
 */
int
conn_trace_ref(conn_t *connp)
{
	int last;
	conn_trace_t *ctb;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	last = connp->conn_trace_last;
	last++;
	/* Wrap around the circular buffer. */
	if (last == CONN_TRACE_MAX)
		last = 0;

	ctb = &connp->conn_trace_buf[last];
	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
	connp->conn_trace_last = last;
	return (1);
}

/*
 * Refrele counterpart of conn_trace_ref.
 *
 * NOTE(review): body is identical to conn_trace_ref -- both record the
 * caller's stack in the shared circular buffer; confirm that distinguishing
 * hold vs. rele entries is intentionally left to stack inspection.
 */
int
conn_untrace_ref(conn_t *connp)
{
	int last;
	conn_trace_t *ctb;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	last = connp->conn_trace_last;
	last++;
	/* Wrap around the circular buffer. */
	if (last == CONN_TRACE_MAX)
		last = 0;

	ctb = &connp->conn_trace_buf[last];
	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
	connp->conn_trace_last = last;
	return (1);
}
#endif