1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; 29 30 /* 31 * IP PACKET CLASSIFIER 32 * 33 * The IP packet classifier provides mapping between IP packets and persistent 34 * connection state for connection-oriented protocols. It also provides 35 * interface for managing connection states. 36 * 37 * The connection state is kept in conn_t data structure and contains, among 38 * other things: 39 * 40 * o local/remote address and ports 41 * o Transport protocol 42 * o squeue for the connection (for TCP only) 43 * o reference counter 44 * o Connection state 45 * o hash table linkage 46 * o interface/ire information 47 * o credentials 48 * o ipsec policy 49 * o send and receive functions. 50 * o mutex lock. 51 * 52 * Connections use a reference counting scheme. They are freed when the 53 * reference counter drops to zero. 
 * A reference is incremented when connection
 * is placed in a list or table, when incoming packet for the connection arrives
 * and when connection is processed via squeue (squeue processing may be
 * asynchronous and the reference protects the connection from being destroyed
 * before its processing is finished).
 *
 * send and receive functions are currently used for TCP only. The send function
 * determines the IP entry point for the packet once it leaves TCP to be sent to
 * the destination address. The receive function is used by IP when the packet
 * should be passed for TCP processing. When a new connection is created these
 * are set to ip_output() and tcp_input() respectively. During the lifetime of
 * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
 * tcp_conn_request().  This allows incoming SYNs to go directly into the
 * listener SYN processing function without going to tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any).
 * This reference should be dropped
 * when the caller has finished processing the connection.
 *
 *
 * INTERFACES:
 * ===========
 *
 * Connection Lookup:
 * ------------------
 *
 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack)
 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack)
 *
 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
 * it can't find any associated connection. If the connection is found, its
 * reference counter is incremented.
 *
 *	mp:	mblock, containing packet header. The full header should fit
 *		into a single mblock. It should also contain at least full IP
 *		and TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
 *		 the packet.
 *
 *	zoneid: The zone in which the returned connection must be; the zoneid
 *		corresponding to the ire_zoneid on the IRE located for the
 *		packet's destination address.
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment.  This is used only in a
 * labeled environment.  The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone.  Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked.  For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet.  For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set.  This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary.  (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
 *					 zoneid, ip_stack);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *					 zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
223 * 224 * Connection Creation/Destruction 225 * ------------------------------- 226 * 227 * conn_t *ipcl_conn_create(type, sleep, netstack_t *) 228 * 229 * Creates a new conn based on the type flag, inserts it into 230 * globalhash table. 231 * 232 * type: This flag determines the type of conn_t which needs to be 233 * created. 234 * IPCL_TCPCONN indicates a TCP connection 235 * IPCL_IPCONN indicates all non-TCP connections. 236 * 237 * void ipcl_conn_destroy(connp) 238 * 239 * Destroys the connection state, removes it from the global 240 * connection hash table and frees its memory. 241 */ 242 243 #include <sys/types.h> 244 #include <sys/stream.h> 245 #include <sys/stropts.h> 246 #include <sys/sysmacros.h> 247 #include <sys/strsubr.h> 248 #include <sys/strsun.h> 249 #define _SUN_TPI_VERSION 2 250 #include <sys/ddi.h> 251 #include <sys/cmn_err.h> 252 #include <sys/debug.h> 253 254 #include <sys/systm.h> 255 #include <sys/param.h> 256 #include <sys/kmem.h> 257 #include <sys/isa_defs.h> 258 #include <inet/common.h> 259 #include <netinet/ip6.h> 260 #include <netinet/icmp6.h> 261 262 #include <inet/ip.h> 263 #include <inet/ip6.h> 264 #include <inet/tcp.h> 265 #include <inet/ip_ndp.h> 266 #include <inet/udp_impl.h> 267 #include <inet/sctp_ip.h> 268 #include <inet/sctp/sctp_impl.h> 269 270 #include <sys/cpuvar.h> 271 272 #include <inet/ipclassifier.h> 273 #include <inet/ipsec_impl.h> 274 275 #include <sys/tsol/tnet.h> 276 277 #ifdef DEBUG 278 #define IPCL_DEBUG 279 #else 280 #undef IPCL_DEBUG 281 #endif 282 283 #ifdef IPCL_DEBUG 284 int ipcl_debug_level = 0; 285 #define IPCL_DEBUG_LVL(level, args) \ 286 if (ipcl_debug_level & level) { printf args; } 287 #else 288 #define IPCL_DEBUG_LVL(level, args) {; } 289 #endif 290 /* Old value for compatibility. Setable in /etc/system */ 291 uint_t tcp_conn_hash_size = 0; 292 293 /* New value. Zero means choose automatically. 
Setable in /etc/system */ 294 uint_t ipcl_conn_hash_size = 0; 295 uint_t ipcl_conn_hash_memfactor = 8192; 296 uint_t ipcl_conn_hash_maxsize = 82500; 297 298 /* bind/udp fanout table size */ 299 uint_t ipcl_bind_fanout_size = 512; 300 uint_t ipcl_udp_fanout_size = 16384; 301 302 /* Raw socket fanout size. Must be a power of 2. */ 303 uint_t ipcl_raw_fanout_size = 256; 304 305 /* 306 * Power of 2^N Primes useful for hashing for N of 0-28, 307 * these primes are the nearest prime <= 2^N - 2^(N-2). 308 */ 309 310 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 311 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 312 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 313 50331599, 100663291, 201326557, 0} 314 315 /* 316 * wrapper structure to ensure that conn+tcpb are aligned 317 * on cache lines. 318 */ 319 typedef struct itc_s { 320 union { 321 conn_t itcu_conn; 322 char itcu_filler[CACHE_ALIGN(conn_s)]; 323 } itc_u; 324 tcp_t itc_tcp; 325 } itc_t; 326 327 #define itc_conn itc_u.itcu_conn 328 329 struct kmem_cache *ipcl_tcpconn_cache; 330 struct kmem_cache *ipcl_conn_cache; 331 extern struct kmem_cache *sctp_conn_cache; 332 extern struct kmem_cache *tcp_sack_info_cache; 333 extern struct kmem_cache *tcp_iphc_cache; 334 335 extern void tcp_timermp_free(tcp_t *); 336 extern mblk_t *tcp_timermp_alloc(int); 337 338 static int ipcl_tcpconn_constructor(void *, void *, int); 339 static void ipcl_tcpconn_destructor(void *, void *); 340 341 #ifdef IPCL_DEBUG 342 #define INET_NTOA_BUFSIZE 18 343 344 static char * 345 inet_ntoa_r(uint32_t in, char *b) 346 { 347 unsigned char *p; 348 349 p = (unsigned char *)∈ 350 (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 351 return (b); 352 } 353 #endif 354 355 /* 356 * Global (for all stack instances) init routine 357 */ 358 void 359 ipcl_g_init(void) 360 { 361 ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", 362 sizeof (conn_t), CACHE_ALIGN_SIZE, 363 NULL, NULL, NULL, NULL, NULL, 0); 364 
365 ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache", 366 sizeof (itc_t), CACHE_ALIGN_SIZE, 367 ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, 368 NULL, NULL, NULL, 0); 369 } 370 371 /* 372 * ipclassifier intialization routine, sets up hash tables. 373 */ 374 void 375 ipcl_init(ip_stack_t *ipst) 376 { 377 int i; 378 int sizes[] = P2Ps(); 379 380 /* 381 * Calculate size of conn fanout table from /etc/system settings 382 */ 383 if (ipcl_conn_hash_size != 0) { 384 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 385 } else if (tcp_conn_hash_size != 0) { 386 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 387 } else { 388 extern pgcnt_t freemem; 389 390 ipst->ips_ipcl_conn_fanout_size = 391 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 392 393 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 394 ipst->ips_ipcl_conn_fanout_size = 395 ipcl_conn_hash_maxsize; 396 } 397 } 398 399 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 400 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 401 break; 402 } 403 } 404 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 405 /* Out of range, use the 2^16 value */ 406 ipst->ips_ipcl_conn_fanout_size = sizes[16]; 407 } 408 409 /* Take values from /etc/system */ 410 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 411 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 412 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 413 414 ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 415 416 ipst->ips_ipcl_conn_fanout = kmem_zalloc( 417 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 418 419 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 420 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 421 MUTEX_DEFAULT, NULL); 422 } 423 424 ipst->ips_ipcl_bind_fanout = kmem_zalloc( 425 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 426 427 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 428 
mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 429 MUTEX_DEFAULT, NULL); 430 } 431 432 ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * 433 sizeof (connf_t), KM_SLEEP); 434 for (i = 0; i < IPPROTO_MAX; i++) { 435 mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, 436 MUTEX_DEFAULT, NULL); 437 } 438 439 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 440 sizeof (connf_t), KM_SLEEP); 441 for (i = 0; i < IPPROTO_MAX; i++) { 442 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 443 MUTEX_DEFAULT, NULL); 444 } 445 446 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 447 mutex_init(&ipst->ips_rts_clients->connf_lock, 448 NULL, MUTEX_DEFAULT, NULL); 449 450 ipst->ips_ipcl_udp_fanout = kmem_zalloc( 451 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 452 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 453 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 454 MUTEX_DEFAULT, NULL); 455 } 456 457 ipst->ips_ipcl_raw_fanout = kmem_zalloc( 458 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 459 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 460 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 461 MUTEX_DEFAULT, NULL); 462 } 463 464 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 465 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 466 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 467 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 468 NULL, MUTEX_DEFAULT, NULL); 469 } 470 } 471 472 void 473 ipcl_g_destroy(void) 474 { 475 kmem_cache_destroy(ipcl_conn_cache); 476 kmem_cache_destroy(ipcl_tcpconn_cache); 477 } 478 479 /* 480 * All user-level and kernel use of the stack must be gone 481 * by now. 
482 */ 483 void 484 ipcl_destroy(ip_stack_t *ipst) 485 { 486 int i; 487 488 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 489 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 490 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 491 } 492 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 493 sizeof (connf_t)); 494 ipst->ips_ipcl_conn_fanout = NULL; 495 496 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 497 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 498 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 499 } 500 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 501 sizeof (connf_t)); 502 ipst->ips_ipcl_bind_fanout = NULL; 503 504 for (i = 0; i < IPPROTO_MAX; i++) { 505 ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); 506 mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); 507 } 508 kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); 509 ipst->ips_ipcl_proto_fanout = NULL; 510 511 for (i = 0; i < IPPROTO_MAX; i++) { 512 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 513 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 514 } 515 kmem_free(ipst->ips_ipcl_proto_fanout_v6, 516 IPPROTO_MAX * sizeof (connf_t)); 517 ipst->ips_ipcl_proto_fanout_v6 = NULL; 518 519 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 520 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 521 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 522 } 523 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 524 sizeof (connf_t)); 525 ipst->ips_ipcl_udp_fanout = NULL; 526 527 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 528 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 529 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 530 } 531 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 532 sizeof (connf_t)); 533 ipst->ips_ipcl_raw_fanout = NULL; 534 535 for (i 
= 0; i < CONN_G_HASH_SIZE; i++) { 536 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 537 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 538 } 539 kmem_free(ipst->ips_ipcl_globalhash_fanout, 540 sizeof (connf_t) * CONN_G_HASH_SIZE); 541 ipst->ips_ipcl_globalhash_fanout = NULL; 542 543 ASSERT(ipst->ips_rts_clients->connf_head == NULL); 544 mutex_destroy(&ipst->ips_rts_clients->connf_lock); 545 kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 546 ipst->ips_rts_clients = NULL; 547 } 548 549 /* 550 * conn creation routine. initialize the conn, sets the reference 551 * and inserts it in the global hash table. 552 */ 553 conn_t * 554 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 555 { 556 itc_t *itc; 557 conn_t *connp; 558 sctp_stack_t *sctps; 559 560 switch (type) { 561 case IPCL_TCPCONN: 562 if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache, 563 sleep)) == NULL) 564 return (NULL); 565 connp = &itc->itc_conn; 566 connp->conn_ref = 1; 567 netstack_hold(ns); 568 connp->conn_netstack = ns; 569 IPCL_DEBUG_LVL(1, 570 ("ipcl_conn_create: connp = %p tcp (%p)", 571 (void *)connp, (void *)connp->conn_tcp)); 572 ipcl_globalhash_insert(connp); 573 break; 574 case IPCL_SCTPCONN: 575 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 576 return (NULL); 577 connp->conn_flags = IPCL_SCTPCONN; 578 sctps = ns->netstack_sctp; 579 SCTP_G_Q_REFHOLD(sctps); 580 netstack_hold(ns); 581 connp->conn_netstack = ns; 582 break; 583 case IPCL_IPCCONN: 584 connp = kmem_cache_alloc(ipcl_conn_cache, sleep); 585 if (connp == NULL) 586 return (NULL); 587 bzero(connp, sizeof (conn_t)); 588 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 589 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 590 connp->conn_flags = IPCL_IPCCONN; 591 connp->conn_ref = 1; 592 netstack_hold(ns); 593 connp->conn_netstack = ns; 594 IPCL_DEBUG_LVL(1, 595 ("ipcl_conn_create: connp = %p\n", (void *)connp)); 596 ipcl_globalhash_insert(connp); 597 break; 598 
default: 599 connp = NULL; 600 ASSERT(0); 601 } 602 603 return (connp); 604 } 605 606 void 607 ipcl_conn_destroy(conn_t *connp) 608 { 609 mblk_t *mp; 610 netstack_t *ns = connp->conn_netstack; 611 612 ASSERT(!MUTEX_HELD(&connp->conn_lock)); 613 ASSERT(connp->conn_ref == 0); 614 ASSERT(connp->conn_ire_cache == NULL); 615 616 if (connp->conn_peercred != NULL && 617 connp->conn_peercred != connp->conn_cred) 618 crfree(connp->conn_peercred); 619 connp->conn_peercred = NULL; 620 621 if (connp->conn_cred != NULL) { 622 crfree(connp->conn_cred); 623 connp->conn_cred = NULL; 624 } 625 626 ipcl_globalhash_remove(connp); 627 628 cv_destroy(&connp->conn_cv); 629 if (connp->conn_flags & IPCL_TCPCONN) { 630 tcp_t *tcp = connp->conn_tcp; 631 tcp_stack_t *tcps; 632 633 ASSERT(tcp != NULL); 634 tcps = tcp->tcp_tcps; 635 if (tcps != NULL) { 636 if (connp->conn_latch != NULL) { 637 IPLATCH_REFRELE(connp->conn_latch, ns); 638 connp->conn_latch = NULL; 639 } 640 if (connp->conn_policy != NULL) { 641 IPPH_REFRELE(connp->conn_policy, ns); 642 connp->conn_policy = NULL; 643 } 644 tcp->tcp_tcps = NULL; 645 TCPS_REFRELE(tcps); 646 } 647 648 mutex_destroy(&connp->conn_lock); 649 tcp_free(tcp); 650 mp = tcp->tcp_timercache; 651 tcp->tcp_cred = NULL; 652 653 if (tcp->tcp_sack_info != NULL) { 654 bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 655 kmem_cache_free(tcp_sack_info_cache, 656 tcp->tcp_sack_info); 657 } 658 if (tcp->tcp_iphc != NULL) { 659 if (tcp->tcp_hdr_grown) { 660 kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 661 } else { 662 bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 663 kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 664 } 665 tcp->tcp_iphc_len = 0; 666 } 667 ASSERT(tcp->tcp_iphc_len == 0); 668 669 ASSERT(connp->conn_latch == NULL); 670 ASSERT(connp->conn_policy == NULL); 671 672 bzero(connp, sizeof (itc_t)); 673 674 tcp->tcp_timercache = mp; 675 connp->conn_tcp = tcp; 676 connp->conn_flags = IPCL_TCPCONN; 677 connp->conn_ulp = IPPROTO_TCP; 678 tcp->tcp_connp = connp; 679 
if (ns != NULL) { 680 ASSERT(tcp->tcp_tcps == NULL); 681 connp->conn_netstack = NULL; 682 netstack_rele(ns); 683 } 684 kmem_cache_free(ipcl_tcpconn_cache, connp); 685 } else if (connp->conn_flags & IPCL_SCTPCONN) { 686 ASSERT(ns != NULL); 687 sctp_free(connp); 688 } else { 689 ASSERT(connp->conn_udp == NULL); 690 mutex_destroy(&connp->conn_lock); 691 if (ns != NULL) { 692 connp->conn_netstack = NULL; 693 netstack_rele(ns); 694 } 695 kmem_cache_free(ipcl_conn_cache, connp); 696 } 697 } 698 699 /* 700 * Running in cluster mode - deregister listener information 701 */ 702 703 static void 704 ipcl_conn_unlisten(conn_t *connp) 705 { 706 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 707 ASSERT(connp->conn_lport != 0); 708 709 if (cl_inet_unlisten != NULL) { 710 sa_family_t addr_family; 711 uint8_t *laddrp; 712 713 if (connp->conn_pkt_isv6) { 714 addr_family = AF_INET6; 715 laddrp = (uint8_t *)&connp->conn_bound_source_v6; 716 } else { 717 addr_family = AF_INET; 718 laddrp = (uint8_t *)&connp->conn_bound_source; 719 } 720 (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 721 connp->conn_lport); 722 } 723 connp->conn_flags &= ~IPCL_CL_LISTENER; 724 } 725 726 /* 727 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 728 * which table the conn belonged to). So for debugging we can see which hash 729 * table this connection was in. 
730 */ 731 #define IPCL_HASH_REMOVE(connp) { \ 732 connf_t *connfp = (connp)->conn_fanout; \ 733 ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 734 if (connfp != NULL) { \ 735 IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p", \ 736 (void *)(connp))); \ 737 mutex_enter(&connfp->connf_lock); \ 738 if ((connp)->conn_next != NULL) \ 739 (connp)->conn_next->conn_prev = \ 740 (connp)->conn_prev; \ 741 if ((connp)->conn_prev != NULL) \ 742 (connp)->conn_prev->conn_next = \ 743 (connp)->conn_next; \ 744 else \ 745 connfp->connf_head = (connp)->conn_next; \ 746 (connp)->conn_fanout = NULL; \ 747 (connp)->conn_next = NULL; \ 748 (connp)->conn_prev = NULL; \ 749 (connp)->conn_flags |= IPCL_REMOVED; \ 750 if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 751 ipcl_conn_unlisten((connp)); \ 752 CONN_DEC_REF((connp)); \ 753 mutex_exit(&connfp->connf_lock); \ 754 } \ 755 } 756 757 void 758 ipcl_hash_remove(conn_t *connp) 759 { 760 IPCL_HASH_REMOVE(connp); 761 } 762 763 /* 764 * The whole purpose of this function is allow removal of 765 * a conn_t from the connected hash for timewait reclaim. 766 * This is essentially a TW reclaim fastpath where timewait 767 * collector checks under fanout lock (so no one else can 768 * get access to the conn_t) that refcnt is 2 i.e. one for 769 * TCP and one for the classifier hash list. If ref count 770 * is indeed 2, we can just remove the conn under lock and 771 * avoid cleaning up the conn under squeue. This gives us 772 * improved performance. 
773 */ 774 void 775 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 776 { 777 ASSERT(MUTEX_HELD(&connfp->connf_lock)); 778 ASSERT(MUTEX_HELD(&connp->conn_lock)); 779 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 780 781 if ((connp)->conn_next != NULL) { 782 (connp)->conn_next->conn_prev = 783 (connp)->conn_prev; 784 } 785 if ((connp)->conn_prev != NULL) { 786 (connp)->conn_prev->conn_next = 787 (connp)->conn_next; 788 } else { 789 connfp->connf_head = (connp)->conn_next; 790 } 791 (connp)->conn_fanout = NULL; 792 (connp)->conn_next = NULL; 793 (connp)->conn_prev = NULL; 794 (connp)->conn_flags |= IPCL_REMOVED; 795 ASSERT((connp)->conn_ref == 2); 796 (connp)->conn_ref--; 797 } 798 799 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 800 ASSERT((connp)->conn_fanout == NULL); \ 801 ASSERT((connp)->conn_next == NULL); \ 802 ASSERT((connp)->conn_prev == NULL); \ 803 if ((connfp)->connf_head != NULL) { \ 804 (connfp)->connf_head->conn_prev = (connp); \ 805 (connp)->conn_next = (connfp)->connf_head; \ 806 } \ 807 (connp)->conn_fanout = (connfp); \ 808 (connfp)->connf_head = (connp); \ 809 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 810 IPCL_CONNECTED; \ 811 CONN_INC_REF(connp); \ 812 } 813 814 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 815 IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 816 "connp %p", (void *)(connfp), (void *)(connp))); \ 817 IPCL_HASH_REMOVE((connp)); \ 818 mutex_enter(&(connfp)->connf_lock); \ 819 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 820 mutex_exit(&(connfp)->connf_lock); \ 821 } 822 823 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 824 conn_t *pconnp = NULL, *nconnp; \ 825 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 826 "connp %p", (void *)connfp, (void *)(connp))); \ 827 IPCL_HASH_REMOVE((connp)); \ 828 mutex_enter(&(connfp)->connf_lock); \ 829 nconnp = (connfp)->connf_head; \ 830 while (nconnp != NULL && \ 831 
!_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 832 pconnp = nconnp; \ 833 nconnp = nconnp->conn_next; \ 834 } \ 835 if (pconnp != NULL) { \ 836 pconnp->conn_next = (connp); \ 837 (connp)->conn_prev = pconnp; \ 838 } else { \ 839 (connfp)->connf_head = (connp); \ 840 } \ 841 if (nconnp != NULL) { \ 842 (connp)->conn_next = nconnp; \ 843 nconnp->conn_prev = (connp); \ 844 } \ 845 (connp)->conn_fanout = (connfp); \ 846 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 847 IPCL_BOUND; \ 848 CONN_INC_REF(connp); \ 849 mutex_exit(&(connfp)->connf_lock); \ 850 } 851 852 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 853 conn_t **list, *prev, *next; \ 854 boolean_t isv4mapped = \ 855 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 856 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 857 "connp %p", (void *)(connfp), (void *)(connp))); \ 858 IPCL_HASH_REMOVE((connp)); \ 859 mutex_enter(&(connfp)->connf_lock); \ 860 list = &(connfp)->connf_head; \ 861 prev = NULL; \ 862 while ((next = *list) != NULL) { \ 863 if (isv4mapped && \ 864 IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 865 connp->conn_zoneid == next->conn_zoneid) { \ 866 (connp)->conn_next = next; \ 867 if (prev != NULL) \ 868 prev = next->conn_prev; \ 869 next->conn_prev = (connp); \ 870 break; \ 871 } \ 872 list = &next->conn_next; \ 873 prev = next; \ 874 } \ 875 (connp)->conn_prev = prev; \ 876 *list = (connp); \ 877 (connp)->conn_fanout = (connfp); \ 878 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 879 IPCL_BOUND; \ 880 CONN_INC_REF((connp)); \ 881 mutex_exit(&(connfp)->connf_lock); \ 882 } 883 884 void 885 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 886 { 887 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 888 } 889 890 void 891 ipcl_proto_insert(conn_t *connp, uint8_t protocol) 892 { 893 connf_t *connfp; 894 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 895 896 ASSERT(connp != NULL); 897 ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH 
|| 898 protocol == IPPROTO_ESP); 899 900 connp->conn_ulp = protocol; 901 902 /* Insert it in the protocol hash */ 903 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 904 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 905 } 906 907 void 908 ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 909 { 910 connf_t *connfp; 911 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 912 913 ASSERT(connp != NULL); 914 ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 915 protocol == IPPROTO_ESP); 916 917 connp->conn_ulp = protocol; 918 919 /* Insert it in the Bind Hash */ 920 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 921 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 922 } 923 924 /* 925 * This function is used only for inserting SCTP raw socket now. 926 * This may change later. 927 * 928 * Note that only one raw socket can be bound to a port. The param 929 * lport is in network byte order. 930 */ 931 static int 932 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 933 { 934 connf_t *connfp; 935 conn_t *oconnp; 936 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 937 938 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 939 940 /* Check for existing raw socket already bound to the port. 
*/ 941 mutex_enter(&connfp->connf_lock); 942 for (oconnp = connfp->connf_head; oconnp != NULL; 943 oconnp = oconnp->conn_next) { 944 if (oconnp->conn_lport == lport && 945 oconnp->conn_zoneid == connp->conn_zoneid && 946 oconnp->conn_af_isv6 == connp->conn_af_isv6 && 947 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 948 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 949 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 950 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 951 IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 952 &connp->conn_srcv6))) { 953 break; 954 } 955 } 956 mutex_exit(&connfp->connf_lock); 957 if (oconnp != NULL) 958 return (EADDRNOTAVAIL); 959 960 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 961 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 962 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 963 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 964 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 965 } else { 966 IPCL_HASH_INSERT_BOUND(connfp, connp); 967 } 968 } else { 969 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 970 } 971 return (0); 972 } 973 974 /* 975 * Check for a MAC exemption conflict on a labeled system. Note that for 976 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 977 * transport layer. This check is for binding all other protocols. 978 * 979 * Returns true if there's a conflict. 
980 */ 981 static boolean_t 982 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 983 { 984 connf_t *connfp; 985 conn_t *tconn; 986 987 connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 988 mutex_enter(&connfp->connf_lock); 989 for (tconn = connfp->connf_head; tconn != NULL; 990 tconn = tconn->conn_next) { 991 /* We don't allow v4 fallback for v6 raw socket */ 992 if (connp->conn_af_isv6 != tconn->conn_af_isv6) 993 continue; 994 /* If neither is exempt, then there's no conflict */ 995 if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 996 continue; 997 /* If both are bound to different specific addrs, ok */ 998 if (connp->conn_src != INADDR_ANY && 999 tconn->conn_src != INADDR_ANY && 1000 connp->conn_src != tconn->conn_src) 1001 continue; 1002 /* These two conflict; fail */ 1003 break; 1004 } 1005 mutex_exit(&connfp->connf_lock); 1006 return (tconn != NULL); 1007 } 1008 1009 static boolean_t 1010 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 1011 { 1012 connf_t *connfp; 1013 conn_t *tconn; 1014 1015 connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 1016 mutex_enter(&connfp->connf_lock); 1017 for (tconn = connfp->connf_head; tconn != NULL; 1018 tconn = tconn->conn_next) { 1019 /* We don't allow v4 fallback for v6 raw socket */ 1020 if (connp->conn_af_isv6 != tconn->conn_af_isv6) 1021 continue; 1022 /* If neither is exempt, then there's no conflict */ 1023 if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 1024 continue; 1025 /* If both are bound to different addrs, ok */ 1026 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) && 1027 !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) && 1028 !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6)) 1029 continue; 1030 /* These two conflict; fail */ 1031 break; 1032 } 1033 mutex_exit(&connfp->connf_lock); 1034 return (tconn != NULL); 1035 } 1036 1037 /* 1038 * (v4, v6) bind hash insertion routines 1039 */ 1040 int 1041 ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t 
src, uint16_t lport) 1042 { 1043 connf_t *connfp; 1044 #ifdef IPCL_DEBUG 1045 char buf[INET_NTOA_BUFSIZE]; 1046 #endif 1047 int ret = 0; 1048 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1049 1050 ASSERT(connp); 1051 1052 IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 1053 "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 1054 1055 connp->conn_ulp = protocol; 1056 IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 1057 connp->conn_lport = lport; 1058 1059 switch (protocol) { 1060 default: 1061 if (is_system_labeled() && 1062 check_exempt_conflict_v4(connp, ipst)) 1063 return (EADDRINUSE); 1064 /* FALLTHROUGH */ 1065 case IPPROTO_UDP: 1066 if (protocol == IPPROTO_UDP) { 1067 IPCL_DEBUG_LVL(64, 1068 ("ipcl_bind_insert: connp %p - udp\n", 1069 (void *)connp)); 1070 connfp = &ipst->ips_ipcl_udp_fanout[ 1071 IPCL_UDP_HASH(lport, ipst)]; 1072 } else { 1073 IPCL_DEBUG_LVL(64, 1074 ("ipcl_bind_insert: connp %p - protocol\n", 1075 (void *)connp)); 1076 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 1077 } 1078 1079 if (connp->conn_rem != INADDR_ANY) { 1080 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1081 } else if (connp->conn_src != INADDR_ANY) { 1082 IPCL_HASH_INSERT_BOUND(connfp, connp); 1083 } else { 1084 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1085 } 1086 break; 1087 1088 case IPPROTO_TCP: 1089 1090 /* Insert it in the Bind Hash */ 1091 ASSERT(connp->conn_zoneid != ALL_ZONES); 1092 connfp = &ipst->ips_ipcl_bind_fanout[ 1093 IPCL_BIND_HASH(lport, ipst)]; 1094 if (connp->conn_src != INADDR_ANY) { 1095 IPCL_HASH_INSERT_BOUND(connfp, connp); 1096 } else { 1097 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1098 } 1099 if (cl_inet_listen != NULL) { 1100 ASSERT(!connp->conn_pkt_isv6); 1101 connp->conn_flags |= IPCL_CL_LISTENER; 1102 (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 1103 (uint8_t *)&connp->conn_bound_source, lport); 1104 } 1105 break; 1106 1107 case IPPROTO_SCTP: 1108 ret = ipcl_sctp_hash_insert(connp, lport); 1109 break; 1110 } 1111 1112 
return (ret); 1113 } 1114 1115 int 1116 ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 1117 uint16_t lport) 1118 { 1119 connf_t *connfp; 1120 int ret = 0; 1121 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1122 1123 ASSERT(connp); 1124 1125 connp->conn_ulp = protocol; 1126 connp->conn_srcv6 = *src; 1127 connp->conn_lport = lport; 1128 1129 switch (protocol) { 1130 default: 1131 if (is_system_labeled() && 1132 check_exempt_conflict_v6(connp, ipst)) 1133 return (EADDRINUSE); 1134 /* FALLTHROUGH */ 1135 case IPPROTO_UDP: 1136 if (protocol == IPPROTO_UDP) { 1137 IPCL_DEBUG_LVL(128, 1138 ("ipcl_bind_insert_v6: connp %p - udp\n", 1139 (void *)connp)); 1140 connfp = &ipst->ips_ipcl_udp_fanout[ 1141 IPCL_UDP_HASH(lport, ipst)]; 1142 } else { 1143 IPCL_DEBUG_LVL(128, 1144 ("ipcl_bind_insert_v6: connp %p - protocol\n", 1145 (void *)connp)); 1146 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 1147 } 1148 1149 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 1150 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1151 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 1152 IPCL_HASH_INSERT_BOUND(connfp, connp); 1153 } else { 1154 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1155 } 1156 break; 1157 1158 case IPPROTO_TCP: 1159 /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? 
*/ 1160 1161 /* Insert it in the Bind Hash */ 1162 ASSERT(connp->conn_zoneid != ALL_ZONES); 1163 connfp = &ipst->ips_ipcl_bind_fanout[ 1164 IPCL_BIND_HASH(lport, ipst)]; 1165 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 1166 IPCL_HASH_INSERT_BOUND(connfp, connp); 1167 } else { 1168 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1169 } 1170 if (cl_inet_listen != NULL) { 1171 sa_family_t addr_family; 1172 uint8_t *laddrp; 1173 1174 if (connp->conn_pkt_isv6) { 1175 addr_family = AF_INET6; 1176 laddrp = 1177 (uint8_t *)&connp->conn_bound_source_v6; 1178 } else { 1179 addr_family = AF_INET; 1180 laddrp = (uint8_t *)&connp->conn_bound_source; 1181 } 1182 connp->conn_flags |= IPCL_CL_LISTENER; 1183 (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 1184 lport); 1185 } 1186 break; 1187 1188 case IPPROTO_SCTP: 1189 ret = ipcl_sctp_hash_insert(connp, lport); 1190 break; 1191 } 1192 1193 return (ret); 1194 } 1195 1196 /* 1197 * ipcl_conn_hash insertion routines. 1198 */ 1199 int 1200 ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, 1201 ipaddr_t rem, uint32_t ports) 1202 { 1203 connf_t *connfp; 1204 uint16_t *up; 1205 conn_t *tconnp; 1206 #ifdef IPCL_DEBUG 1207 char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; 1208 #endif 1209 in_port_t lport; 1210 int ret = 0; 1211 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1212 1213 IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " 1214 "dst = %s, ports = %x, protocol = %x", (void *)connp, 1215 inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), 1216 ports, protocol)); 1217 1218 switch (protocol) { 1219 case IPPROTO_TCP: 1220 if (!(connp->conn_flags & IPCL_EAGER)) { 1221 /* 1222 * for a eager connection, i.e connections which 1223 * have just been created, the initialization is 1224 * already done in ip at conn_creation time, so 1225 * we can skip the checks here. 
1226 */ 1227 IPCL_CONN_INIT(connp, protocol, src, rem, ports); 1228 } 1229 connfp = &ipst->ips_ipcl_conn_fanout[ 1230 IPCL_CONN_HASH(connp->conn_rem, 1231 connp->conn_ports, ipst)]; 1232 mutex_enter(&connfp->connf_lock); 1233 for (tconnp = connfp->connf_head; tconnp != NULL; 1234 tconnp = tconnp->conn_next) { 1235 if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp, 1236 connp->conn_rem, connp->conn_src, 1237 connp->conn_ports)) { 1238 1239 /* Already have a conn. bail out */ 1240 mutex_exit(&connfp->connf_lock); 1241 return (EADDRINUSE); 1242 } 1243 } 1244 if (connp->conn_fanout != NULL) { 1245 /* 1246 * Probably a XTI/TLI application trying to do a 1247 * rebind. Let it happen. 1248 */ 1249 mutex_exit(&connfp->connf_lock); 1250 IPCL_HASH_REMOVE(connp); 1251 mutex_enter(&connfp->connf_lock); 1252 } 1253 1254 ASSERT(connp->conn_recv != NULL); 1255 1256 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1257 mutex_exit(&connfp->connf_lock); 1258 break; 1259 1260 case IPPROTO_SCTP: 1261 /* 1262 * The raw socket may have already been bound, remove it 1263 * from the hash first. 1264 */ 1265 IPCL_HASH_REMOVE(connp); 1266 lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 1267 ret = ipcl_sctp_hash_insert(connp, lport); 1268 break; 1269 1270 default: 1271 /* 1272 * Check for conflicts among MAC exempt bindings. For 1273 * transports with port numbers, this is done by the upper 1274 * level per-transport binding logic. For all others, it's 1275 * done here. 
1276 */ 1277 if (is_system_labeled() && 1278 check_exempt_conflict_v4(connp, ipst)) 1279 return (EADDRINUSE); 1280 /* FALLTHROUGH */ 1281 1282 case IPPROTO_UDP: 1283 up = (uint16_t *)&ports; 1284 IPCL_CONN_INIT(connp, protocol, src, rem, ports); 1285 if (protocol == IPPROTO_UDP) { 1286 connfp = &ipst->ips_ipcl_udp_fanout[ 1287 IPCL_UDP_HASH(up[1], ipst)]; 1288 } else { 1289 connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 1290 } 1291 1292 if (connp->conn_rem != INADDR_ANY) { 1293 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1294 } else if (connp->conn_src != INADDR_ANY) { 1295 IPCL_HASH_INSERT_BOUND(connfp, connp); 1296 } else { 1297 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1298 } 1299 break; 1300 } 1301 1302 return (ret); 1303 } 1304 1305 int 1306 ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 1307 const in6_addr_t *rem, uint32_t ports, uint_t ifindex) 1308 { 1309 connf_t *connfp; 1310 uint16_t *up; 1311 conn_t *tconnp; 1312 in_port_t lport; 1313 int ret = 0; 1314 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1315 1316 switch (protocol) { 1317 case IPPROTO_TCP: 1318 /* Just need to insert a conn struct */ 1319 if (!(connp->conn_flags & IPCL_EAGER)) { 1320 IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 1321 } 1322 connfp = &ipst->ips_ipcl_conn_fanout[ 1323 IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports, 1324 ipst)]; 1325 mutex_enter(&connfp->connf_lock); 1326 for (tconnp = connfp->connf_head; tconnp != NULL; 1327 tconnp = tconnp->conn_next) { 1328 if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 1329 connp->conn_remv6, connp->conn_srcv6, 1330 connp->conn_ports) && 1331 (tconnp->conn_tcp->tcp_bound_if == 0 || 1332 tconnp->conn_tcp->tcp_bound_if == ifindex)) { 1333 /* Already have a conn. bail out */ 1334 mutex_exit(&connfp->connf_lock); 1335 return (EADDRINUSE); 1336 } 1337 } 1338 if (connp->conn_fanout != NULL) { 1339 /* 1340 * Probably a XTI/TLI application trying to do a 1341 * rebind. Let it happen. 
1342 */ 1343 mutex_exit(&connfp->connf_lock); 1344 IPCL_HASH_REMOVE(connp); 1345 mutex_enter(&connfp->connf_lock); 1346 } 1347 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1348 mutex_exit(&connfp->connf_lock); 1349 break; 1350 1351 case IPPROTO_SCTP: 1352 IPCL_HASH_REMOVE(connp); 1353 lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 1354 ret = ipcl_sctp_hash_insert(connp, lport); 1355 break; 1356 1357 default: 1358 if (is_system_labeled() && 1359 check_exempt_conflict_v6(connp, ipst)) 1360 return (EADDRINUSE); 1361 /* FALLTHROUGH */ 1362 case IPPROTO_UDP: 1363 up = (uint16_t *)&ports; 1364 IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 1365 if (protocol == IPPROTO_UDP) { 1366 connfp = &ipst->ips_ipcl_udp_fanout[ 1367 IPCL_UDP_HASH(up[1], ipst)]; 1368 } else { 1369 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 1370 } 1371 1372 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 1373 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1374 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 1375 IPCL_HASH_INSERT_BOUND(connfp, connp); 1376 } else { 1377 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1378 } 1379 break; 1380 } 1381 1382 return (ret); 1383 } 1384 1385 /* 1386 * v4 packet classifying function. looks up the fanout table to 1387 * find the conn, the packet belongs to. returns the conn with 1388 * the reference held, null otherwise. 1389 * 1390 * If zoneid is ALL_ZONES, then the search rules described in the "Connection 1391 * Lookup" comment block are applied. Labels are also checked as described 1392 * above. If the packet is from the inside (looped back), and is from the same 1393 * zone, then label checks are omitted. 
1394 */ 1395 conn_t * 1396 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 1397 ip_stack_t *ipst) 1398 { 1399 ipha_t *ipha; 1400 connf_t *connfp, *bind_connfp; 1401 uint16_t lport; 1402 uint16_t fport; 1403 uint32_t ports; 1404 conn_t *connp; 1405 uint16_t *up; 1406 boolean_t shared_addr; 1407 boolean_t unlabeled; 1408 1409 ipha = (ipha_t *)mp->b_rptr; 1410 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 1411 1412 switch (protocol) { 1413 case IPPROTO_TCP: 1414 ports = *(uint32_t *)up; 1415 connfp = 1416 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 1417 ports, ipst)]; 1418 mutex_enter(&connfp->connf_lock); 1419 for (connp = connfp->connf_head; connp != NULL; 1420 connp = connp->conn_next) { 1421 if (IPCL_CONN_MATCH(connp, protocol, 1422 ipha->ipha_src, ipha->ipha_dst, ports)) 1423 break; 1424 } 1425 1426 if (connp != NULL) { 1427 /* 1428 * We have a fully-bound TCP connection. 1429 * 1430 * For labeled systems, there's no need to check the 1431 * label here. It's known to be good as we checked 1432 * before allowing the connection to become bound. 1433 */ 1434 CONN_INC_REF(connp); 1435 mutex_exit(&connfp->connf_lock); 1436 return (connp); 1437 } 1438 1439 mutex_exit(&connfp->connf_lock); 1440 1441 lport = up[1]; 1442 unlabeled = B_FALSE; 1443 /* Cred cannot be null on IPv4 */ 1444 if (is_system_labeled()) 1445 unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 1446 TSLF_UNLABELED) != 0; 1447 shared_addr = (zoneid == ALL_ZONES); 1448 if (shared_addr) { 1449 /* 1450 * No need to handle exclusive-stack zones since 1451 * ALL_ZONES only applies to the shared stack. 1452 */ 1453 zoneid = tsol_mlp_findzone(protocol, lport); 1454 /* 1455 * If no shared MLP is found, tsol_mlp_findzone returns 1456 * ALL_ZONES. In that case, we assume it's SLP, and 1457 * search for the zone based on the packet label. 1458 * 1459 * If there is such a zone, we prefer to find a 1460 * connection in it. 
Otherwise, we look for a 1461 * MAC-exempt connection in any zone whose label 1462 * dominates the default label on the packet. 1463 */ 1464 if (zoneid == ALL_ZONES) 1465 zoneid = tsol_packet_to_zoneid(mp); 1466 else 1467 unlabeled = B_FALSE; 1468 } 1469 1470 bind_connfp = 1471 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 1472 mutex_enter(&bind_connfp->connf_lock); 1473 for (connp = bind_connfp->connf_head; connp != NULL; 1474 connp = connp->conn_next) { 1475 if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 1476 lport) && (IPCL_ZONE_MATCH(connp, zoneid) || 1477 (unlabeled && connp->conn_mac_exempt))) 1478 break; 1479 } 1480 1481 /* 1482 * If the matching connection is SLP on a private address, then 1483 * the label on the packet must match the local zone's label. 1484 * Otherwise, it must be in the label range defined by tnrh. 1485 * This is ensured by tsol_receive_label. 1486 */ 1487 if (connp != NULL && is_system_labeled() && 1488 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1489 shared_addr, connp)) { 1490 DTRACE_PROBE3( 1491 tx__ip__log__info__classify__tcp, 1492 char *, 1493 "connp(1) could not receive mp(2)", 1494 conn_t *, connp, mblk_t *, mp); 1495 connp = NULL; 1496 } 1497 1498 if (connp != NULL) { 1499 /* Have a listener at least */ 1500 CONN_INC_REF(connp); 1501 mutex_exit(&bind_connfp->connf_lock); 1502 return (connp); 1503 } 1504 1505 mutex_exit(&bind_connfp->connf_lock); 1506 1507 IPCL_DEBUG_LVL(512, 1508 ("ipcl_classify: couldn't classify mp = %p\n", 1509 (void *)mp)); 1510 break; 1511 1512 case IPPROTO_UDP: 1513 lport = up[1]; 1514 unlabeled = B_FALSE; 1515 /* Cred cannot be null on IPv4 */ 1516 if (is_system_labeled()) 1517 unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 1518 TSLF_UNLABELED) != 0; 1519 shared_addr = (zoneid == ALL_ZONES); 1520 if (shared_addr) { 1521 /* 1522 * No need to handle exclusive-stack zones since 1523 * ALL_ZONES only applies to the shared stack. 
1524 */ 1525 zoneid = tsol_mlp_findzone(protocol, lport); 1526 /* 1527 * If no shared MLP is found, tsol_mlp_findzone returns 1528 * ALL_ZONES. In that case, we assume it's SLP, and 1529 * search for the zone based on the packet label. 1530 * 1531 * If there is such a zone, we prefer to find a 1532 * connection in it. Otherwise, we look for a 1533 * MAC-exempt connection in any zone whose label 1534 * dominates the default label on the packet. 1535 */ 1536 if (zoneid == ALL_ZONES) 1537 zoneid = tsol_packet_to_zoneid(mp); 1538 else 1539 unlabeled = B_FALSE; 1540 } 1541 fport = up[0]; 1542 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 1543 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 1544 mutex_enter(&connfp->connf_lock); 1545 for (connp = connfp->connf_head; connp != NULL; 1546 connp = connp->conn_next) { 1547 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 1548 fport, ipha->ipha_src) && 1549 (IPCL_ZONE_MATCH(connp, zoneid) || 1550 (unlabeled && connp->conn_mac_exempt))) 1551 break; 1552 } 1553 1554 if (connp != NULL && is_system_labeled() && 1555 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1556 shared_addr, connp)) { 1557 DTRACE_PROBE3(tx__ip__log__info__classify__udp, 1558 char *, "connp(1) could not receive mp(2)", 1559 conn_t *, connp, mblk_t *, mp); 1560 connp = NULL; 1561 } 1562 1563 if (connp != NULL) { 1564 CONN_INC_REF(connp); 1565 mutex_exit(&connfp->connf_lock); 1566 return (connp); 1567 } 1568 1569 /* 1570 * We shouldn't come here for multicast/broadcast packets 1571 */ 1572 mutex_exit(&connfp->connf_lock); 1573 IPCL_DEBUG_LVL(512, 1574 ("ipcl_classify: cant find udp conn_t for ports : %x %x", 1575 lport, fport)); 1576 break; 1577 } 1578 1579 return (NULL); 1580 } 1581 1582 conn_t * 1583 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 1584 ip_stack_t *ipst) 1585 { 1586 ip6_t *ip6h; 1587 connf_t *connfp, *bind_connfp; 1588 uint16_t lport; 1589 uint16_t fport; 1590 tcph_t 
*tcph; 1591 uint32_t ports; 1592 conn_t *connp; 1593 uint16_t *up; 1594 boolean_t shared_addr; 1595 boolean_t unlabeled; 1596 1597 ip6h = (ip6_t *)mp->b_rptr; 1598 1599 switch (protocol) { 1600 case IPPROTO_TCP: 1601 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 1602 up = (uint16_t *)tcph->th_lport; 1603 ports = *(uint32_t *)up; 1604 1605 connfp = 1606 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 1607 ports, ipst)]; 1608 mutex_enter(&connfp->connf_lock); 1609 for (connp = connfp->connf_head; connp != NULL; 1610 connp = connp->conn_next) { 1611 if (IPCL_CONN_MATCH_V6(connp, protocol, 1612 ip6h->ip6_src, ip6h->ip6_dst, ports)) 1613 break; 1614 } 1615 1616 if (connp != NULL) { 1617 /* 1618 * We have a fully-bound TCP connection. 1619 * 1620 * For labeled systems, there's no need to check the 1621 * label here. It's known to be good as we checked 1622 * before allowing the connection to become bound. 1623 */ 1624 CONN_INC_REF(connp); 1625 mutex_exit(&connfp->connf_lock); 1626 return (connp); 1627 } 1628 1629 mutex_exit(&connfp->connf_lock); 1630 1631 lport = up[1]; 1632 unlabeled = B_FALSE; 1633 /* Cred can be null on IPv6 */ 1634 if (is_system_labeled()) { 1635 cred_t *cr = DB_CRED(mp); 1636 1637 unlabeled = (cr != NULL && 1638 crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 1639 } 1640 shared_addr = (zoneid == ALL_ZONES); 1641 if (shared_addr) { 1642 /* 1643 * No need to handle exclusive-stack zones since 1644 * ALL_ZONES only applies to the shared stack. 1645 */ 1646 zoneid = tsol_mlp_findzone(protocol, lport); 1647 /* 1648 * If no shared MLP is found, tsol_mlp_findzone returns 1649 * ALL_ZONES. In that case, we assume it's SLP, and 1650 * search for the zone based on the packet label. 1651 * 1652 * If there is such a zone, we prefer to find a 1653 * connection in it. Otherwise, we look for a 1654 * MAC-exempt connection in any zone whose label 1655 * dominates the default label on the packet. 
1656 */ 1657 if (zoneid == ALL_ZONES) 1658 zoneid = tsol_packet_to_zoneid(mp); 1659 else 1660 unlabeled = B_FALSE; 1661 } 1662 1663 bind_connfp = 1664 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 1665 mutex_enter(&bind_connfp->connf_lock); 1666 for (connp = bind_connfp->connf_head; connp != NULL; 1667 connp = connp->conn_next) { 1668 if (IPCL_BIND_MATCH_V6(connp, protocol, 1669 ip6h->ip6_dst, lport) && 1670 (IPCL_ZONE_MATCH(connp, zoneid) || 1671 (unlabeled && connp->conn_mac_exempt))) 1672 break; 1673 } 1674 1675 if (connp != NULL && is_system_labeled() && 1676 !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 1677 shared_addr, connp)) { 1678 DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 1679 char *, "connp(1) could not receive mp(2)", 1680 conn_t *, connp, mblk_t *, mp); 1681 connp = NULL; 1682 } 1683 1684 if (connp != NULL) { 1685 /* Have a listner at least */ 1686 CONN_INC_REF(connp); 1687 mutex_exit(&bind_connfp->connf_lock); 1688 IPCL_DEBUG_LVL(512, 1689 ("ipcl_classify_v6: found listner " 1690 "connp = %p\n", (void *)connp)); 1691 1692 return (connp); 1693 } 1694 1695 mutex_exit(&bind_connfp->connf_lock); 1696 1697 IPCL_DEBUG_LVL(512, 1698 ("ipcl_classify_v6: couldn't classify mp = %p\n", 1699 (void *)mp)); 1700 break; 1701 1702 case IPPROTO_UDP: 1703 up = (uint16_t *)&mp->b_rptr[hdr_len]; 1704 lport = up[1]; 1705 unlabeled = B_FALSE; 1706 /* Cred can be null on IPv6 */ 1707 if (is_system_labeled()) { 1708 cred_t *cr = DB_CRED(mp); 1709 1710 unlabeled = (cr != NULL && 1711 crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 1712 } 1713 shared_addr = (zoneid == ALL_ZONES); 1714 if (shared_addr) { 1715 /* 1716 * No need to handle exclusive-stack zones since 1717 * ALL_ZONES only applies to the shared stack. 1718 */ 1719 zoneid = tsol_mlp_findzone(protocol, lport); 1720 /* 1721 * If no shared MLP is found, tsol_mlp_findzone returns 1722 * ALL_ZONES. In that case, we assume it's SLP, and 1723 * search for the zone based on the packet label. 
1724 * 1725 * If there is such a zone, we prefer to find a 1726 * connection in it. Otherwise, we look for a 1727 * MAC-exempt connection in any zone whose label 1728 * dominates the default label on the packet. 1729 */ 1730 if (zoneid == ALL_ZONES) 1731 zoneid = tsol_packet_to_zoneid(mp); 1732 else 1733 unlabeled = B_FALSE; 1734 } 1735 1736 fport = up[0]; 1737 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 1738 fport)); 1739 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 1740 mutex_enter(&connfp->connf_lock); 1741 for (connp = connfp->connf_head; connp != NULL; 1742 connp = connp->conn_next) { 1743 if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 1744 fport, ip6h->ip6_src) && 1745 (IPCL_ZONE_MATCH(connp, zoneid) || 1746 (unlabeled && connp->conn_mac_exempt))) 1747 break; 1748 } 1749 1750 if (connp != NULL && is_system_labeled() && 1751 !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 1752 shared_addr, connp)) { 1753 DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 1754 char *, "connp(1) could not receive mp(2)", 1755 conn_t *, connp, mblk_t *, mp); 1756 connp = NULL; 1757 } 1758 1759 if (connp != NULL) { 1760 CONN_INC_REF(connp); 1761 mutex_exit(&connfp->connf_lock); 1762 return (connp); 1763 } 1764 1765 /* 1766 * We shouldn't come here for multicast/broadcast packets 1767 */ 1768 mutex_exit(&connfp->connf_lock); 1769 IPCL_DEBUG_LVL(512, 1770 ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 1771 lport, fport)); 1772 break; 1773 } 1774 1775 return (NULL); 1776 } 1777 1778 /* 1779 * wrapper around ipcl_classify_(v4,v6) routines. 
1780 */ 1781 conn_t * 1782 ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) 1783 { 1784 uint16_t hdr_len; 1785 ipha_t *ipha; 1786 uint8_t *nexthdrp; 1787 1788 if (MBLKL(mp) < sizeof (ipha_t)) 1789 return (NULL); 1790 1791 switch (IPH_HDR_VERSION(mp->b_rptr)) { 1792 case IPV4_VERSION: 1793 ipha = (ipha_t *)mp->b_rptr; 1794 hdr_len = IPH_HDR_LENGTH(ipha); 1795 return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 1796 zoneid, ipst)); 1797 case IPV6_VERSION: 1798 if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 1799 &hdr_len, &nexthdrp)) 1800 return (NULL); 1801 1802 return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst)); 1803 } 1804 1805 return (NULL); 1806 } 1807 1808 conn_t * 1809 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, 1810 uint32_t ports, ipha_t *hdr, ip_stack_t *ipst) 1811 { 1812 connf_t *connfp; 1813 conn_t *connp; 1814 in_port_t lport; 1815 int af; 1816 boolean_t shared_addr; 1817 boolean_t unlabeled; 1818 const void *dst; 1819 1820 lport = ((uint16_t *)&ports)[1]; 1821 1822 unlabeled = B_FALSE; 1823 /* Cred can be null on IPv6 */ 1824 if (is_system_labeled()) { 1825 cred_t *cr = DB_CRED(mp); 1826 1827 unlabeled = (cr != NULL && 1828 crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 1829 } 1830 shared_addr = (zoneid == ALL_ZONES); 1831 if (shared_addr) { 1832 /* 1833 * No need to handle exclusive-stack zones since ALL_ZONES 1834 * only applies to the shared stack. 1835 */ 1836 zoneid = tsol_mlp_findzone(protocol, lport); 1837 /* 1838 * If no shared MLP is found, tsol_mlp_findzone returns 1839 * ALL_ZONES. In that case, we assume it's SLP, and search for 1840 * the zone based on the packet label. 1841 * 1842 * If there is such a zone, we prefer to find a connection in 1843 * it. Otherwise, we look for a MAC-exempt connection in any 1844 * zone whose label dominates the default label on the packet. 
1845 */ 1846 if (zoneid == ALL_ZONES) 1847 zoneid = tsol_packet_to_zoneid(mp); 1848 else 1849 unlabeled = B_FALSE; 1850 } 1851 1852 af = IPH_HDR_VERSION(hdr); 1853 dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst : 1854 (const void *)&((ip6_t *)hdr)->ip6_dst; 1855 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 1856 1857 mutex_enter(&connfp->connf_lock); 1858 for (connp = connfp->connf_head; connp != NULL; 1859 connp = connp->conn_next) { 1860 /* We don't allow v4 fallback for v6 raw socket. */ 1861 if (af == (connp->conn_af_isv6 ? IPV4_VERSION : 1862 IPV6_VERSION)) 1863 continue; 1864 if (connp->conn_fully_bound) { 1865 if (af == IPV4_VERSION) { 1866 if (!IPCL_CONN_MATCH(connp, protocol, 1867 hdr->ipha_src, hdr->ipha_dst, ports)) 1868 continue; 1869 } else { 1870 if (!IPCL_CONN_MATCH_V6(connp, protocol, 1871 ((ip6_t *)hdr)->ip6_src, 1872 ((ip6_t *)hdr)->ip6_dst, ports)) 1873 continue; 1874 } 1875 } else { 1876 if (af == IPV4_VERSION) { 1877 if (!IPCL_BIND_MATCH(connp, protocol, 1878 hdr->ipha_dst, lport)) 1879 continue; 1880 } else { 1881 if (!IPCL_BIND_MATCH_V6(connp, protocol, 1882 ((ip6_t *)hdr)->ip6_dst, lport)) 1883 continue; 1884 } 1885 } 1886 1887 if (IPCL_ZONE_MATCH(connp, zoneid) || 1888 (unlabeled && connp->conn_mac_exempt)) 1889 break; 1890 } 1891 /* 1892 * If the connection is fully-bound and connection-oriented (TCP or 1893 * SCTP), then we've already validated the remote system's label. 1894 * There's no need to do it again for every packet. 
1895 */ 1896 if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound || 1897 !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) && 1898 !tsol_receive_local(mp, dst, af, shared_addr, connp)) { 1899 DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 1900 char *, "connp(1) could not receive mp(2)", 1901 conn_t *, connp, mblk_t *, mp); 1902 connp = NULL; 1903 } 1904 1905 if (connp != NULL) 1906 goto found; 1907 mutex_exit(&connfp->connf_lock); 1908 1909 /* Try to look for a wildcard match. */ 1910 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 1911 mutex_enter(&connfp->connf_lock); 1912 for (connp = connfp->connf_head; connp != NULL; 1913 connp = connp->conn_next) { 1914 /* We don't allow v4 fallback for v6 raw socket. */ 1915 if ((af == (connp->conn_af_isv6 ? IPV4_VERSION : 1916 IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) { 1917 continue; 1918 } 1919 if (af == IPV4_VERSION) { 1920 if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 1921 break; 1922 } else { 1923 if (IPCL_RAW_MATCH_V6(connp, protocol, 1924 ((ip6_t *)hdr)->ip6_dst)) { 1925 break; 1926 } 1927 } 1928 } 1929 1930 if (connp != NULL) 1931 goto found; 1932 1933 mutex_exit(&connfp->connf_lock); 1934 return (NULL); 1935 1936 found: 1937 ASSERT(connp != NULL); 1938 CONN_INC_REF(connp); 1939 mutex_exit(&connfp->connf_lock); 1940 return (connp); 1941 } 1942 1943 /* ARGSUSED */ 1944 static int 1945 ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) 1946 { 1947 itc_t *itc = (itc_t *)buf; 1948 conn_t *connp = &itc->itc_conn; 1949 tcp_t *tcp = &itc->itc_tcp; 1950 bzero(itc, sizeof (itc_t)); 1951 tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 1952 connp->conn_tcp = tcp; 1953 connp->conn_flags = IPCL_TCPCONN; 1954 connp->conn_ulp = IPPROTO_TCP; 1955 tcp->tcp_connp = connp; 1956 return (0); 1957 } 1958 1959 /* ARGSUSED */ 1960 static void 1961 ipcl_tcpconn_destructor(void *buf, void *cdrarg) 1962 { 1963 tcp_timermp_free(((conn_t *)buf)->conn_tcp); 1964 } 1965 1966 /* 1967 
* All conns are inserted in a global multi-list for the benefit of 1968 * walkers. The walk is guaranteed to walk all open conns at the time 1969 * of the start of the walk exactly once. This property is needed to 1970 * achieve some cleanups during unplumb of interfaces. This is achieved 1971 * as follows. 1972 * 1973 * ipcl_conn_create and ipcl_conn_destroy are the only functions that 1974 * call the insert and delete functions below at creation and deletion 1975 * time respectively. The conn never moves or changes its position in this 1976 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 1977 * won't increase due to walkers, once the conn deletion has started. Note 1978 * that we can't remove the conn from the global list and then wait for 1979 * the refcnt to drop to zero, since walkers would then see a truncated 1980 * list. CONN_INCIPIENT ensures that walkers don't start looking at 1981 * conns until ip_open is ready to make them globally visible. 1982 * The global round robin multi-list locks are held only to get the 1983 * next member/insertion/deletion and contention should be negligible 1984 * if the multi-list is much greater than the number of cpus. 1985 */ 1986 void 1987 ipcl_globalhash_insert(conn_t *connp) 1988 { 1989 int index; 1990 struct connf_s *connfp; 1991 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1992 1993 /* 1994 * No need for atomic here. Approximate even distribution 1995 * in the global lists is sufficient. 1996 */ 1997 ipst->ips_conn_g_index++; 1998 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 1999 2000 connp->conn_g_prev = NULL; 2001 /* 2002 * Mark as INCIPIENT, so that walkers will ignore this 2003 * for now, till ip_open is ready to make it visible globally. 
2004 */ 2005 connp->conn_state_flags |= CONN_INCIPIENT; 2006 2007 connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 2008 /* Insert at the head of the list */ 2009 mutex_enter(&connfp->connf_lock); 2010 connp->conn_g_next = connfp->connf_head; 2011 if (connp->conn_g_next != NULL) 2012 connp->conn_g_next->conn_g_prev = connp; 2013 connfp->connf_head = connp; 2014 2015 /* The fanout bucket this conn points to */ 2016 connp->conn_g_fanout = connfp; 2017 2018 mutex_exit(&connfp->connf_lock); 2019 } 2020 2021 void 2022 ipcl_globalhash_remove(conn_t *connp) 2023 { 2024 struct connf_s *connfp; 2025 2026 /* 2027 * We were never inserted in the global multi list. 2028 * IPCL_NONE variety is never inserted in the global multilist 2029 * since it is presumed to not need any cleanup and is transient. 2030 */ 2031 if (connp->conn_g_fanout == NULL) 2032 return; 2033 2034 connfp = connp->conn_g_fanout; 2035 mutex_enter(&connfp->connf_lock); 2036 if (connp->conn_g_prev != NULL) 2037 connp->conn_g_prev->conn_g_next = connp->conn_g_next; 2038 else 2039 connfp->connf_head = connp->conn_g_next; 2040 if (connp->conn_g_next != NULL) 2041 connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2042 mutex_exit(&connfp->connf_lock); 2043 2044 /* Better to stumble on a null pointer than to corrupt memory */ 2045 connp->conn_g_next = NULL; 2046 connp->conn_g_prev = NULL; 2047 } 2048 2049 /* 2050 * Walk the list of all conn_t's in the system, calling the function provided 2051 * with the specified argument for each. 2052 * Applies to both IPv4 and IPv6. 2053 * 2054 * IPCs may hold pointers to ipif/ill. To guard against stale pointers 2055 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 2056 * unplumbed or removed. New conn_t's that are created while we are walking 2057 * may be missed by this walk, because they are not necessarily inserted 2058 * at the tail of the list. They are new conn_t's and thus don't have any 2059 * stale pointers. 
The CONN_CLOSING flag ensures that no new reference 2060 * is created to the struct that is going away. 2061 */ 2062 void 2063 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 2064 { 2065 int i; 2066 conn_t *connp; 2067 conn_t *prev_connp; 2068 2069 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2070 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2071 prev_connp = NULL; 2072 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 2073 while (connp != NULL) { 2074 mutex_enter(&connp->conn_lock); 2075 if (connp->conn_state_flags & 2076 (CONN_CONDEMNED | CONN_INCIPIENT)) { 2077 mutex_exit(&connp->conn_lock); 2078 connp = connp->conn_g_next; 2079 continue; 2080 } 2081 CONN_INC_REF_LOCKED(connp); 2082 mutex_exit(&connp->conn_lock); 2083 mutex_exit( 2084 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2085 (*func)(connp, arg); 2086 if (prev_connp != NULL) 2087 CONN_DEC_REF(prev_connp); 2088 mutex_enter( 2089 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2090 prev_connp = connp; 2091 connp = connp->conn_g_next; 2092 } 2093 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2094 if (prev_connp != NULL) 2095 CONN_DEC_REF(prev_connp); 2096 } 2097 } 2098 2099 /* 2100 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 2101 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 2102 * held; caller must call CONN_DEC_REF. Only checks for connected entries 2103 * (peer tcp in ESTABLISHED state). 2104 */ 2105 conn_t * 2106 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph, 2107 ip_stack_t *ipst) 2108 { 2109 uint32_t ports; 2110 uint16_t *pports = (uint16_t *)&ports; 2111 connf_t *connfp; 2112 conn_t *tconnp; 2113 boolean_t zone_chk; 2114 2115 /* 2116 * If either the source of destination address is loopback, then 2117 * both endpoints must be in the same Zone. 
Otherwise, both of 2118 * the addresses are system-wide unique (tcp is in ESTABLISHED 2119 * state) and the endpoints may reside in different Zones. 2120 */ 2121 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 2122 ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 2123 2124 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 2125 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 2126 2127 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2128 ports, ipst)]; 2129 2130 mutex_enter(&connfp->connf_lock); 2131 for (tconnp = connfp->connf_head; tconnp != NULL; 2132 tconnp = tconnp->conn_next) { 2133 2134 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2135 ipha->ipha_dst, ipha->ipha_src, ports) && 2136 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2137 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2138 2139 ASSERT(tconnp != connp); 2140 CONN_INC_REF(tconnp); 2141 mutex_exit(&connfp->connf_lock); 2142 return (tconnp); 2143 } 2144 } 2145 mutex_exit(&connfp->connf_lock); 2146 return (NULL); 2147 } 2148 2149 /* 2150 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 2151 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 2152 * held; caller must call CONN_DEC_REF. Only checks for connected entries 2153 * (peer tcp in ESTABLISHED state). 2154 */ 2155 conn_t * 2156 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph, 2157 ip_stack_t *ipst) 2158 { 2159 uint32_t ports; 2160 uint16_t *pports = (uint16_t *)&ports; 2161 connf_t *connfp; 2162 conn_t *tconnp; 2163 boolean_t zone_chk; 2164 2165 /* 2166 * If either the source of destination address is loopback, then 2167 * both endpoints must be in the same Zone. Otherwise, both of 2168 * the addresses are system-wide unique (tcp is in ESTABLISHED 2169 * state) and the endpoints may reside in different Zones. 
We 2170 * don't do Zone check for link local address(es) because the 2171 * current Zone implementation treats each link local address as 2172 * being unique per system node, i.e. they belong to global Zone. 2173 */ 2174 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 2175 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 2176 2177 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 2178 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 2179 2180 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2181 ports, ipst)]; 2182 2183 mutex_enter(&connfp->connf_lock); 2184 for (tconnp = connfp->connf_head; tconnp != NULL; 2185 tconnp = tconnp->conn_next) { 2186 2187 /* We skip tcp_bound_if check here as this is loopback tcp */ 2188 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2189 ip6h->ip6_dst, ip6h->ip6_src, ports) && 2190 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2191 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2192 2193 ASSERT(tconnp != connp); 2194 CONN_INC_REF(tconnp); 2195 mutex_exit(&connfp->connf_lock); 2196 return (tconnp); 2197 } 2198 } 2199 mutex_exit(&connfp->connf_lock); 2200 return (NULL); 2201 } 2202 2203 /* 2204 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2205 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2206 * Only checks for connected entries i.e. no INADDR_ANY checks. 
2207 */ 2208 conn_t * 2209 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, 2210 ip_stack_t *ipst) 2211 { 2212 uint32_t ports; 2213 uint16_t *pports; 2214 connf_t *connfp; 2215 conn_t *tconnp; 2216 2217 pports = (uint16_t *)&ports; 2218 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 2219 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 2220 2221 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2222 ports, ipst)]; 2223 2224 mutex_enter(&connfp->connf_lock); 2225 for (tconnp = connfp->connf_head; tconnp != NULL; 2226 tconnp = tconnp->conn_next) { 2227 2228 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2229 ipha->ipha_dst, ipha->ipha_src, ports) && 2230 tconnp->conn_tcp->tcp_state >= min_state) { 2231 2232 CONN_INC_REF(tconnp); 2233 mutex_exit(&connfp->connf_lock); 2234 return (tconnp); 2235 } 2236 } 2237 mutex_exit(&connfp->connf_lock); 2238 return (NULL); 2239 } 2240 2241 /* 2242 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2243 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2244 * Only checks for connected entries i.e. no INADDR_ANY checks. 2245 * Match on ifindex in addition to addresses. 
2246 */ 2247 conn_t * 2248 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2249 uint_t ifindex, ip_stack_t *ipst) 2250 { 2251 tcp_t *tcp; 2252 uint32_t ports; 2253 uint16_t *pports; 2254 connf_t *connfp; 2255 conn_t *tconnp; 2256 2257 pports = (uint16_t *)&ports; 2258 pports[0] = tcpha->tha_fport; 2259 pports[1] = tcpha->tha_lport; 2260 2261 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2262 ports, ipst)]; 2263 2264 mutex_enter(&connfp->connf_lock); 2265 for (tconnp = connfp->connf_head; tconnp != NULL; 2266 tconnp = tconnp->conn_next) { 2267 2268 tcp = tconnp->conn_tcp; 2269 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2270 ip6h->ip6_dst, ip6h->ip6_src, ports) && 2271 tcp->tcp_state >= min_state && 2272 (tcp->tcp_bound_if == 0 || 2273 tcp->tcp_bound_if == ifindex)) { 2274 2275 CONN_INC_REF(tconnp); 2276 mutex_exit(&connfp->connf_lock); 2277 return (tconnp); 2278 } 2279 } 2280 mutex_exit(&connfp->connf_lock); 2281 return (NULL); 2282 } 2283 2284 /* 2285 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 2286 * a listener when changing state. 2287 */ 2288 conn_t * 2289 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2290 ip_stack_t *ipst) 2291 { 2292 connf_t *bind_connfp; 2293 conn_t *connp; 2294 tcp_t *tcp; 2295 2296 /* 2297 * Avoid false matches for packets sent to an IP destination of 2298 * all zeros. 
2299 */ 2300 if (laddr == 0) 2301 return (NULL); 2302 2303 ASSERT(zoneid != ALL_ZONES); 2304 2305 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2306 mutex_enter(&bind_connfp->connf_lock); 2307 for (connp = bind_connfp->connf_head; connp != NULL; 2308 connp = connp->conn_next) { 2309 tcp = connp->conn_tcp; 2310 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 2311 IPCL_ZONE_MATCH(connp, zoneid) && 2312 (tcp->tcp_listener == NULL)) { 2313 CONN_INC_REF(connp); 2314 mutex_exit(&bind_connfp->connf_lock); 2315 return (connp); 2316 } 2317 } 2318 mutex_exit(&bind_connfp->connf_lock); 2319 return (NULL); 2320 } 2321 2322 /* 2323 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 2324 * a listener when changing state. 2325 */ 2326 conn_t * 2327 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2328 zoneid_t zoneid, ip_stack_t *ipst) 2329 { 2330 connf_t *bind_connfp; 2331 conn_t *connp = NULL; 2332 tcp_t *tcp; 2333 2334 /* 2335 * Avoid false matches for packets sent to an IP destination of 2336 * all zeros. 2337 */ 2338 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 2339 return (NULL); 2340 2341 ASSERT(zoneid != ALL_ZONES); 2342 2343 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2344 mutex_enter(&bind_connfp->connf_lock); 2345 for (connp = bind_connfp->connf_head; connp != NULL; 2346 connp = connp->conn_next) { 2347 tcp = connp->conn_tcp; 2348 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 2349 IPCL_ZONE_MATCH(connp, zoneid) && 2350 (tcp->tcp_bound_if == 0 || 2351 tcp->tcp_bound_if == ifindex) && 2352 tcp->tcp_listener == NULL) { 2353 CONN_INC_REF(connp); 2354 mutex_exit(&bind_connfp->connf_lock); 2355 return (connp); 2356 } 2357 } 2358 mutex_exit(&bind_connfp->connf_lock); 2359 return (NULL); 2360 } 2361 2362 /* 2363 * ipcl_get_next_conn 2364 * get the next entry in the conn global list 2365 * and put a reference on the next_conn. 
2366 * decrement the reference on the current conn. 2367 * 2368 * This is an iterator based walker function that also provides for 2369 * some selection by the caller. It walks through the conn_hash bucket 2370 * searching for the next valid connp in the list, and selects connections 2371 * that are neither closed nor condemned. It also REFHOLDS the conn 2372 * thus ensuring that the conn exists when the caller uses the conn. 2373 */ 2374 conn_t * 2375 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2376 { 2377 conn_t *next_connp; 2378 2379 if (connfp == NULL) 2380 return (NULL); 2381 2382 mutex_enter(&connfp->connf_lock); 2383 2384 next_connp = (connp == NULL) ? 2385 connfp->connf_head : connp->conn_g_next; 2386 2387 while (next_connp != NULL) { 2388 mutex_enter(&next_connp->conn_lock); 2389 if (!(next_connp->conn_flags & conn_flags) || 2390 (next_connp->conn_state_flags & 2391 (CONN_CONDEMNED | CONN_INCIPIENT))) { 2392 /* 2393 * This conn has been condemned or 2394 * is closing, or the flags don't match 2395 */ 2396 mutex_exit(&next_connp->conn_lock); 2397 next_connp = next_connp->conn_g_next; 2398 continue; 2399 } 2400 CONN_INC_REF_LOCKED(next_connp); 2401 mutex_exit(&next_connp->conn_lock); 2402 break; 2403 } 2404 2405 mutex_exit(&connfp->connf_lock); 2406 2407 if (connp != NULL) 2408 CONN_DEC_REF(connp); 2409 2410 return (next_connp); 2411 } 2412 2413 #ifdef CONN_DEBUG 2414 /* 2415 * Trace of the last NBUF refhold/refrele 2416 */ 2417 int 2418 conn_trace_ref(conn_t *connp) 2419 { 2420 int last; 2421 conn_trace_t *ctb; 2422 2423 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2424 last = connp->conn_trace_last; 2425 last++; 2426 if (last == CONN_TRACE_MAX) 2427 last = 0; 2428 2429 ctb = &connp->conn_trace_buf[last]; 2430 ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 2431 connp->conn_trace_last = last; 2432 return (1); 2433 } 2434 2435 int 2436 conn_untrace_ref(conn_t *connp) 2437 { 2438 int last; 2439 conn_trace_t *ctb; 2440 2441 
ASSERT(MUTEX_HELD(&connp->conn_lock)); 2442 last = connp->conn_trace_last; 2443 last++; 2444 if (last == CONN_TRACE_MAX) 2445 last = 0; 2446 2447 ctb = &connp->conn_trace_buf[last]; 2448 ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 2449 connp->conn_trace_last = last; 2450 return (1); 2451 } 2452 #endif 2453