1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char ipclassifier_version[] = "@(#)ipclassifier.c 1.6 04/03/31 SMI"; 30 31 /* 32 * IP PACKET CLASSIFIER 33 * 34 * The IP packet classifier provides mapping between IP packets and persistent 35 * connection state for connection-oriented protocols. It also provides 36 * interface for managing connection states. 37 * 38 * The connection state is kept in conn_t data structure and contains, among 39 * other things: 40 * 41 * o local/remote address and ports 42 * o Transport protocol 43 * o squeue for the connection (for TCP only) 44 * o reference counter 45 * o Connection state 46 * o hash table linkage 47 * o interface/ire information 48 * o credentials 49 * o ipsec policy 50 * o send and receive functions. 51 * o mutex lock. 52 * 53 * Connections use a reference counting scheme. They are freed when the 54 * reference counter drops to zero. 
 * A reference is incremented when connection
 * is placed in a list or table, when incoming packet for the connection arrives
 * and when connection is processed via squeue (squeue processing may be
 * asynchronous and the reference protects the connection from being destroyed
 * before its processing is finished).
 *
 * send and receive functions are currently used for TCP only. The send function
 * determines the IP entry point for the packet once it leaves TCP to be sent to
 * the destination address. The receive function is used by IP when the packet
 * should be passed for TCP processing. When a new connection is created these
 * are set to ip_output() and tcp_input() respectively. During the lifetime of
 * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
 * tcp_conn_request().  This allows incoming SYNs to go directly into the
 * listener SYN processing function without going to tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any).
 * This reference should be dropped
 * when the caller has finished processing the connection.
 *
 *
 * INTERFACES:
 * ===========
 *
 * Connection Lookup:
 * ------------------
 *
 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid)
 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid)
 *
 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
 * it can't find any associated connection. If the connection is found, its
 * reference counter is incremented.
 *
 *	mp:	mblock, containing packet header. The full header should fit
 *		into a single mblock. It should also contain at least full IP
 *		and TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
 *		 the packet.
 *
 *	zoneid: The zone in which the returned connection must be.
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * conn_t	*ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int);
 * conn_t	*ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
185 * 186 * Connection Creation/Destruction 187 * ------------------------------- 188 * 189 * conn_t *ipcl_conn_create(type, sleep) 190 * 191 * Creates a new conn based on the type flag, inserts it into 192 * globalhash table. 193 * 194 * type: This flag determines the type of conn_t which needs to be 195 * created. 196 * IPCL_TCPCONN indicates a TCP connection 197 * IPCL_IPCONN indicates all non-TCP connections. 198 * 199 * void ipcl_conn_destroy(connp) 200 * 201 * Destroys the connection state, removes it from the global 202 * connection hash table and frees its memory. 203 */ 204 205 #include <sys/types.h> 206 #include <sys/stream.h> 207 #include <sys/dlpi.h> 208 #include <sys/stropts.h> 209 #include <sys/sysmacros.h> 210 #include <sys/strsubr.h> 211 #include <sys/strlog.h> 212 #include <sys/strsun.h> 213 #define _SUN_TPI_VERSION 2 214 #include <sys/ddi.h> 215 #include <sys/cmn_err.h> 216 #include <sys/debug.h> 217 218 #include <sys/systm.h> 219 #include <sys/param.h> 220 #include <sys/kmem.h> 221 #include <sys/isa_defs.h> 222 #include <inet/common.h> 223 #include <netinet/ip6.h> 224 #include <netinet/icmp6.h> 225 226 #include <inet/ip.h> 227 #include <inet/ip6.h> 228 #include <inet/tcp.h> 229 #include <inet/tcp_trace.h> 230 #include <inet/ip_multi.h> 231 #include <inet/ip_if.h> 232 #include <inet/ip_ire.h> 233 #include <inet/ip_rts.h> 234 #include <inet/optcom.h> 235 #include <inet/ip_ndp.h> 236 #include <inet/udp_impl.h> 237 #include <inet/sctp_ip.h> 238 239 #include <sys/ethernet.h> 240 #include <net/if_types.h> 241 #include <sys/cpuvar.h> 242 243 #include <inet/mi.h> 244 #include <inet/ipclassifier.h> 245 #include <inet/ipsec_impl.h> 246 247 #ifdef DEBUG 248 #define IPCL_DEBUG 249 #else 250 #undef IPCL_DEBUG 251 #endif 252 253 #ifdef IPCL_DEBUG 254 int ipcl_debug_level = 0; 255 #define IPCL_DEBUG_LVL(level, args) \ 256 if (ipcl_debug_level & level) { printf args; } 257 #else 258 #define IPCL_DEBUG_LVL(level, args) {; } 259 #endif 260 connf_t 
*ipcl_conn_fanout; 261 connf_t *ipcl_bind_fanout; 262 connf_t ipcl_proto_fanout[IPPROTO_MAX + 1]; 263 connf_t ipcl_proto_fanout_v6[IPPROTO_MAX + 1]; 264 connf_t *ipcl_udp_fanout; 265 266 /* A separate hash list for raw socket. */ 267 connf_t *ipcl_raw_fanout; 268 269 connf_t rts_clients; 270 271 /* Old value for compatibility */ 272 uint_t tcp_conn_hash_size = 0; 273 274 /* New value. Zero means choose automatically. */ 275 uint_t ipcl_conn_hash_size = 0; 276 uint_t ipcl_conn_hash_memfactor = 8192; 277 uint_t ipcl_conn_hash_maxsize = 82500; 278 279 uint_t ipcl_conn_fanout_size = 0; 280 281 282 /* bind/udp fanout table size */ 283 uint_t ipcl_bind_fanout_size = 512; 284 uint_t ipcl_udp_fanout_size = 256; 285 286 /* Raw socket fanout size. Must be a power of 2. */ 287 uint_t ipcl_raw_fanout_size = 256; 288 289 /* 290 * Power of 2^N Primes useful for hashing for N of 0-28, 291 * these primes are the nearest prime <= 2^N - 2^(N-2). 292 */ 293 294 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 295 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 296 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 297 50331599, 100663291, 201326557, 0} 298 299 /* 300 * wrapper structure to ensure that conn+tcpb are aligned 301 * on cache lines. 
302 */ 303 typedef struct itc_s { 304 union { 305 conn_t itcu_conn; 306 char itcu_filler[CACHE_ALIGN(conn_s)]; 307 } itc_u; 308 tcp_t itc_tcp; 309 } itc_t; 310 311 #define itc_conn itc_u.itcu_conn 312 313 struct kmem_cache *ipcl_tcpconn_cache; 314 struct kmem_cache *ipcl_tcp_cache; 315 struct kmem_cache *ipcl_conn_cache; 316 extern struct kmem_cache *sctp_conn_cache; 317 extern struct kmem_cache *tcp_sack_info_cache; 318 extern struct kmem_cache *tcp_iphc_cache; 319 320 extern void tcp_timermp_free(tcp_t *); 321 extern mblk_t *tcp_timermp_alloc(int); 322 323 static int ipcl_tcpconn_constructor(void *, void *, int); 324 static void ipcl_tcpconn_destructor(void *, void *); 325 326 static int conn_g_index; 327 connf_t *ipcl_globalhash_fanout; 328 329 #ifdef IPCL_DEBUG 330 #define INET_NTOA_BUFSIZE 18 331 332 static char * 333 inet_ntoa_r(uint32_t in, char *b) 334 { 335 unsigned char *p; 336 337 p = (unsigned char *)∈ 338 (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 339 return (b); 340 } 341 #endif 342 343 /* 344 * ipclassifier intialization routine, sets up hash tables and 345 * conn caches. 346 */ 347 void 348 ipcl_init(void) 349 { 350 int i; 351 int sizes[] = P2Ps(); 352 353 ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", 354 sizeof (conn_t), CACHE_ALIGN_SIZE, 355 NULL, NULL, NULL, NULL, NULL, 0); 356 357 ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache", 358 sizeof (itc_t), CACHE_ALIGN_SIZE, 359 ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, 360 NULL, NULL, NULL, 0); 361 362 /* 363 * Calculate size of conn fanout table. 
364 */ 365 if (ipcl_conn_hash_size != 0) { 366 ipcl_conn_fanout_size = ipcl_conn_hash_size; 367 } else if (tcp_conn_hash_size != 0) { 368 ipcl_conn_fanout_size = tcp_conn_hash_size; 369 } else { 370 extern pgcnt_t freemem; 371 372 ipcl_conn_fanout_size = 373 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 374 375 if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) 376 ipcl_conn_fanout_size = ipcl_conn_hash_maxsize; 377 } 378 379 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 380 if (sizes[i] >= ipcl_conn_fanout_size) { 381 break; 382 } 383 } 384 if ((ipcl_conn_fanout_size = sizes[i]) == 0) { 385 /* Out of range, use the 2^16 value */ 386 ipcl_conn_fanout_size = sizes[16]; 387 } 388 ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size * 389 sizeof (*ipcl_conn_fanout), KM_SLEEP); 390 391 for (i = 0; i < ipcl_conn_fanout_size; i++) { 392 mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL, 393 MUTEX_DEFAULT, NULL); 394 } 395 396 ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size * 397 sizeof (*ipcl_bind_fanout), KM_SLEEP); 398 399 for (i = 0; i < ipcl_bind_fanout_size; i++) { 400 mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL, 401 MUTEX_DEFAULT, NULL); 402 } 403 404 for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) { 405 mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL, 406 MUTEX_DEFAULT, NULL); 407 } 408 for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) { 409 mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL, 410 MUTEX_DEFAULT, NULL); 411 } 412 413 mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL); 414 415 ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size * 416 sizeof (*ipcl_udp_fanout), KM_SLEEP); 417 418 for (i = 0; i < ipcl_udp_fanout_size; i++) { 419 mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL, 420 MUTEX_DEFAULT, NULL); 421 } 422 423 ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size * 424 sizeof (*ipcl_raw_fanout), KM_SLEEP); 425 426 for (i = 0; i < ipcl_raw_fanout_size; i++) { 427 
mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL, 428 MUTEX_DEFAULT, NULL); 429 } 430 431 ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) * 432 CONN_G_HASH_SIZE, KM_SLEEP); 433 434 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 435 mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL, 436 MUTEX_DEFAULT, NULL); 437 } 438 } 439 440 void 441 ipcl_destroy(void) 442 { 443 int i; 444 kmem_cache_destroy(ipcl_conn_cache); 445 kmem_cache_destroy(ipcl_tcpconn_cache); 446 for (i = 0; i < ipcl_conn_fanout_size; i++) 447 mutex_destroy(&ipcl_conn_fanout[i].connf_lock); 448 kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size * 449 sizeof (*ipcl_conn_fanout)); 450 for (i = 0; i < ipcl_bind_fanout_size; i++) 451 mutex_destroy(&ipcl_bind_fanout[i].connf_lock); 452 kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size * 453 sizeof (*ipcl_bind_fanout)); 454 455 for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) 456 mutex_destroy(&ipcl_proto_fanout[i].connf_lock); 457 for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) 458 mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock); 459 460 for (i = 0; i < ipcl_udp_fanout_size; i++) 461 mutex_destroy(&ipcl_udp_fanout[i].connf_lock); 462 kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size * 463 sizeof (*ipcl_udp_fanout)); 464 465 for (i = 0; i < ipcl_raw_fanout_size; i++) 466 mutex_destroy(&ipcl_raw_fanout[i].connf_lock); 467 kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size * 468 sizeof (*ipcl_raw_fanout)); 469 470 kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE); 471 mutex_destroy(&rts_clients.connf_lock); 472 } 473 474 /* 475 * conn creation routine. initialize the conn, sets the reference 476 * and inserts it in the global hash table. 
 */
/*
 * Per-type behavior of ipcl_conn_create():
 *
 *	IPCL_TCPCONN	allocated from ipcl_tcpconn_cache (an itc_t); the
 *			embedded conn_t is returned with conn_ref = 1 and is
 *			inserted in the global hash.
 *	IPCL_SCTPCONN	allocated from sctp_conn_cache; only conn_flags is
 *			set here.  No reference is taken and the conn is not
 *			put in the global hash by this function.
 *	IPCL_IPCCONN	allocated from ipcl_conn_cache, zeroed, lock and cv
 *			initialized, conn_ref = 1, inserted in the global hash.
 *
 * 'sleep' is passed through to kmem_cache_alloc().  Returns NULL if the
 * allocation fails or (after ASSERT(0)) if 'type' is not one of the above.
 */
conn_t *
ipcl_conn_create(uint32_t type, int sleep)
{
	itc_t	*itc;
	conn_t	*connp;

	switch (type) {
	case IPCL_TCPCONN:
		if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache,
		    sleep)) == NULL)
			return (NULL);
		/* The conn_t lives at the front of the itc_t wrapper. */
		connp = &itc->itc_conn;
		connp->conn_ref = 1;
		IPCL_DEBUG_LVL(1,
		    ("ipcl_conn_create: connp = %p tcp (%p)",
		    (void *)connp, (void *)connp->conn_tcp));
		ipcl_globalhash_insert(connp);
		break;
	case IPCL_SCTPCONN:
		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
			return (NULL);
		connp->conn_flags = IPCL_SCTPCONN;
		break;
	case IPCL_IPCCONN:
		connp = kmem_cache_alloc(ipcl_conn_cache, sleep);
		if (connp == NULL)
			return (NULL);
		/* ipcl_conn_cache has no constructor: initialize by hand. */
		bzero(connp, sizeof (conn_t));
		mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
		cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
		connp->conn_flags = IPCL_IPCCONN;
		connp->conn_ref = 1;
		IPCL_DEBUG_LVL(1,
		    ("ipcl_conn_create: connp = %p\n", (void *)connp));
		ipcl_globalhash_insert(connp);
		break;
	default:
		connp = NULL;
		ASSERT(0);
	}

	return (connp);
}

/*
 * Destroy a conn whose last reference has been dropped.
 *
 * The caller must not hold conn_lock, conn_ref must be 0 and conn_ire_cache
 * must already be NULL (all three are asserted).  The conn is removed from
 * the global hash and its memory is returned according to conn_flags:
 * TCP conns go back to ipcl_tcpconn_cache, SCTP conns are handed to
 * sctp_free(), everything else goes back to ipcl_conn_cache.
 */
void
ipcl_conn_destroy(conn_t *connp)
{
	mblk_t	*mp;

	ASSERT(!MUTEX_HELD(&connp->conn_lock));
	ASSERT(connp->conn_ref == 0);
	ASSERT(connp->conn_ire_cache == NULL);

	ipcl_globalhash_remove(connp);

	cv_destroy(&connp->conn_cv);
	if (connp->conn_flags & IPCL_TCPCONN) {
		tcp_t	*tcp = connp->conn_tcp;

		mutex_destroy(&connp->conn_lock);
		ASSERT(connp->conn_tcp != NULL);
		tcp_free(tcp);
		/* Saved so it survives the bzero() of the itc_t below. */
		mp = tcp->tcp_timercache;

		if (tcp->tcp_sack_info != NULL) {
			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
			kmem_cache_free(tcp_sack_info_cache,
			    tcp->tcp_sack_info);
		}
		if (tcp->tcp_iphc != NULL) {
			/*
			 * A grown header was allocated with kmem_alloc();
			 * otherwise it came from tcp_iphc_cache and is
			 * zeroed before being returned to that cache.
			 */
			if (tcp->tcp_hdr_grown) {
				kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len);
			} else {
				bzero(tcp->tcp_iphc, tcp->tcp_iphc_len);
				kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc);
			}
			tcp->tcp_iphc_len = 0;
		}
		ASSERT(tcp->tcp_iphc_len == 0);

		if (connp->conn_latch != NULL)
			IPLATCH_REFRELE(connp->conn_latch);
		if (connp->conn_policy != NULL)
			IPPH_REFRELE(connp->conn_policy);
		/*
		 * Wipe the whole itc_t (connp is its first member), then
		 * restore the few fields that a cached object is expected to
		 * carry.  Presumably 'tcp' is the itc_tcp member of this same
		 * itc_t - TODO confirm against ipcl_tcpconn_constructor().
		 */
		bzero(connp, sizeof (itc_t));

		tcp->tcp_timercache = mp;
		connp->conn_tcp = tcp;
		connp->conn_flags = IPCL_TCPCONN;
		connp->conn_ulp = IPPROTO_TCP;
		tcp->tcp_connp = connp;
		kmem_cache_free(ipcl_tcpconn_cache, connp);
	} else if (connp->conn_flags & IPCL_SCTPCONN) {
		sctp_free(connp);
	} else {
		ASSERT(connp->conn_udp == NULL);
		mutex_destroy(&connp->conn_lock);
		kmem_cache_free(ipcl_conn_cache, connp);
	}
}

/*
 * Running in cluster mode - deregister listener information
 *
 * The conn must be flagged IPCL_CL_LISTENER and have a non-zero local port.
 * If the cl_inet_unlisten hook is set it is called with the conn's bound
 * source address (v4 or v6, chosen by conn_pkt_isv6) and local port; the
 * IPCL_CL_LISTENER flag is cleared whether or not the hook is set.
 */

static void
ipcl_conn_unlisten(conn_t *connp)
{
	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
	ASSERT(connp->conn_lport != 0);

	if (cl_inet_unlisten != NULL) {
		sa_family_t	addr_family;
		uint8_t		*laddrp;

		if (connp->conn_pkt_isv6) {
			addr_family = AF_INET6;
			laddrp = (uint8_t *)&connp->conn_bound_source_v6;
		} else {
			addr_family = AF_INET;
			laddrp = (uint8_t *)&connp->conn_bound_source;
		}
		(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp,
		    connp->conn_lport);
	}
	connp->conn_flags &= ~IPCL_CL_LISTENER;
}

/*
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to).  So for debugging we can see which hash
 * table this connection was in.
 */
/*
 * IPCL_HASH_REMOVE(connp): unlink 'connp' from whatever fanout bucket it is
 * currently on (conn_fanout); a no-op when it is on none.
 *
 * Under the bucket lock the conn is unlinked, its linkage fields are cleared,
 * IPCL_REMOVED is set, a cluster listener is deregistered, and the reference
 * held by the hash list is dropped with CONN_DEC_REF().  The caller must not
 * hold conn_lock (asserted).
 *
 * NOTE(review): conn_fanout is sampled before the bucket lock is taken.
 * The macro declares a local named 'connfp' inside its own block.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}

/*
 * Function form of IPCL_HASH_REMOVE(), for callers outside this file.
 */
void
ipcl_hash_remove(conn_t *connp)
{
	IPCL_HASH_REMOVE(connp);
}

/*
 * The whole purpose of this function is allow removal of
 * a conn_t from the connected hash for timewait reclaim.
 * This is essentially a TW reclaim fastpath where timewait
 * collector checks under fanout lock (so no one else can
 * get access to the conn_t) that refcnt is 2 i.e. one for
 * TCP and one for the classifier hash list. If ref count
 * is indeed 2, we can just remove the conn under lock and
 * avoid cleaning up the conn under squeue. This gives us
 * improved performance.
653 */ 654 void 655 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 656 { 657 ASSERT(MUTEX_HELD(&connfp->connf_lock)); 658 ASSERT(MUTEX_HELD(&connp->conn_lock)); 659 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 660 661 if ((connp)->conn_next != NULL) { 662 (connp)->conn_next->conn_prev = 663 (connp)->conn_prev; 664 } 665 if ((connp)->conn_prev != NULL) { 666 (connp)->conn_prev->conn_next = 667 (connp)->conn_next; 668 } else { 669 connfp->connf_head = (connp)->conn_next; 670 } 671 (connp)->conn_fanout = NULL; 672 (connp)->conn_next = NULL; 673 (connp)->conn_prev = NULL; 674 (connp)->conn_flags |= IPCL_REMOVED; 675 ASSERT((connp)->conn_ref == 2); 676 (connp)->conn_ref--; 677 } 678 679 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 680 ASSERT((connp)->conn_fanout == NULL); \ 681 ASSERT((connp)->conn_next == NULL); \ 682 ASSERT((connp)->conn_prev == NULL); \ 683 if ((connfp)->connf_head != NULL) { \ 684 (connfp)->connf_head->conn_prev = (connp); \ 685 (connp)->conn_next = (connfp)->connf_head; \ 686 } \ 687 (connp)->conn_fanout = (connfp); \ 688 (connfp)->connf_head = (connp); \ 689 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 690 IPCL_CONNECTED; \ 691 CONN_INC_REF(connp); \ 692 } 693 694 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 695 IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 696 "connp %p", (void *)(connfp), (void *)(connp))); \ 697 IPCL_HASH_REMOVE((connp)); \ 698 mutex_enter(&(connfp)->connf_lock); \ 699 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 700 mutex_exit(&(connfp)->connf_lock); \ 701 } 702 703 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 704 conn_t *pconnp = NULL, *nconnp; \ 705 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 706 "connp %p", (void *)connfp, (void *)(connp))); \ 707 IPCL_HASH_REMOVE((connp)); \ 708 mutex_enter(&(connfp)->connf_lock); \ 709 nconnp = (connfp)->connf_head; \ 710 while (nconnp != NULL && \ 711 
!_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 712 pconnp = nconnp; \ 713 nconnp = nconnp->conn_next; \ 714 } \ 715 if (pconnp != NULL) { \ 716 pconnp->conn_next = (connp); \ 717 (connp)->conn_prev = pconnp; \ 718 } else { \ 719 (connfp)->connf_head = (connp); \ 720 } \ 721 if (nconnp != NULL) { \ 722 (connp)->conn_next = nconnp; \ 723 nconnp->conn_prev = (connp); \ 724 } \ 725 (connp)->conn_fanout = (connfp); \ 726 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 727 IPCL_BOUND; \ 728 CONN_INC_REF(connp); \ 729 mutex_exit(&(connfp)->connf_lock); \ 730 } 731 732 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 733 conn_t **list, *prev, *next; \ 734 boolean_t isv4mapped = \ 735 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 736 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 737 "connp %p", (void *)(connfp), (void *)(connp))); \ 738 IPCL_HASH_REMOVE((connp)); \ 739 mutex_enter(&(connfp)->connf_lock); \ 740 list = &(connfp)->connf_head; \ 741 prev = NULL; \ 742 while ((next = *list) != NULL) { \ 743 if (isv4mapped && \ 744 IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 745 connp->conn_zoneid == next->conn_zoneid) { \ 746 (connp)->conn_next = next; \ 747 if (prev != NULL) \ 748 prev = next->conn_prev; \ 749 next->conn_prev = (connp); \ 750 break; \ 751 } \ 752 list = &next->conn_next; \ 753 prev = next; \ 754 } \ 755 (connp)->conn_prev = prev; \ 756 *list = (connp); \ 757 (connp)->conn_fanout = (connfp); \ 758 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 759 IPCL_BOUND; \ 760 CONN_INC_REF((connp)); \ 761 mutex_exit(&(connfp)->connf_lock); \ 762 } 763 764 void 765 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 766 { 767 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 768 } 769 770 void 771 ipcl_proto_insert(conn_t *connp, uint8_t protocol) 772 { 773 connf_t *connfp; 774 775 ASSERT(connp != NULL); 776 777 connp->conn_ulp = protocol; 778 779 /* Insert it in the protocol hash */ 780 connfp = 
&ipcl_proto_fanout[protocol]; 781 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 782 } 783 784 void 785 ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 786 { 787 connf_t *connfp; 788 789 ASSERT(connp != NULL); 790 791 connp->conn_ulp = protocol; 792 793 /* Insert it in the Bind Hash */ 794 connfp = &ipcl_proto_fanout_v6[protocol]; 795 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 796 } 797 798 /* 799 * This function is used only for inserting SCTP raw socket now. 800 * This may change later. 801 * 802 * Note that only one raw socket can be bound to a port. The param 803 * lport is in network byte order. 804 */ 805 static int 806 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 807 { 808 connf_t *connfp; 809 conn_t *oconnp; 810 811 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 812 813 /* Check for existing raw socket already bound to the port. */ 814 mutex_enter(&connfp->connf_lock); 815 for (oconnp = connfp->connf_head; oconnp != NULL; 816 oconnp = oconnp->conn_next) { 817 if (oconnp->conn_lport == lport && 818 oconnp->conn_zoneid == connp->conn_zoneid && 819 oconnp->conn_af_isv6 == connp->conn_af_isv6 && 820 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 821 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 822 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 823 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 824 IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 825 &connp->conn_srcv6))) { 826 break; 827 } 828 } 829 mutex_exit(&connfp->connf_lock); 830 if (oconnp != NULL) 831 return (EADDRNOTAVAIL); 832 833 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 834 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 835 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 836 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 837 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 838 } else { 839 IPCL_HASH_INSERT_BOUND(connfp, connp); 840 } 841 } else { 842 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 843 } 844 return (0); 845 } 846 847 /* 848 * (v4, v6) bind hash insertion routines 
849 */ 850 int 851 ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 852 { 853 connf_t *connfp; 854 #ifdef IPCL_DEBUG 855 char buf[INET_NTOA_BUFSIZE]; 856 #endif 857 int ret = 0; 858 859 ASSERT(connp); 860 861 IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 862 "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 863 864 connp->conn_ulp = protocol; 865 IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 866 connp->conn_lport = lport; 867 868 switch (protocol) { 869 case IPPROTO_UDP: 870 default: 871 if (protocol == IPPROTO_UDP) { 872 IPCL_DEBUG_LVL(64, 873 ("ipcl_bind_insert: connp %p - udp\n", 874 (void *)connp)); 875 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 876 } else { 877 IPCL_DEBUG_LVL(64, 878 ("ipcl_bind_insert: connp %p - protocol\n", 879 (void *)connp)); 880 connfp = &ipcl_proto_fanout[protocol]; 881 } 882 883 if (connp->conn_rem != INADDR_ANY) { 884 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 885 } else if (connp->conn_src != INADDR_ANY) { 886 IPCL_HASH_INSERT_BOUND(connfp, connp); 887 } else { 888 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 889 } 890 break; 891 892 case IPPROTO_TCP: 893 894 /* Insert it in the Bind Hash */ 895 connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 896 if (connp->conn_src != INADDR_ANY) { 897 IPCL_HASH_INSERT_BOUND(connfp, connp); 898 } else { 899 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 900 } 901 if (cl_inet_listen != NULL) { 902 ASSERT(!connp->conn_pkt_isv6); 903 connp->conn_flags |= IPCL_CL_LISTENER; 904 (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 905 (uint8_t *)&connp->conn_bound_source, lport); 906 } 907 break; 908 909 case IPPROTO_SCTP: 910 ret = ipcl_sctp_hash_insert(connp, lport); 911 break; 912 } 913 914 return (ret); 915 } 916 917 int 918 ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 919 uint16_t lport) 920 { 921 connf_t *connfp; 922 int ret = 0; 923 924 ASSERT(connp); 925 926 connp->conn_ulp = protocol; 927 connp->conn_srcv6 = *src; 928 
connp->conn_lport = lport; 929 930 switch (protocol) { 931 case IPPROTO_UDP: 932 default: 933 if (protocol == IPPROTO_UDP) { 934 IPCL_DEBUG_LVL(128, 935 ("ipcl_bind_insert_v6: connp %p - udp\n", 936 (void *)connp)); 937 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 938 } else { 939 IPCL_DEBUG_LVL(128, 940 ("ipcl_bind_insert_v6: connp %p - protocol\n", 941 (void *)connp)); 942 connfp = &ipcl_proto_fanout_v6[protocol]; 943 } 944 945 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 946 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 947 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 948 IPCL_HASH_INSERT_BOUND(connfp, connp); 949 } else { 950 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 951 } 952 break; 953 954 case IPPROTO_TCP: 955 /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */ 956 957 /* Insert it in the Bind Hash */ 958 connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 959 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 960 IPCL_HASH_INSERT_BOUND(connfp, connp); 961 } else { 962 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 963 } 964 if (cl_inet_listen != NULL) { 965 sa_family_t addr_family; 966 uint8_t *laddrp; 967 968 if (connp->conn_pkt_isv6) { 969 addr_family = AF_INET6; 970 laddrp = 971 (uint8_t *)&connp->conn_bound_source_v6; 972 } else { 973 addr_family = AF_INET; 974 laddrp = (uint8_t *)&connp->conn_bound_source; 975 } 976 connp->conn_flags |= IPCL_CL_LISTENER; 977 (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 978 lport); 979 } 980 break; 981 982 case IPPROTO_SCTP: 983 ret = ipcl_sctp_hash_insert(connp, lport); 984 break; 985 } 986 987 return (ret); 988 } 989 990 /* 991 * ipcl_conn_hash insertion routines. 
 */
/*
 * IPv4 connected-hash insertion.
 *
 *	TCP	The conn is hashed into ipcl_conn_fanout on its remote
 *		address and ports.  If an entry matching the same
 *		protocol/addresses/ports is already in the bucket,
 *		EADDRINUSE is returned and nothing is changed.
 *	SCTP	The conn is removed from any hash it is on and re-inserted
 *		through ipcl_sctp_hash_insert(), whose result is returned.
 *	others	The conn is initialized from the arguments and inserted in
 *		ipcl_udp_fanout (UDP) or ipcl_proto_fanout as a connected,
 *		bound or wildcard entry.
 *
 * 'ports' packs two 16-bit ports.  The UDP bucket is chosen from the second
 * 16-bit half in memory (up[1]), the same half the SCTP case extracts as
 * lport.
 */
int
ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
    ipaddr_t rem, uint32_t ports)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
#ifdef	IPCL_DEBUG
	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
#endif
	in_port_t	lport;
	int		ret = 0;

	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
	    ports, protocol));

	switch (protocol) {
	case IPPROTO_TCP:
		if (!(connp->conn_flags & IPCL_EAGER)) {
			/*
			 * For an eager connection, i.e. a connection which
			 * has just been created, the initialization is
			 * already done in ip at conn creation time, so
			 * only non-eager conns are initialized here.
			 */
			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		}
		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem,
		    connp->conn_ports)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
			    connp->conn_rem, connp->conn_src,
			    connp->conn_ports)) {

				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 *
			 * The new bucket's lock is dropped around the
			 * removal because IPCL_HASH_REMOVE() takes the old
			 * bucket's lock itself.
			 * NOTE(review): the duplicate check above is not
			 * repeated after the lock is re-acquired.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/*
		 * The raw socket may have already been bound, remove it
		 * from the hash first.
		 */
		IPCL_HASH_REMOVE(connp);
		/* Low 16 bits of the host-order value, back in net order. */
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_UDP:
	default:
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		if (protocol == IPPROTO_UDP) {
			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
		} else {
			connfp = &ipcl_proto_fanout[protocol];
		}

		if (connp->conn_rem != INADDR_ANY) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (connp->conn_src != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * IPv6 counterpart of ipcl_conn_insert().  For TCP an existing entry only
 * counts as a duplicate (EADDRINUSE) when, in addition to matching the
 * protocol/addresses/ports, its tcp_bound_if is 0 or equals 'ifindex'.
 * Non-TCP, non-SCTP protocols other than UDP use ipcl_proto_fanout_v6.
 */
int
ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
    const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
	in_port_t	lport;
	int		ret = 0;

	switch (protocol) {
	case IPPROTO_TCP:
		/* Just need to insert a conn struct */
		if (!(connp->conn_flags & IPCL_EAGER)) {
			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
		}
		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6,
		    connp->conn_ports)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
			    connp->conn_remv6, connp->conn_srcv6,
			    connp->conn_ports) &&
			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
			    tconnp->conn_tcp->tcp_bound_if == ifindex)) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 *
			 * The new bucket's lock is dropped around the
			 * removal because IPCL_HASH_REMOVE() takes the old
			 * bucket's lock itself.
			 * NOTE(review): the duplicate check above is not
			 * repeated after the lock is re-acquired.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/* Drop any earlier hash entry before re-inserting. */
		IPCL_HASH_REMOVE(connp);
		/* Low 16 bits of the host-order value, back in net order. */
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_UDP:
	default:
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
		if (protocol == IPPROTO_UDP) {
			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
		} else {
			connfp = &ipcl_proto_fanout_v6[protocol];
		}

		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * v4 packet classifying function. looks up the fanout table to
 * find the conn, the packet belongs to. returns the conn with
 * the reference held, null otherwise.
1159 */ 1160 conn_t * 1161 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 1162 { 1163 ipha_t *ipha; 1164 connf_t *connfp, *bind_connfp; 1165 uint16_t lport; 1166 uint16_t fport; 1167 uint32_t ports; 1168 conn_t *connp; 1169 uint16_t *up; 1170 1171 ipha = (ipha_t *)mp->b_rptr; 1172 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 1173 1174 switch (protocol) { 1175 case IPPROTO_TCP: 1176 ports = *(uint32_t *)up; 1177 connfp = 1178 &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)]; 1179 mutex_enter(&connfp->connf_lock); 1180 for (connp = connfp->connf_head; connp != NULL; 1181 connp = connp->conn_next) { 1182 if (IPCL_CONN_MATCH(connp, protocol, 1183 ipha->ipha_src, ipha->ipha_dst, ports)) 1184 break; 1185 } 1186 1187 if (connp != NULL) { 1188 CONN_INC_REF(connp); 1189 mutex_exit(&connfp->connf_lock); 1190 return (connp); 1191 } 1192 1193 mutex_exit(&connfp->connf_lock); 1194 1195 lport = up[1]; 1196 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1197 mutex_enter(&bind_connfp->connf_lock); 1198 for (connp = bind_connfp->connf_head; connp != NULL; 1199 connp = connp->conn_next) { 1200 if (IPCL_BIND_MATCH(connp, protocol, 1201 ipha->ipha_dst, lport) && 1202 connp->conn_zoneid == zoneid) 1203 break; 1204 } 1205 1206 if (connp != NULL) { 1207 /* Have a listner at least */ 1208 CONN_INC_REF(connp); 1209 mutex_exit(&bind_connfp->connf_lock); 1210 return (connp); 1211 } 1212 1213 mutex_exit(&bind_connfp->connf_lock); 1214 1215 IPCL_DEBUG_LVL(512, 1216 ("ipcl_classify: couldn't classify mp = %p\n", 1217 (void *)mp)); 1218 break; 1219 1220 case IPPROTO_UDP: 1221 lport = up[1]; 1222 fport = up[0]; 1223 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 1224 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 1225 mutex_enter(&connfp->connf_lock); 1226 for (connp = connfp->connf_head; connp != NULL; 1227 connp = connp->conn_next) { 1228 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 1229 fport, 
ipha->ipha_src) && 1230 connp->conn_zoneid == zoneid) 1231 break; 1232 } 1233 1234 if (connp != NULL) { 1235 CONN_INC_REF(connp); 1236 mutex_exit(&connfp->connf_lock); 1237 return (connp); 1238 } 1239 1240 /* 1241 * We shouldn't come here for multicast/broadcast packets 1242 */ 1243 mutex_exit(&connfp->connf_lock); 1244 IPCL_DEBUG_LVL(512, 1245 ("ipcl_classify: cant find udp conn_t for ports : %x %x", 1246 lport, fport)); 1247 break; 1248 } 1249 1250 return (NULL); 1251 } 1252 1253 conn_t * 1254 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 1255 { 1256 ip6_t *ip6h; 1257 connf_t *connfp, *bind_connfp; 1258 uint16_t lport; 1259 uint16_t fport; 1260 tcph_t *tcph; 1261 uint32_t ports; 1262 conn_t *connp; 1263 uint16_t *up; 1264 1265 1266 ip6h = (ip6_t *)mp->b_rptr; 1267 1268 switch (protocol) { 1269 case IPPROTO_TCP: 1270 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 1271 up = (uint16_t *)tcph->th_lport; 1272 ports = *(uint32_t *)up; 1273 1274 connfp = 1275 &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)]; 1276 mutex_enter(&connfp->connf_lock); 1277 for (connp = connfp->connf_head; connp != NULL; 1278 connp = connp->conn_next) { 1279 if (IPCL_CONN_MATCH_V6(connp, protocol, 1280 ip6h->ip6_src, ip6h->ip6_dst, ports)) 1281 break; 1282 } 1283 1284 if (connp != NULL) { 1285 CONN_INC_REF(connp); 1286 mutex_exit(&connfp->connf_lock); 1287 return (connp); 1288 } 1289 1290 mutex_exit(&connfp->connf_lock); 1291 1292 lport = up[1]; 1293 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1294 mutex_enter(&bind_connfp->connf_lock); 1295 for (connp = bind_connfp->connf_head; connp != NULL; 1296 connp = connp->conn_next) { 1297 if (IPCL_BIND_MATCH_V6(connp, protocol, 1298 ip6h->ip6_dst, lport) && 1299 connp->conn_zoneid == zoneid) 1300 break; 1301 } 1302 1303 if (connp != NULL) { 1304 /* Have a listner at least */ 1305 CONN_INC_REF(connp); 1306 mutex_exit(&bind_connfp->connf_lock); 1307 IPCL_DEBUG_LVL(512, 1308 ("ipcl_classify_v6: found 
listner " 1309 "connp = %p\n", (void *)connp)); 1310 1311 return (connp); 1312 } 1313 1314 mutex_exit(&bind_connfp->connf_lock); 1315 1316 IPCL_DEBUG_LVL(512, 1317 ("ipcl_classify_v6: couldn't classify mp = %p\n", 1318 (void *)mp)); 1319 break; 1320 1321 case IPPROTO_UDP: 1322 up = (uint16_t *)&mp->b_rptr[hdr_len]; 1323 lport = up[1]; 1324 fport = up[0]; 1325 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 1326 fport)); 1327 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 1328 mutex_enter(&connfp->connf_lock); 1329 for (connp = connfp->connf_head; connp != NULL; 1330 connp = connp->conn_next) { 1331 if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 1332 fport, ip6h->ip6_src) && 1333 connp->conn_zoneid == zoneid) 1334 break; 1335 } 1336 1337 if (connp != NULL) { 1338 CONN_INC_REF(connp); 1339 mutex_exit(&connfp->connf_lock); 1340 return (connp); 1341 } 1342 1343 /* 1344 * We shouldn't come here for multicast/broadcast packets 1345 */ 1346 mutex_exit(&connfp->connf_lock); 1347 IPCL_DEBUG_LVL(512, 1348 ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 1349 lport, fport)); 1350 break; 1351 } 1352 1353 1354 return (NULL); 1355 } 1356 1357 /* 1358 * wrapper around ipcl_classify_(v4,v6) routines. 
1359 */ 1360 conn_t * 1361 ipcl_classify(mblk_t *mp, zoneid_t zoneid) 1362 { 1363 uint16_t hdr_len; 1364 ipha_t *ipha; 1365 uint8_t *nexthdrp; 1366 1367 if (MBLKL(mp) < sizeof (ipha_t)) 1368 return (NULL); 1369 1370 switch (IPH_HDR_VERSION(mp->b_rptr)) { 1371 case IPV4_VERSION: 1372 ipha = (ipha_t *)mp->b_rptr; 1373 hdr_len = IPH_HDR_LENGTH(ipha); 1374 return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 1375 zoneid)); 1376 case IPV6_VERSION: 1377 if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 1378 &hdr_len, &nexthdrp)) 1379 return (NULL); 1380 1381 return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid)); 1382 } 1383 1384 return (NULL); 1385 } 1386 1387 conn_t * 1388 ipcl_classify_raw(uint8_t protocol, zoneid_t zoneid, uint32_t ports, 1389 ipha_t *hdr) 1390 { 1391 struct connf_s *connfp; 1392 conn_t *connp; 1393 in_port_t lport; 1394 int af; 1395 1396 lport = ((uint16_t *)&ports)[1]; 1397 af = IPH_HDR_VERSION(hdr); 1398 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 1399 1400 mutex_enter(&connfp->connf_lock); 1401 for (connp = connfp->connf_head; connp != NULL; 1402 connp = connp->conn_next) { 1403 /* We don't allow v4 fallback for v6 raw socket. */ 1404 if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 1405 IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { 1406 continue; 1407 } 1408 if (connp->conn_fully_bound) { 1409 if (af == IPV4_VERSION) { 1410 if (IPCL_CONN_MATCH(connp, protocol, 1411 hdr->ipha_src, hdr->ipha_dst, ports)) { 1412 break; 1413 } 1414 } else { 1415 if (IPCL_CONN_MATCH_V6(connp, protocol, 1416 ((ip6_t *)hdr)->ip6_src, 1417 ((ip6_t *)hdr)->ip6_dst, ports)) { 1418 break; 1419 } 1420 } 1421 } else { 1422 if (af == IPV4_VERSION) { 1423 if (IPCL_BIND_MATCH(connp, protocol, 1424 hdr->ipha_dst, lport)) { 1425 break; 1426 } 1427 } else { 1428 if (IPCL_BIND_MATCH_V6(connp, protocol, 1429 ((ip6_t *)hdr)->ip6_dst, lport)) { 1430 break; 1431 } 1432 } 1433 } 1434 } 1435 1436 if (connp != NULL) 1437 goto found; 1438 mutex_exit(&connfp->connf_lock); 1439 1440 /* Try to look for a wildcard match. */ 1441 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(0)]; 1442 mutex_enter(&connfp->connf_lock); 1443 for (connp = connfp->connf_head; connp != NULL; 1444 connp = connp->conn_next) { 1445 /* We don't allow v4 fallback for v6 raw socket. */ 1446 if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 1447 IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { 1448 continue; 1449 } 1450 if (af == IPV4_VERSION) { 1451 if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 1452 break; 1453 } else { 1454 if (IPCL_RAW_MATCH_V6(connp, protocol, 1455 ((ip6_t *)hdr)->ip6_dst)) { 1456 break; 1457 } 1458 } 1459 } 1460 1461 if (connp != NULL) 1462 goto found; 1463 1464 mutex_exit(&connfp->connf_lock); 1465 return (NULL); 1466 1467 found: 1468 ASSERT(connp != NULL); 1469 CONN_INC_REF(connp); 1470 mutex_exit(&connfp->connf_lock); 1471 return (connp); 1472 } 1473 1474 /* ARGSUSED */ 1475 static int 1476 ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) 1477 { 1478 itc_t *itc = (itc_t *)buf; 1479 conn_t *connp = &itc->itc_conn; 1480 tcp_t *tcp = &itc->itc_tcp; 1481 bzero(itc, sizeof (itc_t)); 1482 tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 1483 connp->conn_tcp = tcp; 1484 connp->conn_flags = IPCL_TCPCONN; 1485 connp->conn_ulp = IPPROTO_TCP; 1486 tcp->tcp_connp = connp; 1487 return (0); 1488 } 1489 1490 /* ARGSUSED */ 1491 static void 1492 ipcl_tcpconn_destructor(void *buf, void *cdrarg) 1493 { 1494 tcp_timermp_free(((conn_t *)buf)->conn_tcp); 1495 } 1496 1497 /* 1498 * All conns are inserted in a global multi-list for the benefit of 1499 * walkers. The walk is guaranteed to walk all open conns at the time 1500 * of the start of the walk exactly once. This property is needed to 1501 * achieve some cleanups during unplumb of interfaces. This is achieved 1502 * as follows. 1503 * 1504 * ipcl_conn_create and ipcl_conn_destroy are the only functions that 1505 * call the insert and delete functions below at creation and deletion 1506 * time respectively. The conn never moves or changes its position in this 1507 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 1508 * won't increase due to walkers, once the conn deletion has started. 
Note 1509 * that we can't remove the conn from the global list and then wait for 1510 * the refcnt to drop to zero, since walkers would then see a truncated 1511 * list. CONN_INCIPIENT ensures that walkers don't start looking at 1512 * conns until ip_open is ready to make them globally visible. 1513 * The global round robin multi-list locks are held only to get the 1514 * next member/insertion/deletion and contention should be negligible 1515 * if the multi-list is much greater than the number of cpus. 1516 */ 1517 void 1518 ipcl_globalhash_insert(conn_t *connp) 1519 { 1520 int index; 1521 1522 /* 1523 * No need for atomic here. Approximate even distribution 1524 * in the global lists is sufficient. 1525 */ 1526 conn_g_index++; 1527 index = conn_g_index & (CONN_G_HASH_SIZE - 1); 1528 1529 connp->conn_g_prev = NULL; 1530 /* 1531 * Mark as INCIPIENT, so that walkers will ignore this 1532 * for now, till ip_open is ready to make it visible globally. 1533 */ 1534 connp->conn_state_flags |= CONN_INCIPIENT; 1535 1536 /* Insert at the head of the list */ 1537 mutex_enter(&ipcl_globalhash_fanout[index].connf_lock); 1538 connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head; 1539 if (connp->conn_g_next != NULL) 1540 connp->conn_g_next->conn_g_prev = connp; 1541 ipcl_globalhash_fanout[index].connf_head = connp; 1542 1543 /* The fanout bucket this conn points to */ 1544 connp->conn_g_fanout = &ipcl_globalhash_fanout[index]; 1545 1546 mutex_exit(&ipcl_globalhash_fanout[index].connf_lock); 1547 } 1548 1549 void 1550 ipcl_globalhash_remove(conn_t *connp) 1551 { 1552 /* 1553 * We were never inserted in the global multi list. 1554 * IPCL_NONE variety is never inserted in the global multilist 1555 * since it is presumed to not need any cleanup and is transient. 
1556 */ 1557 if (connp->conn_g_fanout == NULL) 1558 return; 1559 1560 mutex_enter(&connp->conn_g_fanout->connf_lock); 1561 if (connp->conn_g_prev != NULL) 1562 connp->conn_g_prev->conn_g_next = connp->conn_g_next; 1563 else 1564 connp->conn_g_fanout->connf_head = connp->conn_g_next; 1565 if (connp->conn_g_next != NULL) 1566 connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 1567 mutex_exit(&connp->conn_g_fanout->connf_lock); 1568 1569 /* Better to stumble on a null pointer than to corrupt memory */ 1570 connp->conn_g_next = NULL; 1571 connp->conn_g_prev = NULL; 1572 } 1573 1574 /* 1575 * Walk the list of all conn_t's in the system, calling the function provided 1576 * with the specified argument for each. 1577 * Applies to both IPv4 and IPv6. 1578 * 1579 * IPCs may hold pointers to ipif/ill. To guard against stale pointers 1580 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 1581 * unplumbed or removed. New conn_t's that are created while we are walking 1582 * may be missed by this walk, because they are not necessarily inserted 1583 * at the tail of the list. They are new conn_t's and thus don't have any 1584 * stale pointers. The CONN_CLOSING flag ensures that no new reference 1585 * is created to the struct that is going away. 
1586 */ 1587 void 1588 ipcl_walk(pfv_t func, void *arg) 1589 { 1590 int i; 1591 conn_t *connp; 1592 conn_t *prev_connp; 1593 1594 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 1595 mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 1596 prev_connp = NULL; 1597 connp = ipcl_globalhash_fanout[i].connf_head; 1598 while (connp != NULL) { 1599 mutex_enter(&connp->conn_lock); 1600 if (connp->conn_state_flags & 1601 (CONN_CONDEMNED | CONN_INCIPIENT)) { 1602 mutex_exit(&connp->conn_lock); 1603 connp = connp->conn_g_next; 1604 continue; 1605 } 1606 CONN_INC_REF_LOCKED(connp); 1607 mutex_exit(&connp->conn_lock); 1608 mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 1609 (*func)(connp, arg); 1610 if (prev_connp != NULL) 1611 CONN_DEC_REF(prev_connp); 1612 mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 1613 prev_connp = connp; 1614 connp = connp->conn_g_next; 1615 } 1616 mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 1617 if (prev_connp != NULL) 1618 CONN_DEC_REF(prev_connp); 1619 } 1620 } 1621 1622 /* 1623 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 1624 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 1625 * held; caller must call CONN_DEC_REF. Only checks for connected entries 1626 * (peer tcp in at least ESTABLISHED state). 1627 */ 1628 conn_t * 1629 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) 1630 { 1631 uint32_t ports; 1632 uint16_t *pports = (uint16_t *)&ports; 1633 connf_t *connfp; 1634 conn_t *tconnp; 1635 boolean_t zone_chk; 1636 1637 /* 1638 * If either the source of destination address is loopback, then 1639 * both endpoints must be in the same Zone. Otherwise, both of 1640 * the addresses are system-wide unique (tcp is in ESTABLISHED 1641 * state) and the endpoints may reside in different Zones. 
1642 */ 1643 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 1644 ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 1645 1646 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1647 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1648 1649 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 1650 1651 mutex_enter(&connfp->connf_lock); 1652 for (tconnp = connfp->connf_head; tconnp != NULL; 1653 tconnp = tconnp->conn_next) { 1654 1655 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 1656 ipha->ipha_dst, ipha->ipha_src, ports) && 1657 tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 1658 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 1659 1660 ASSERT(tconnp != connp); 1661 CONN_INC_REF(tconnp); 1662 mutex_exit(&connfp->connf_lock); 1663 return (tconnp); 1664 } 1665 } 1666 mutex_exit(&connfp->connf_lock); 1667 return (NULL); 1668 } 1669 1670 /* 1671 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 1672 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 1673 * held; caller must call CONN_DEC_REF. Only checks for connected entries 1674 * (peer tcp in at least ESTABLISHED state). 1675 */ 1676 conn_t * 1677 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) 1678 { 1679 uint32_t ports; 1680 uint16_t *pports = (uint16_t *)&ports; 1681 connf_t *connfp; 1682 conn_t *tconnp; 1683 boolean_t zone_chk; 1684 1685 /* 1686 * If either the source of destination address is loopback, then 1687 * both endpoints must be in the same Zone. Otherwise, both of 1688 * the addresses are system-wide unique (tcp is in ESTABLISHED 1689 * state) and the endpoints may reside in different Zones. We 1690 * don't do Zone check for link local address(es) because the 1691 * current Zone implementation treats each link local address as 1692 * being unique per system node, i.e. they belong to global Zone. 
1693 */ 1694 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 1695 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 1696 1697 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1698 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1699 1700 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 1701 1702 mutex_enter(&connfp->connf_lock); 1703 for (tconnp = connfp->connf_head; tconnp != NULL; 1704 tconnp = tconnp->conn_next) { 1705 1706 /* We skip tcp_bound_if check here as this is loopback tcp */ 1707 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 1708 ip6h->ip6_dst, ip6h->ip6_src, ports) && 1709 tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 1710 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 1711 1712 ASSERT(tconnp != connp); 1713 CONN_INC_REF(tconnp); 1714 mutex_exit(&connfp->connf_lock); 1715 return (tconnp); 1716 } 1717 } 1718 mutex_exit(&connfp->connf_lock); 1719 return (NULL); 1720 } 1721 1722 /* 1723 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 1724 * Returns with conn reference held. Caller must call CONN_DEC_REF. 1725 * Only checks for connected entries i.e. no INADDR_ANY checks. 
1726 */ 1727 conn_t * 1728 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) 1729 { 1730 uint32_t ports; 1731 uint16_t *pports; 1732 connf_t *connfp; 1733 conn_t *tconnp; 1734 1735 pports = (uint16_t *)&ports; 1736 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1737 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1738 1739 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 1740 1741 mutex_enter(&connfp->connf_lock); 1742 for (tconnp = connfp->connf_head; tconnp != NULL; 1743 tconnp = tconnp->conn_next) { 1744 1745 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 1746 ipha->ipha_dst, ipha->ipha_src, ports) && 1747 tconnp->conn_tcp->tcp_state >= min_state) { 1748 1749 CONN_INC_REF(tconnp); 1750 mutex_exit(&connfp->connf_lock); 1751 return (tconnp); 1752 } 1753 } 1754 mutex_exit(&connfp->connf_lock); 1755 return (NULL); 1756 } 1757 1758 /* 1759 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 1760 * Returns with conn reference held. Caller must call CONN_DEC_REF. 1761 * Only checks for connected entries i.e. no INADDR_ANY checks. 1762 * Match on ifindex in addition to addresses. 
1763 */ 1764 conn_t * 1765 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 1766 uint_t ifindex) 1767 { 1768 tcp_t *tcp; 1769 uint32_t ports; 1770 uint16_t *pports; 1771 connf_t *connfp; 1772 conn_t *tconnp; 1773 1774 pports = (uint16_t *)&ports; 1775 pports[0] = tcpha->tha_fport; 1776 pports[1] = tcpha->tha_lport; 1777 1778 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 1779 1780 mutex_enter(&connfp->connf_lock); 1781 for (tconnp = connfp->connf_head; tconnp != NULL; 1782 tconnp = tconnp->conn_next) { 1783 1784 tcp = tconnp->conn_tcp; 1785 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 1786 ip6h->ip6_dst, ip6h->ip6_src, ports) && 1787 tcp->tcp_state >= min_state && 1788 (tcp->tcp_bound_if == 0 || 1789 tcp->tcp_bound_if == ifindex)) { 1790 1791 CONN_INC_REF(tconnp); 1792 mutex_exit(&connfp->connf_lock); 1793 return (tconnp); 1794 } 1795 } 1796 mutex_exit(&connfp->connf_lock); 1797 return (NULL); 1798 } 1799 1800 /* 1801 * To find a TCP listening connection matching the incoming segment. 1802 */ 1803 conn_t * 1804 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) 1805 { 1806 connf_t *bind_connfp; 1807 conn_t *connp; 1808 tcp_t *tcp; 1809 1810 /* 1811 * Avoid false matches for packets sent to an IP destination of 1812 * all zeros. 
1813 */ 1814 if (laddr == 0) 1815 return (NULL); 1816 1817 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1818 mutex_enter(&bind_connfp->connf_lock); 1819 for (connp = bind_connfp->connf_head; connp != NULL; 1820 connp = connp->conn_next) { 1821 tcp = connp->conn_tcp; 1822 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 1823 connp->conn_zoneid == zoneid && 1824 (tcp->tcp_listener == NULL)) { 1825 CONN_INC_REF(connp); 1826 mutex_exit(&bind_connfp->connf_lock); 1827 return (connp); 1828 } 1829 } 1830 mutex_exit(&bind_connfp->connf_lock); 1831 return (NULL); 1832 } 1833 1834 1835 conn_t * 1836 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 1837 zoneid_t zoneid) 1838 { 1839 connf_t *bind_connfp; 1840 conn_t *connp = NULL; 1841 tcp_t *tcp; 1842 1843 /* 1844 * Avoid false matches for packets sent to an IP destination of 1845 * all zeros. 1846 */ 1847 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 1848 return (NULL); 1849 1850 1851 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1852 mutex_enter(&bind_connfp->connf_lock); 1853 for (connp = bind_connfp->connf_head; connp != NULL; 1854 connp = connp->conn_next) { 1855 tcp = connp->conn_tcp; 1856 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 1857 connp->conn_zoneid == zoneid && 1858 (tcp->tcp_bound_if == 0 || 1859 tcp->tcp_bound_if == ifindex) && 1860 tcp->tcp_listener == NULL) { 1861 CONN_INC_REF(connp); 1862 mutex_exit(&bind_connfp->connf_lock); 1863 return (connp); 1864 } 1865 } 1866 mutex_exit(&bind_connfp->connf_lock); 1867 return (NULL); 1868 } 1869 1870 /* 1871 * ipcl_get_next_conn 1872 * get the next entry in the conn global list 1873 * and put a reference on the next_conn. 1874 * decrement the reference on the current conn. 1875 * 1876 * This is an iterator based walker function that also provides for 1877 * some selection by the caller. 
It walks through the conn_hash bucket 1878 * searching for the next valid connp in the list, and selects connections 1879 * that are neither closed nor condemned. It also REFHOLDS the conn 1880 * thus ensuring that the conn exists when the caller uses the conn. 1881 */ 1882 conn_t * 1883 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 1884 { 1885 conn_t *next_connp; 1886 1887 if (connfp == NULL) 1888 return (NULL); 1889 1890 mutex_enter(&connfp->connf_lock); 1891 1892 next_connp = (connp == NULL) ? 1893 connfp->connf_head : connp->conn_g_next; 1894 1895 while (next_connp != NULL) { 1896 mutex_enter(&next_connp->conn_lock); 1897 if (!(next_connp->conn_flags & conn_flags) || 1898 (next_connp->conn_state_flags & 1899 (CONN_CONDEMNED | CONN_INCIPIENT))) { 1900 /* 1901 * This conn has been condemned or 1902 * is closing, or the flags don't match 1903 */ 1904 mutex_exit(&next_connp->conn_lock); 1905 next_connp = next_connp->conn_g_next; 1906 continue; 1907 } 1908 CONN_INC_REF_LOCKED(next_connp); 1909 mutex_exit(&next_connp->conn_lock); 1910 break; 1911 } 1912 1913 mutex_exit(&connfp->connf_lock); 1914 1915 if (connp != NULL) 1916 CONN_DEC_REF(connp); 1917 1918 return (next_connp); 1919 } 1920 1921 #ifdef CONN_DEBUG 1922 /* 1923 * Trace of the last NBUF refhold/refrele 1924 */ 1925 int 1926 conn_trace_ref(conn_t *connp) 1927 { 1928 int last; 1929 conn_trace_t *ctb; 1930 1931 ASSERT(MUTEX_HELD(&connp->conn_lock)); 1932 last = connp->conn_trace_last; 1933 last++; 1934 if (last == CONN_TRACE_MAX) 1935 last = 0; 1936 1937 ctb = &connp->conn_trace_buf[last]; 1938 ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 1939 connp->conn_trace_last = last; 1940 return (1); 1941 } 1942 1943 int 1944 conn_untrace_ref(conn_t *connp) 1945 { 1946 int last; 1947 conn_trace_t *ctb; 1948 1949 ASSERT(MUTEX_HELD(&connp->conn_lock)); 1950 last = connp->conn_trace_last; 1951 last++; 1952 if (last == CONN_TRACE_MAX) 1953 last = 0; 1954 1955 ctb = 
&connp->conn_trace_buf[last]; 1956 ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 1957 connp->conn_trace_last = last; 1958 return (1); 1959 } 1960 #endif 1961