1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 const char ipclassifier_version[] = "@(#)ipclassifier.c 1.6 04/03/31 SMI"; 29 30 /* 31 * IP PACKET CLASSIFIER 32 * 33 * The IP packet classifier provides mapping between IP packets and persistent 34 * connection state for connection-oriented protocols. It also provides 35 * interface for managing connection states. 36 * 37 * The connection state is kept in conn_t data structure and contains, among 38 * other things: 39 * 40 * o local/remote address and ports 41 * o Transport protocol 42 * o squeue for the connection (for TCP only) 43 * o reference counter 44 * o Connection state 45 * o hash table linkage 46 * o interface/ire information 47 * o credentials 48 * o ipsec policy 49 * o send and receive functions. 50 * o mutex lock. 51 * 52 * Connections use a reference counting scheme. They are freed when the 53 * reference counter drops to zero. 
A reference is incremented when connection 54 * is placed in a list or table, when incoming packet for the connection arrives 55 * and when connection is processed via squeue (squeue processing may be 56 * asynchronous and the reference protects the connection from being destroyed 57 * before its processing is finished). 58 * 59 * send and receive functions are currently used for TCP only. The send function 60 * determines the IP entry point for the packet once it leaves TCP to be sent to 61 * the destination address. The receive function is used by IP when the packet 62 * should be passed for TCP processing. When a new connection is created these 63 * are set to ip_output() and tcp_input() respectively. During the lifetime of 64 * the connection the send and receive functions may change depending on the 65 * changes in the connection state. For example, once the connection is bound to 66 * an address, the receive function for this connection is set to 67 * tcp_conn_request(). This allows incoming SYNs to go directly into the 68 * listener SYN processing function without going to tcp_input() first. 69 * 70 * Classifier uses several hash tables: 71 * 72 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state 73 * ipcl_bind_fanout: contains all connections in BOUND state 74 * ipcl_proto_fanout: IPv4 protocol fanout 75 * ipcl_proto_fanout_v6: IPv6 protocol fanout 76 * ipcl_udp_fanout: contains all UDP connections 77 * ipcl_globalhash_fanout: contains all connections 78 * 79 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) 80 * which need to view all existing connections. 81 * 82 * All tables are protected by per-bucket locks. When both per-bucket lock and 83 * connection lock need to be held, the per-bucket lock should be acquired 84 * first, followed by the connection lock. 85 * 86 * All functions doing search in one of these tables increment a reference 87 * counter on the connection found (if any). 
This reference should be dropped 88 * when the caller has finished processing the connection. 89 * 90 * 91 * INTERFACES: 92 * =========== 93 * 94 * Connection Lookup: 95 * ------------------ 96 * 97 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid) 98 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid) 99 * 100 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if 101 * it can't find any associated connection. If the connection is found, its 102 * reference counter is incremented. 103 * 104 * mp: mblock, containing packet header. The full header should fit 105 * into a single mblock. It should also contain at least full IP 106 * and TCP or UDP header. 107 * 108 * protocol: Either IPPROTO_TCP or IPPROTO_UDP. 109 * 110 * hdr_len: The size of IP header. It is used to find TCP or UDP header in 111 * the packet. 112 * 113 * zoneid: The zone in which the returned connection must be. 114 * 115 * For TCP connections, the lookup order is as follows: 116 * 5-tuple {src, dst, protocol, local port, remote port} 117 * lookup in ipcl_conn_fanout table. 118 * 3-tuple {dst, remote port, protocol} lookup in 119 * ipcl_bind_fanout table. 120 * 121 * For UDP connections, a 5-tuple {src, dst, protocol, local port, 122 * remote port} lookup is done on ipcl_udp_fanout. Note that 123 * these interfaces do not handle cases where a packet belongs 124 * to multiple UDP clients, which is handled in IP itself. 125 * 126 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int); 127 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t); 128 * 129 * Lookup routine to find an exact match for {src, dst, local port, 130 * remote port} for TCP connections in ipcl_conn_fanout. The address and 131 * ports are read from the IP and TCP header respectively. 
132 * 133 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol); 134 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex); 135 * 136 * Lookup routine to find a listener with the tuple {lport, laddr, 137 * protocol} in the ipcl_bind_fanout table. For IPv6, an additional 138 * parameter interface index is also compared. 139 * 140 * void ipcl_walk(func, arg) 141 * 142 * Apply 'func' to every connection available. The 'func' is called as 143 * (*func)(connp, arg). The walk is non-atomic so connections may be 144 * created and destroyed during the walk. The CONN_CONDEMNED and 145 * CONN_INCIPIENT flags ensure that connections which are newly created 146 * or being destroyed are not selected by the walker. 147 * 148 * Table Updates 149 * ------------- 150 * 151 * int ipcl_conn_insert(connp, protocol, src, dst, ports) 152 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex) 153 * 154 * Insert 'connp' in the ipcl_conn_fanout. 155 * Arguments : 156 * connp conn_t to be inserted 157 * protocol connection protocol 158 * src source address 159 * dst destination address 160 * ports local and remote port 161 * ifindex interface index for IPv6 connections 162 * 163 * Return value : 164 * 0 if connp was inserted 165 * EADDRINUSE if the connection with the same tuple 166 * already exists. 167 * 168 * int ipcl_bind_insert(connp, protocol, src, lport); 169 * int ipcl_bind_insert_v6(connp, protocol, src, lport); 170 * 171 * Insert 'connp' in ipcl_bind_fanout. 172 * Arguments : 173 * connp conn_t to be inserted 174 * protocol connection protocol 175 * src source address connection wants 176 * to bind to 177 * lport local port connection wants to 178 * bind to 179 * 180 * 181 * void ipcl_hash_remove(connp); 182 * 183 * Removes the 'connp' from the connection fanout table. 
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_IPCCONN	indicates all non-TCP connections.
 *
 * void ipcl_conn_destroy(connp)
 *
 *	Destroys the connection state, removes it from the global
 *	connection hash table and frees its memory.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#define	_SUN_TPI_VERSION 2
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/isa_defs.h>
#include <inet/common.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>

#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/tcp_trace.h>
#include <inet/ip_multi.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_rts.h>
#include <inet/optcom.h>
#include <inet/ip_ndp.h>
#include <inet/udp_impl.h>
#include <inet/sctp_ip.h>

#include <sys/ethernet.h>
#include <net/if_types.h>
#include <sys/cpuvar.h>

#include <inet/mi.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>

/* Classifier debugging is compiled in only for DEBUG builds. */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

/*
 * IPCL_DEBUG_LVL(level, args): when IPCL_DEBUG is defined, calls printf with
 * 'args' if any bit of 'level' is set in ipcl_debug_level.  'args' must be a
 * fully parenthesized printf argument list, e.g.
 *	IPCL_DEBUG_LVL(1, ("connp = %p\n", (void *)connp));
 * Without IPCL_DEBUG the macro expands to an empty block.
 *
 * The debug form expands to a bare 'if' statement, so only use it where a
 * following 'else' cannot attach to it (i.e. inside braces).
 */
#ifdef IPCL_DEBUG
int	ipcl_debug_level = 0;
#define	IPCL_DEBUG_LVL(level, args)	\
	if (ipcl_debug_level & level) { printf args; }
#else
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif
/*
 * Fanout tables.  The pointer tables are allocated, and every bucket lock is
 * initialized, in ipcl_init().
 */
connf_t	*ipcl_conn_fanout;	/* TCP connections in CONNECTED state */
connf_t	*ipcl_bind_fanout;	/* connections in BOUND state */
connf_t	ipcl_proto_fanout[IPPROTO_MAX + 1];	/* IPv4, indexed by protocol */
connf_t	ipcl_proto_fanout_v6[IPPROTO_MAX + 1];	/* IPv6, indexed by protocol */
connf_t	*ipcl_udp_fanout;	/* UDP connections */

/* A separate hash list for raw socket. */
connf_t *ipcl_raw_fanout;

/* Single bucket; only its lock is set up and torn down in this part of file. */
connf_t rts_clients;

/* Old value for compatibility */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically.
 * ipcl_init() prefers ipcl_conn_hash_size, then tcp_conn_hash_size; if both
 * are zero the size is (freemem * PAGESIZE) / ipcl_conn_hash_memfactor,
 * capped at ipcl_conn_hash_maxsize.  The result is then rounded up to an
 * entry of the P2Ps() prime table.
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* Number of buckets actually allocated for ipcl_conn_fanout. */
uint_t ipcl_conn_fanout_size = 0;


/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The initializer has 30 entries: index N holds the prime for 2^N (zero for
 * N < 3) and the final entry is a zero terminator that ipcl_init() treats as
 * "out of range".
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}

/*
 * wrapper structure to ensure that conn+tcpb are aligned
 * on cache lines.
301 */ 302 typedef struct itc_s { 303 union { 304 conn_t itcu_conn; 305 char itcu_filler[CACHE_ALIGN(conn_s)]; 306 } itc_u; 307 tcp_t itc_tcp; 308 } itc_t; 309 310 #define itc_conn itc_u.itcu_conn 311 312 struct kmem_cache *ipcl_tcpconn_cache; 313 struct kmem_cache *ipcl_tcp_cache; 314 struct kmem_cache *ipcl_conn_cache; 315 extern struct kmem_cache *sctp_conn_cache; 316 extern struct kmem_cache *tcp_sack_info_cache; 317 extern struct kmem_cache *tcp_iphc_cache; 318 319 extern void tcp_timermp_free(tcp_t *); 320 extern mblk_t *tcp_timermp_alloc(int); 321 322 static int ipcl_tcpconn_constructor(void *, void *, int); 323 static void ipcl_tcpconn_destructor(void *, void *); 324 325 static int conn_g_index; 326 connf_t *ipcl_globalhash_fanout; 327 328 #ifdef IPCL_DEBUG 329 #define INET_NTOA_BUFSIZE 18 330 331 static char * 332 inet_ntoa_r(uint32_t in, char *b) 333 { 334 unsigned char *p; 335 336 p = (unsigned char *)∈ 337 (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 338 return (b); 339 } 340 #endif 341 342 /* 343 * ipclassifier intialization routine, sets up hash tables and 344 * conn caches. 345 */ 346 void 347 ipcl_init(void) 348 { 349 int i; 350 int sizes[] = P2Ps(); 351 352 ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", 353 sizeof (conn_t), CACHE_ALIGN_SIZE, 354 NULL, NULL, NULL, NULL, NULL, 0); 355 356 ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache", 357 sizeof (itc_t), CACHE_ALIGN_SIZE, 358 ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, 359 NULL, NULL, NULL, 0); 360 361 /* 362 * Calculate size of conn fanout table. 
363 */ 364 if (ipcl_conn_hash_size != 0) { 365 ipcl_conn_fanout_size = ipcl_conn_hash_size; 366 } else if (tcp_conn_hash_size != 0) { 367 ipcl_conn_fanout_size = tcp_conn_hash_size; 368 } else { 369 extern pgcnt_t freemem; 370 371 ipcl_conn_fanout_size = 372 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 373 374 if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) 375 ipcl_conn_fanout_size = ipcl_conn_hash_maxsize; 376 } 377 378 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 379 if (sizes[i] >= ipcl_conn_fanout_size) { 380 break; 381 } 382 } 383 if ((ipcl_conn_fanout_size = sizes[i]) == 0) { 384 /* Out of range, use the 2^16 value */ 385 ipcl_conn_fanout_size = sizes[16]; 386 } 387 ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size * 388 sizeof (*ipcl_conn_fanout), KM_SLEEP); 389 390 for (i = 0; i < ipcl_conn_fanout_size; i++) { 391 mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL, 392 MUTEX_DEFAULT, NULL); 393 } 394 395 ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size * 396 sizeof (*ipcl_bind_fanout), KM_SLEEP); 397 398 for (i = 0; i < ipcl_bind_fanout_size; i++) { 399 mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL, 400 MUTEX_DEFAULT, NULL); 401 } 402 403 for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) { 404 mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL, 405 MUTEX_DEFAULT, NULL); 406 } 407 for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) { 408 mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL, 409 MUTEX_DEFAULT, NULL); 410 } 411 412 mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL); 413 414 ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size * 415 sizeof (*ipcl_udp_fanout), KM_SLEEP); 416 417 for (i = 0; i < ipcl_udp_fanout_size; i++) { 418 mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL, 419 MUTEX_DEFAULT, NULL); 420 } 421 422 ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size * 423 sizeof (*ipcl_raw_fanout), KM_SLEEP); 424 425 for (i = 0; i < ipcl_raw_fanout_size; i++) { 426 
mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL, 427 MUTEX_DEFAULT, NULL); 428 } 429 430 ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) * 431 CONN_G_HASH_SIZE, KM_SLEEP); 432 433 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 434 mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL, 435 MUTEX_DEFAULT, NULL); 436 } 437 } 438 439 void 440 ipcl_destroy(void) 441 { 442 int i; 443 kmem_cache_destroy(ipcl_conn_cache); 444 kmem_cache_destroy(ipcl_tcpconn_cache); 445 for (i = 0; i < ipcl_conn_fanout_size; i++) 446 mutex_destroy(&ipcl_conn_fanout[i].connf_lock); 447 kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size * 448 sizeof (*ipcl_conn_fanout)); 449 for (i = 0; i < ipcl_bind_fanout_size; i++) 450 mutex_destroy(&ipcl_bind_fanout[i].connf_lock); 451 kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size * 452 sizeof (*ipcl_bind_fanout)); 453 454 for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) 455 mutex_destroy(&ipcl_proto_fanout[i].connf_lock); 456 for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) 457 mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock); 458 459 for (i = 0; i < ipcl_udp_fanout_size; i++) 460 mutex_destroy(&ipcl_udp_fanout[i].connf_lock); 461 kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size * 462 sizeof (*ipcl_udp_fanout)); 463 464 for (i = 0; i < ipcl_raw_fanout_size; i++) 465 mutex_destroy(&ipcl_raw_fanout[i].connf_lock); 466 kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size * 467 sizeof (*ipcl_raw_fanout)); 468 469 kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE); 470 mutex_destroy(&rts_clients.connf_lock); 471 } 472 473 /* 474 * conn creation routine. initialize the conn, sets the reference 475 * and inserts it in the global hash table. 
476 */ 477 conn_t * 478 ipcl_conn_create(uint32_t type, int sleep) 479 { 480 itc_t *itc; 481 conn_t *connp; 482 483 switch (type) { 484 case IPCL_TCPCONN: 485 if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache, 486 sleep)) == NULL) 487 return (NULL); 488 connp = &itc->itc_conn; 489 connp->conn_ref = 1; 490 IPCL_DEBUG_LVL(1, 491 ("ipcl_conn_create: connp = %p tcp (%p)", 492 (void *)connp, (void *)connp->conn_tcp)); 493 ipcl_globalhash_insert(connp); 494 break; 495 case IPCL_SCTPCONN: 496 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 497 return (NULL); 498 connp->conn_flags = IPCL_SCTPCONN; 499 break; 500 case IPCL_IPCCONN: 501 connp = kmem_cache_alloc(ipcl_conn_cache, sleep); 502 if (connp == NULL) 503 return (NULL); 504 bzero(connp, sizeof (conn_t)); 505 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 506 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 507 connp->conn_flags = IPCL_IPCCONN; 508 connp->conn_ref = 1; 509 IPCL_DEBUG_LVL(1, 510 ("ipcl_conn_create: connp = %p\n", (void *)connp)); 511 ipcl_globalhash_insert(connp); 512 break; 513 default: 514 connp = NULL; 515 ASSERT(0); 516 } 517 518 return (connp); 519 } 520 521 void 522 ipcl_conn_destroy(conn_t *connp) 523 { 524 mblk_t *mp; 525 526 ASSERT(!MUTEX_HELD(&connp->conn_lock)); 527 ASSERT(connp->conn_ref == 0); 528 ASSERT(connp->conn_ire_cache == NULL); 529 530 ipcl_globalhash_remove(connp); 531 532 cv_destroy(&connp->conn_cv); 533 if (connp->conn_flags & IPCL_TCPCONN) { 534 tcp_t *tcp = connp->conn_tcp; 535 536 mutex_destroy(&connp->conn_lock); 537 ASSERT(connp->conn_tcp != NULL); 538 tcp_free(tcp); 539 mp = tcp->tcp_timercache; 540 541 if (tcp->tcp_sack_info != NULL) { 542 bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 543 kmem_cache_free(tcp_sack_info_cache, 544 tcp->tcp_sack_info); 545 } 546 if (tcp->tcp_iphc != NULL) { 547 if (tcp->tcp_hdr_grown) { 548 kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 549 } else { 550 bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 551 
kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 552 } 553 tcp->tcp_iphc_len = 0; 554 } 555 ASSERT(tcp->tcp_iphc_len == 0); 556 557 if (connp->conn_latch != NULL) 558 IPLATCH_REFRELE(connp->conn_latch); 559 if (connp->conn_policy != NULL) 560 IPPH_REFRELE(connp->conn_policy); 561 bzero(connp, sizeof (itc_t)); 562 563 tcp->tcp_timercache = mp; 564 connp->conn_tcp = tcp; 565 connp->conn_flags = IPCL_TCPCONN; 566 connp->conn_ulp = IPPROTO_TCP; 567 tcp->tcp_connp = connp; 568 kmem_cache_free(ipcl_tcpconn_cache, connp); 569 } else if (connp->conn_flags & IPCL_SCTPCONN) { 570 sctp_free(connp); 571 } else { 572 ASSERT(connp->conn_udp == NULL); 573 mutex_destroy(&connp->conn_lock); 574 kmem_cache_free(ipcl_conn_cache, connp); 575 } 576 } 577 578 /* 579 * Running in cluster mode - deregister listener information 580 */ 581 582 static void 583 ipcl_conn_unlisten(conn_t *connp) 584 { 585 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 586 ASSERT(connp->conn_lport != 0); 587 588 if (cl_inet_unlisten != NULL) { 589 sa_family_t addr_family; 590 uint8_t *laddrp; 591 592 if (connp->conn_pkt_isv6) { 593 addr_family = AF_INET6; 594 laddrp = (uint8_t *)&connp->conn_bound_source_v6; 595 } else { 596 addr_family = AF_INET; 597 laddrp = (uint8_t *)&connp->conn_bound_source; 598 } 599 (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 600 connp->conn_lport); 601 } 602 connp->conn_flags &= ~IPCL_CL_LISTENER; 603 } 604 605 /* 606 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 607 * which table the conn belonged to). So for debugging we can see which hash 608 * table this connection was in. 
 */
/*
 * IPCL_HASH_REMOVE(connp): unlink 'connp' from whatever fanout bucket it is
 * currently on (connp->conn_fanout); a no-op when it is on none.
 *
 * The caller must not hold connp->conn_lock.  The bucket lock is taken
 * around the unlink.  Still under that lock, a cluster listener is
 * deregistered via ipcl_conn_unlisten() and the reference the hash list
 * held on the conn is dropped with CONN_DEC_REF().
 *
 * The expansion is a brace block that declares its own local 'connfp', so
 * it must be used as a statement.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}

/*
 * Function form of IPCL_HASH_REMOVE: removes 'connp' from the fanout bucket
 * it is on, if any.  The caller must not hold connp->conn_lock.
 */
void
ipcl_hash_remove(conn_t *connp)
{
	IPCL_HASH_REMOVE(connp);
}

/*
 * The whole purpose of this function is allow removal of
 * a conn_t from the connected hash for timewait reclaim.
 * This is essentially a TW reclaim fastpath where timewait
 * collector checks under fanout lock (so no one else can
 * get access to the conn_t) that refcnt is 2 i.e. one for
 * TCP and one for the classifier hash list. If ref count
 * is indeed 2, we can just remove the conn under lock and
 * avoid cleaning up the conn under squeue. This gives us
 * improved performance.
652 */ 653 void 654 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 655 { 656 ASSERT(MUTEX_HELD(&connfp->connf_lock)); 657 ASSERT(MUTEX_HELD(&connp->conn_lock)); 658 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 659 660 if ((connp)->conn_next != NULL) { 661 (connp)->conn_next->conn_prev = 662 (connp)->conn_prev; 663 } 664 if ((connp)->conn_prev != NULL) { 665 (connp)->conn_prev->conn_next = 666 (connp)->conn_next; 667 } else { 668 connfp->connf_head = (connp)->conn_next; 669 } 670 (connp)->conn_fanout = NULL; 671 (connp)->conn_next = NULL; 672 (connp)->conn_prev = NULL; 673 (connp)->conn_flags |= IPCL_REMOVED; 674 ASSERT((connp)->conn_ref == 2); 675 (connp)->conn_ref--; 676 } 677 678 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 679 ASSERT((connp)->conn_fanout == NULL); \ 680 ASSERT((connp)->conn_next == NULL); \ 681 ASSERT((connp)->conn_prev == NULL); \ 682 if ((connfp)->connf_head != NULL) { \ 683 (connfp)->connf_head->conn_prev = (connp); \ 684 (connp)->conn_next = (connfp)->connf_head; \ 685 } \ 686 (connp)->conn_fanout = (connfp); \ 687 (connfp)->connf_head = (connp); \ 688 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 689 IPCL_CONNECTED; \ 690 CONN_INC_REF(connp); \ 691 } 692 693 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 694 IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 695 "connp %p", (void *)(connfp), (void *)(connp))); \ 696 IPCL_HASH_REMOVE((connp)); \ 697 mutex_enter(&(connfp)->connf_lock); \ 698 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 699 mutex_exit(&(connfp)->connf_lock); \ 700 } 701 702 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 703 conn_t *pconnp = NULL, *nconnp; \ 704 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 705 "connp %p", (void *)connfp, (void *)(connp))); \ 706 IPCL_HASH_REMOVE((connp)); \ 707 mutex_enter(&(connfp)->connf_lock); \ 708 nconnp = (connfp)->connf_head; \ 709 while (nconnp != NULL && \ 710 
!_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 711 pconnp = nconnp; \ 712 nconnp = nconnp->conn_next; \ 713 } \ 714 if (pconnp != NULL) { \ 715 pconnp->conn_next = (connp); \ 716 (connp)->conn_prev = pconnp; \ 717 } else { \ 718 (connfp)->connf_head = (connp); \ 719 } \ 720 if (nconnp != NULL) { \ 721 (connp)->conn_next = nconnp; \ 722 nconnp->conn_prev = (connp); \ 723 } \ 724 (connp)->conn_fanout = (connfp); \ 725 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 726 IPCL_BOUND; \ 727 CONN_INC_REF(connp); \ 728 mutex_exit(&(connfp)->connf_lock); \ 729 } 730 731 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 732 conn_t **list, *prev, *next; \ 733 boolean_t isv4mapped = \ 734 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 735 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 736 "connp %p", (void *)(connfp), (void *)(connp))); \ 737 IPCL_HASH_REMOVE((connp)); \ 738 mutex_enter(&(connfp)->connf_lock); \ 739 list = &(connfp)->connf_head; \ 740 prev = NULL; \ 741 while ((next = *list) != NULL) { \ 742 if (isv4mapped && \ 743 IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 744 connp->conn_zoneid == next->conn_zoneid) { \ 745 (connp)->conn_next = next; \ 746 if (prev != NULL) \ 747 prev = next->conn_prev; \ 748 next->conn_prev = (connp); \ 749 break; \ 750 } \ 751 list = &next->conn_next; \ 752 prev = next; \ 753 } \ 754 (connp)->conn_prev = prev; \ 755 *list = (connp); \ 756 (connp)->conn_fanout = (connfp); \ 757 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 758 IPCL_BOUND; \ 759 CONN_INC_REF((connp)); \ 760 mutex_exit(&(connfp)->connf_lock); \ 761 } 762 763 void 764 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 765 { 766 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 767 } 768 769 void 770 ipcl_proto_insert(conn_t *connp, uint8_t protocol) 771 { 772 connf_t *connfp; 773 774 ASSERT(connp != NULL); 775 776 connp->conn_ulp = protocol; 777 778 /* Insert it in the protocol hash */ 779 connfp = 
&ipcl_proto_fanout[protocol]; 780 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 781 } 782 783 void 784 ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 785 { 786 connf_t *connfp; 787 788 ASSERT(connp != NULL); 789 790 connp->conn_ulp = protocol; 791 792 /* Insert it in the Bind Hash */ 793 connfp = &ipcl_proto_fanout_v6[protocol]; 794 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 795 } 796 797 /* 798 * This function is used only for inserting SCTP raw socket now. 799 * This may change later. 800 * 801 * Note that only one raw socket can be bound to a port. The param 802 * lport is in network byte order. 803 */ 804 static int 805 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 806 { 807 connf_t *connfp; 808 conn_t *oconnp; 809 810 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 811 812 /* Check for existing raw socket already bound to the port. */ 813 mutex_enter(&connfp->connf_lock); 814 for (oconnp = connfp->connf_head; oconnp != NULL; 815 oconnp = oconnp->conn_next) { 816 if (oconnp->conn_lport == lport && 817 oconnp->conn_zoneid == connp->conn_zoneid && 818 oconnp->conn_af_isv6 == connp->conn_af_isv6 && 819 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 820 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 821 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 822 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 823 IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 824 &connp->conn_srcv6))) { 825 break; 826 } 827 } 828 mutex_exit(&connfp->connf_lock); 829 if (oconnp != NULL) 830 return (EADDRNOTAVAIL); 831 832 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 833 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 834 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 835 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 836 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 837 } else { 838 IPCL_HASH_INSERT_BOUND(connfp, connp); 839 } 840 } else { 841 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 842 } 843 return (0); 844 } 845 846 /* 847 * (v4, v6) bind hash insertion routines 
848 */ 849 int 850 ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 851 { 852 connf_t *connfp; 853 #ifdef IPCL_DEBUG 854 char buf[INET_NTOA_BUFSIZE]; 855 #endif 856 int ret = 0; 857 858 ASSERT(connp); 859 860 IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 861 "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 862 863 connp->conn_ulp = protocol; 864 IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 865 connp->conn_lport = lport; 866 867 switch (protocol) { 868 case IPPROTO_UDP: 869 default: 870 if (protocol == IPPROTO_UDP) { 871 IPCL_DEBUG_LVL(64, 872 ("ipcl_bind_insert: connp %p - udp\n", 873 (void *)connp)); 874 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 875 } else { 876 IPCL_DEBUG_LVL(64, 877 ("ipcl_bind_insert: connp %p - protocol\n", 878 (void *)connp)); 879 connfp = &ipcl_proto_fanout[protocol]; 880 } 881 882 if (connp->conn_rem != INADDR_ANY) { 883 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 884 } else if (connp->conn_src != INADDR_ANY) { 885 IPCL_HASH_INSERT_BOUND(connfp, connp); 886 } else { 887 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 888 } 889 break; 890 891 case IPPROTO_TCP: 892 893 /* Insert it in the Bind Hash */ 894 connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 895 if (connp->conn_src != INADDR_ANY) { 896 IPCL_HASH_INSERT_BOUND(connfp, connp); 897 } else { 898 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 899 } 900 if (cl_inet_listen != NULL) { 901 ASSERT(!connp->conn_pkt_isv6); 902 connp->conn_flags |= IPCL_CL_LISTENER; 903 (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 904 (uint8_t *)&connp->conn_bound_source, lport); 905 } 906 break; 907 908 case IPPROTO_SCTP: 909 ret = ipcl_sctp_hash_insert(connp, lport); 910 break; 911 } 912 913 return (ret); 914 } 915 916 int 917 ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 918 uint16_t lport) 919 { 920 connf_t *connfp; 921 int ret = 0; 922 923 ASSERT(connp); 924 925 connp->conn_ulp = protocol; 926 connp->conn_srcv6 = *src; 927 
connp->conn_lport = lport; 928 929 switch (protocol) { 930 case IPPROTO_UDP: 931 default: 932 if (protocol == IPPROTO_UDP) { 933 IPCL_DEBUG_LVL(128, 934 ("ipcl_bind_insert_v6: connp %p - udp\n", 935 (void *)connp)); 936 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 937 } else { 938 IPCL_DEBUG_LVL(128, 939 ("ipcl_bind_insert_v6: connp %p - protocol\n", 940 (void *)connp)); 941 connfp = &ipcl_proto_fanout_v6[protocol]; 942 } 943 944 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 945 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 946 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 947 IPCL_HASH_INSERT_BOUND(connfp, connp); 948 } else { 949 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 950 } 951 break; 952 953 case IPPROTO_TCP: 954 /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */ 955 956 /* Insert it in the Bind Hash */ 957 connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 958 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 959 IPCL_HASH_INSERT_BOUND(connfp, connp); 960 } else { 961 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 962 } 963 if (cl_inet_listen != NULL) { 964 sa_family_t addr_family; 965 uint8_t *laddrp; 966 967 if (connp->conn_pkt_isv6) { 968 addr_family = AF_INET6; 969 laddrp = 970 (uint8_t *)&connp->conn_bound_source_v6; 971 } else { 972 addr_family = AF_INET; 973 laddrp = (uint8_t *)&connp->conn_bound_source; 974 } 975 connp->conn_flags |= IPCL_CL_LISTENER; 976 (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 977 lport); 978 } 979 break; 980 981 case IPPROTO_SCTP: 982 ret = ipcl_sctp_hash_insert(connp, lport); 983 break; 984 } 985 986 return (ret); 987 } 988 989 /* 990 * ipcl_conn_hash insertion routines. 
 */
/*
 * Insert an IPv4 conn into the table that matches its protocol:
 *   TCP   - the connected fanout, after checking under the bucket lock that
 *	     no entry with the same tuple exists (EADDRINUSE if one does);
 *   SCTP  - the raw socket fanout, via ipcl_sctp_hash_insert();
 *   other - the UDP fanout or the IPv4 per-protocol fanout, as a connected,
 *	     bound or wildcard entry depending on which addresses are set.
 * Returns 0 on success or an errno value.
 */
int
ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
    ipaddr_t rem, uint32_t ports)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
#ifdef	IPCL_DEBUG
	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
#endif
	in_port_t	lport;
	int		ret = 0;

	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
	    ports, protocol));

	switch (protocol) {
	case IPPROTO_TCP:
		if (!(connp->conn_flags & IPCL_EAGER)) {
			/*
			 * for a eager connection, i.e connections which
			 * have just been created, the initialization is
			 * already done in ip at conn_creation time, so
			 * we can skip the checks here.
			 */
			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		}
		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem,
		    connp->conn_ports)];
		mutex_enter(&connfp->connf_lock);
		/* Refuse the insert if the same tuple is already present. */
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
			    connp->conn_rem, connp->conn_src,
			    connp->conn_ports)) {

				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 *
			 * The bucket lock is dropped here because
			 * IPCL_HASH_REMOVE takes the lock of the bucket
			 * the conn is currently on.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/*
		 * The raw socket may have already been bound, remove it
		 * from the hash first.
		 */
		IPCL_HASH_REMOVE(connp);
		/*
		 * Take the low 16 bits of the host-order 'ports' value and
		 * hand them on in network byte order (presumably the local
		 * port -- TODO confirm against the callers' port packing).
		 */
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_UDP:
	default:
		/* View 'ports' as two 16-bit halves; up[1] keys the hash. */
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
		if (protocol == IPPROTO_UDP) {
			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
		} else {
			connfp = &ipcl_proto_fanout[protocol];
		}

		if (connp->conn_rem != INADDR_ANY) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (connp->conn_src != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * IPv6 counterpart of ipcl_conn_insert().  For TCP an existing entry only
 * counts as a duplicate when its tcp_bound_if is zero or equals 'ifindex'.
 * Non-TCP, non-SCTP protocols use the IPv6 per-protocol fanout (or the UDP
 * fanout for UDP).  Returns 0 on success or an errno value.
 */
int
ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
    const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
{
	connf_t		*connfp;
	uint16_t	*up;
	conn_t		*tconnp;
	in_port_t	lport;
	int		ret = 0;

	switch (protocol) {
	case IPPROTO_TCP:
		/* Just need to insert a conn struct */
		if (!(connp->conn_flags & IPCL_EAGER)) {
			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
		}
		connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6,
		    connp->conn_ports)];
		mutex_enter(&connfp->connf_lock);
		/* Refuse the insert if the same tuple is already present. */
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
			    connp->conn_remv6, connp->conn_srcv6,
			    connp->conn_ports) &&
			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
			    tconnp->conn_tcp->tcp_bound_if == ifindex)) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 *
			 * The bucket lock is dropped here because
			 * IPCL_HASH_REMOVE takes the lock of the bucket
			 * the conn is currently on.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/* Drop any earlier raw-socket binding before re-inserting. */
		IPCL_HASH_REMOVE(connp);
		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_UDP:
	default:
		/* View 'ports' as two 16-bit halves; up[1] keys the hash. */
		up = (uint16_t *)&ports;
		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
		if (protocol == IPPROTO_UDP) {
			connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])];
		} else {
			connfp = &ipcl_proto_fanout_v6[protocol];
		}

		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * v4 packet classifying function. looks up the fanout table to
 * find the conn, the packet belongs to. returns the conn with
 * the reference held, null otherwise.
1158 */ 1159 conn_t * 1160 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 1161 { 1162 ipha_t *ipha; 1163 connf_t *connfp, *bind_connfp; 1164 uint16_t lport; 1165 uint16_t fport; 1166 uint32_t ports; 1167 conn_t *connp; 1168 uint16_t *up; 1169 1170 ipha = (ipha_t *)mp->b_rptr; 1171 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 1172 1173 switch (protocol) { 1174 case IPPROTO_TCP: 1175 ports = *(uint32_t *)up; 1176 connfp = 1177 &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)]; 1178 mutex_enter(&connfp->connf_lock); 1179 for (connp = connfp->connf_head; connp != NULL; 1180 connp = connp->conn_next) { 1181 if (IPCL_CONN_MATCH(connp, protocol, 1182 ipha->ipha_src, ipha->ipha_dst, ports)) 1183 break; 1184 } 1185 1186 if (connp != NULL) { 1187 CONN_INC_REF(connp); 1188 mutex_exit(&connfp->connf_lock); 1189 return (connp); 1190 } 1191 1192 mutex_exit(&connfp->connf_lock); 1193 1194 lport = up[1]; 1195 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1196 mutex_enter(&bind_connfp->connf_lock); 1197 for (connp = bind_connfp->connf_head; connp != NULL; 1198 connp = connp->conn_next) { 1199 if (IPCL_BIND_MATCH(connp, protocol, 1200 ipha->ipha_dst, lport) && 1201 connp->conn_zoneid == zoneid) 1202 break; 1203 } 1204 1205 if (connp != NULL) { 1206 /* Have a listner at least */ 1207 CONN_INC_REF(connp); 1208 mutex_exit(&bind_connfp->connf_lock); 1209 return (connp); 1210 } 1211 1212 mutex_exit(&bind_connfp->connf_lock); 1213 1214 IPCL_DEBUG_LVL(512, 1215 ("ipcl_classify: couldn't classify mp = %p\n", 1216 (void *)mp)); 1217 break; 1218 1219 case IPPROTO_UDP: 1220 lport = up[1]; 1221 fport = up[0]; 1222 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 1223 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 1224 mutex_enter(&connfp->connf_lock); 1225 for (connp = connfp->connf_head; connp != NULL; 1226 connp = connp->conn_next) { 1227 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 1228 fport, 
ipha->ipha_src) && 1229 connp->conn_zoneid == zoneid) 1230 break; 1231 } 1232 1233 if (connp != NULL) { 1234 CONN_INC_REF(connp); 1235 mutex_exit(&connfp->connf_lock); 1236 return (connp); 1237 } 1238 1239 /* 1240 * We shouldn't come here for multicast/broadcast packets 1241 */ 1242 mutex_exit(&connfp->connf_lock); 1243 IPCL_DEBUG_LVL(512, 1244 ("ipcl_classify: cant find udp conn_t for ports : %x %x", 1245 lport, fport)); 1246 break; 1247 } 1248 1249 return (NULL); 1250 } 1251 1252 conn_t * 1253 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 1254 { 1255 ip6_t *ip6h; 1256 connf_t *connfp, *bind_connfp; 1257 uint16_t lport; 1258 uint16_t fport; 1259 tcph_t *tcph; 1260 uint32_t ports; 1261 conn_t *connp; 1262 uint16_t *up; 1263 1264 1265 ip6h = (ip6_t *)mp->b_rptr; 1266 1267 switch (protocol) { 1268 case IPPROTO_TCP: 1269 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 1270 up = (uint16_t *)tcph->th_lport; 1271 ports = *(uint32_t *)up; 1272 1273 connfp = 1274 &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)]; 1275 mutex_enter(&connfp->connf_lock); 1276 for (connp = connfp->connf_head; connp != NULL; 1277 connp = connp->conn_next) { 1278 if (IPCL_CONN_MATCH_V6(connp, protocol, 1279 ip6h->ip6_src, ip6h->ip6_dst, ports)) 1280 break; 1281 } 1282 1283 if (connp != NULL) { 1284 CONN_INC_REF(connp); 1285 mutex_exit(&connfp->connf_lock); 1286 return (connp); 1287 } 1288 1289 mutex_exit(&connfp->connf_lock); 1290 1291 lport = up[1]; 1292 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1293 mutex_enter(&bind_connfp->connf_lock); 1294 for (connp = bind_connfp->connf_head; connp != NULL; 1295 connp = connp->conn_next) { 1296 if (IPCL_BIND_MATCH_V6(connp, protocol, 1297 ip6h->ip6_dst, lport) && 1298 connp->conn_zoneid == zoneid) 1299 break; 1300 } 1301 1302 if (connp != NULL) { 1303 /* Have a listner at least */ 1304 CONN_INC_REF(connp); 1305 mutex_exit(&bind_connfp->connf_lock); 1306 IPCL_DEBUG_LVL(512, 1307 ("ipcl_classify_v6: found 
listner " 1308 "connp = %p\n", (void *)connp)); 1309 1310 return (connp); 1311 } 1312 1313 mutex_exit(&bind_connfp->connf_lock); 1314 1315 IPCL_DEBUG_LVL(512, 1316 ("ipcl_classify_v6: couldn't classify mp = %p\n", 1317 (void *)mp)); 1318 break; 1319 1320 case IPPROTO_UDP: 1321 up = (uint16_t *)&mp->b_rptr[hdr_len]; 1322 lport = up[1]; 1323 fport = up[0]; 1324 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 1325 fport)); 1326 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 1327 mutex_enter(&connfp->connf_lock); 1328 for (connp = connfp->connf_head; connp != NULL; 1329 connp = connp->conn_next) { 1330 if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 1331 fport, ip6h->ip6_src) && 1332 connp->conn_zoneid == zoneid) 1333 break; 1334 } 1335 1336 if (connp != NULL) { 1337 CONN_INC_REF(connp); 1338 mutex_exit(&connfp->connf_lock); 1339 return (connp); 1340 } 1341 1342 /* 1343 * We shouldn't come here for multicast/broadcast packets 1344 */ 1345 mutex_exit(&connfp->connf_lock); 1346 IPCL_DEBUG_LVL(512, 1347 ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 1348 lport, fport)); 1349 break; 1350 } 1351 1352 1353 return (NULL); 1354 } 1355 1356 /* 1357 * wrapper around ipcl_classify_(v4,v6) routines. 
1358 */ 1359 conn_t * 1360 ipcl_classify(mblk_t *mp, zoneid_t zoneid) 1361 { 1362 uint16_t hdr_len; 1363 ipha_t *ipha; 1364 uint8_t *nexthdrp; 1365 1366 if (MBLKL(mp) < sizeof (ipha_t)) 1367 return (NULL); 1368 1369 switch (IPH_HDR_VERSION(mp->b_rptr)) { 1370 case IPV4_VERSION: 1371 ipha = (ipha_t *)mp->b_rptr; 1372 hdr_len = IPH_HDR_LENGTH(ipha); 1373 return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 1374 zoneid)); 1375 case IPV6_VERSION: 1376 if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 1377 &hdr_len, &nexthdrp)) 1378 return (NULL); 1379 1380 return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid)); 1381 } 1382 1383 return (NULL); 1384 } 1385 1386 conn_t * 1387 ipcl_classify_raw(uint8_t protocol, zoneid_t zoneid, uint32_t ports, 1388 ipha_t *hdr) 1389 { 1390 struct connf_s *connfp; 1391 conn_t *connp; 1392 in_port_t lport; 1393 int af; 1394 1395 lport = ((uint16_t *)&ports)[1]; 1396 af = IPH_HDR_VERSION(hdr); 1397 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 1398 1399 mutex_enter(&connfp->connf_lock); 1400 for (connp = connfp->connf_head; connp != NULL; 1401 connp = connp->conn_next) { 1402 /* We don't allow v4 fallback for v6 raw socket. */ 1403 if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 1404 IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { 1405 continue; 1406 } 1407 if (connp->conn_fully_bound) { 1408 if (af == IPV4_VERSION) { 1409 if (IPCL_CONN_MATCH(connp, protocol, 1410 hdr->ipha_src, hdr->ipha_dst, ports)) { 1411 break; 1412 } 1413 } else { 1414 if (IPCL_CONN_MATCH_V6(connp, protocol, 1415 ((ip6_t *)hdr)->ip6_src, 1416 ((ip6_t *)hdr)->ip6_dst, ports)) { 1417 break; 1418 } 1419 } 1420 } else { 1421 if (af == IPV4_VERSION) { 1422 if (IPCL_BIND_MATCH(connp, protocol, 1423 hdr->ipha_dst, lport)) { 1424 break; 1425 } 1426 } else { 1427 if (IPCL_BIND_MATCH_V6(connp, protocol, 1428 ((ip6_t *)hdr)->ip6_dst, lport)) { 1429 break; 1430 } 1431 } 1432 } 1433 } 1434 1435 if (connp != NULL) 1436 goto found; 1437 mutex_exit(&connfp->connf_lock); 1438 1439 /* Try to look for a wildcard match. */ 1440 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(0)]; 1441 mutex_enter(&connfp->connf_lock); 1442 for (connp = connfp->connf_head; connp != NULL; 1443 connp = connp->conn_next) { 1444 /* We don't allow v4 fallback for v6 raw socket. */ 1445 if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 1446 IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { 1447 continue; 1448 } 1449 if (af == IPV4_VERSION) { 1450 if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 1451 break; 1452 } else { 1453 if (IPCL_RAW_MATCH_V6(connp, protocol, 1454 ((ip6_t *)hdr)->ip6_dst)) { 1455 break; 1456 } 1457 } 1458 } 1459 1460 if (connp != NULL) 1461 goto found; 1462 1463 mutex_exit(&connfp->connf_lock); 1464 return (NULL); 1465 1466 found: 1467 ASSERT(connp != NULL); 1468 CONN_INC_REF(connp); 1469 mutex_exit(&connfp->connf_lock); 1470 return (connp); 1471 } 1472 1473 /* ARGSUSED */ 1474 static int 1475 ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) 1476 { 1477 itc_t *itc = (itc_t *)buf; 1478 conn_t *connp = &itc->itc_conn; 1479 tcp_t *tcp = &itc->itc_tcp; 1480 bzero(itc, sizeof (itc_t)); 1481 tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 1482 connp->conn_tcp = tcp; 1483 connp->conn_flags = IPCL_TCPCONN; 1484 connp->conn_ulp = IPPROTO_TCP; 1485 tcp->tcp_connp = connp; 1486 return (0); 1487 } 1488 1489 /* ARGSUSED */ 1490 static void 1491 ipcl_tcpconn_destructor(void *buf, void *cdrarg) 1492 { 1493 tcp_timermp_free(((conn_t *)buf)->conn_tcp); 1494 } 1495 1496 /* 1497 * All conns are inserted in a global multi-list for the benefit of 1498 * walkers. The walk is guaranteed to walk all open conns at the time 1499 * of the start of the walk exactly once. This property is needed to 1500 * achieve some cleanups during unplumb of interfaces. This is achieved 1501 * as follows. 1502 * 1503 * ipcl_conn_create and ipcl_conn_destroy are the only functions that 1504 * call the insert and delete functions below at creation and deletion 1505 * time respectively. The conn never moves or changes its position in this 1506 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 1507 * won't increase due to walkers, once the conn deletion has started. 
Note 1508 * that we can't remove the conn from the global list and then wait for 1509 * the refcnt to drop to zero, since walkers would then see a truncated 1510 * list. CONN_INCIPIENT ensures that walkers don't start looking at 1511 * conns until ip_open is ready to make them globally visible. 1512 * The global round robin multi-list locks are held only to get the 1513 * next member/insertion/deletion and contention should be negligible 1514 * if the multi-list is much greater than the number of cpus. 1515 */ 1516 void 1517 ipcl_globalhash_insert(conn_t *connp) 1518 { 1519 int index; 1520 1521 /* 1522 * No need for atomic here. Approximate even distribution 1523 * in the global lists is sufficient. 1524 */ 1525 conn_g_index++; 1526 index = conn_g_index & (CONN_G_HASH_SIZE - 1); 1527 1528 connp->conn_g_prev = NULL; 1529 /* 1530 * Mark as INCIPIENT, so that walkers will ignore this 1531 * for now, till ip_open is ready to make it visible globally. 1532 */ 1533 connp->conn_state_flags |= CONN_INCIPIENT; 1534 1535 /* Insert at the head of the list */ 1536 mutex_enter(&ipcl_globalhash_fanout[index].connf_lock); 1537 connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head; 1538 if (connp->conn_g_next != NULL) 1539 connp->conn_g_next->conn_g_prev = connp; 1540 ipcl_globalhash_fanout[index].connf_head = connp; 1541 1542 /* The fanout bucket this conn points to */ 1543 connp->conn_g_fanout = &ipcl_globalhash_fanout[index]; 1544 1545 mutex_exit(&ipcl_globalhash_fanout[index].connf_lock); 1546 } 1547 1548 void 1549 ipcl_globalhash_remove(conn_t *connp) 1550 { 1551 /* 1552 * We were never inserted in the global multi list. 1553 * IPCL_NONE variety is never inserted in the global multilist 1554 * since it is presumed to not need any cleanup and is transient. 
1555 */ 1556 if (connp->conn_g_fanout == NULL) 1557 return; 1558 1559 mutex_enter(&connp->conn_g_fanout->connf_lock); 1560 if (connp->conn_g_prev != NULL) 1561 connp->conn_g_prev->conn_g_next = connp->conn_g_next; 1562 else 1563 connp->conn_g_fanout->connf_head = connp->conn_g_next; 1564 if (connp->conn_g_next != NULL) 1565 connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 1566 mutex_exit(&connp->conn_g_fanout->connf_lock); 1567 1568 /* Better to stumble on a null pointer than to corrupt memory */ 1569 connp->conn_g_next = NULL; 1570 connp->conn_g_prev = NULL; 1571 } 1572 1573 /* 1574 * Walk the list of all conn_t's in the system, calling the function provided 1575 * with the specified argument for each. 1576 * Applies to both IPv4 and IPv6. 1577 * 1578 * IPCs may hold pointers to ipif/ill. To guard against stale pointers 1579 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 1580 * unplumbed or removed. New conn_t's that are created while we are walking 1581 * may be missed by this walk, because they are not necessarily inserted 1582 * at the tail of the list. They are new conn_t's and thus don't have any 1583 * stale pointers. The CONN_CLOSING flag ensures that no new reference 1584 * is created to the struct that is going away. 
1585 */ 1586 void 1587 ipcl_walk(pfv_t func, void *arg) 1588 { 1589 int i; 1590 conn_t *connp; 1591 conn_t *prev_connp; 1592 1593 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 1594 mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 1595 prev_connp = NULL; 1596 connp = ipcl_globalhash_fanout[i].connf_head; 1597 while (connp != NULL) { 1598 mutex_enter(&connp->conn_lock); 1599 if (connp->conn_state_flags & 1600 (CONN_CONDEMNED | CONN_INCIPIENT)) { 1601 mutex_exit(&connp->conn_lock); 1602 connp = connp->conn_g_next; 1603 continue; 1604 } 1605 CONN_INC_REF_LOCKED(connp); 1606 mutex_exit(&connp->conn_lock); 1607 mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 1608 (*func)(connp, arg); 1609 if (prev_connp != NULL) 1610 CONN_DEC_REF(prev_connp); 1611 mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 1612 prev_connp = connp; 1613 connp = connp->conn_g_next; 1614 } 1615 mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 1616 if (prev_connp != NULL) 1617 CONN_DEC_REF(prev_connp); 1618 } 1619 } 1620 1621 /* 1622 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 1623 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 1624 * held; caller must call CONN_DEC_REF. Only checks for connected entries 1625 * (peer tcp in at least ESTABLISHED state). 1626 */ 1627 conn_t * 1628 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) 1629 { 1630 uint32_t ports; 1631 uint16_t *pports = (uint16_t *)&ports; 1632 connf_t *connfp; 1633 conn_t *tconnp; 1634 boolean_t zone_chk; 1635 1636 /* 1637 * If either the source of destination address is loopback, then 1638 * both endpoints must be in the same Zone. Otherwise, both of 1639 * the addresses are system-wide unique (tcp is in ESTABLISHED 1640 * state) and the endpoints may reside in different Zones. 
1641 */ 1642 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 1643 ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 1644 1645 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1646 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1647 1648 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 1649 1650 mutex_enter(&connfp->connf_lock); 1651 for (tconnp = connfp->connf_head; tconnp != NULL; 1652 tconnp = tconnp->conn_next) { 1653 1654 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 1655 ipha->ipha_dst, ipha->ipha_src, ports) && 1656 tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 1657 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 1658 1659 ASSERT(tconnp != connp); 1660 CONN_INC_REF(tconnp); 1661 mutex_exit(&connfp->connf_lock); 1662 return (tconnp); 1663 } 1664 } 1665 mutex_exit(&connfp->connf_lock); 1666 return (NULL); 1667 } 1668 1669 /* 1670 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 1671 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 1672 * held; caller must call CONN_DEC_REF. Only checks for connected entries 1673 * (peer tcp in at least ESTABLISHED state). 1674 */ 1675 conn_t * 1676 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) 1677 { 1678 uint32_t ports; 1679 uint16_t *pports = (uint16_t *)&ports; 1680 connf_t *connfp; 1681 conn_t *tconnp; 1682 boolean_t zone_chk; 1683 1684 /* 1685 * If either the source of destination address is loopback, then 1686 * both endpoints must be in the same Zone. Otherwise, both of 1687 * the addresses are system-wide unique (tcp is in ESTABLISHED 1688 * state) and the endpoints may reside in different Zones. We 1689 * don't do Zone check for link local address(es) because the 1690 * current Zone implementation treats each link local address as 1691 * being unique per system node, i.e. they belong to global Zone. 
1692 */ 1693 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 1694 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 1695 1696 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1697 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1698 1699 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 1700 1701 mutex_enter(&connfp->connf_lock); 1702 for (tconnp = connfp->connf_head; tconnp != NULL; 1703 tconnp = tconnp->conn_next) { 1704 1705 /* We skip tcp_bound_if check here as this is loopback tcp */ 1706 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 1707 ip6h->ip6_dst, ip6h->ip6_src, ports) && 1708 tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 1709 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 1710 1711 ASSERT(tconnp != connp); 1712 CONN_INC_REF(tconnp); 1713 mutex_exit(&connfp->connf_lock); 1714 return (tconnp); 1715 } 1716 } 1717 mutex_exit(&connfp->connf_lock); 1718 return (NULL); 1719 } 1720 1721 /* 1722 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 1723 * Returns with conn reference held. Caller must call CONN_DEC_REF. 1724 * Only checks for connected entries i.e. no INADDR_ANY checks. 
1725 */ 1726 conn_t * 1727 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) 1728 { 1729 uint32_t ports; 1730 uint16_t *pports; 1731 connf_t *connfp; 1732 conn_t *tconnp; 1733 1734 pports = (uint16_t *)&ports; 1735 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1736 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1737 1738 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 1739 1740 mutex_enter(&connfp->connf_lock); 1741 for (tconnp = connfp->connf_head; tconnp != NULL; 1742 tconnp = tconnp->conn_next) { 1743 1744 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 1745 ipha->ipha_dst, ipha->ipha_src, ports) && 1746 tconnp->conn_tcp->tcp_state >= min_state) { 1747 1748 CONN_INC_REF(tconnp); 1749 mutex_exit(&connfp->connf_lock); 1750 return (tconnp); 1751 } 1752 } 1753 mutex_exit(&connfp->connf_lock); 1754 return (NULL); 1755 } 1756 1757 /* 1758 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 1759 * Returns with conn reference held. Caller must call CONN_DEC_REF. 1760 * Only checks for connected entries i.e. no INADDR_ANY checks. 1761 * Match on ifindex in addition to addresses. 
1762 */ 1763 conn_t * 1764 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 1765 uint_t ifindex) 1766 { 1767 tcp_t *tcp; 1768 uint32_t ports; 1769 uint16_t *pports; 1770 connf_t *connfp; 1771 conn_t *tconnp; 1772 1773 pports = (uint16_t *)&ports; 1774 pports[0] = tcpha->tha_fport; 1775 pports[1] = tcpha->tha_lport; 1776 1777 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 1778 1779 mutex_enter(&connfp->connf_lock); 1780 for (tconnp = connfp->connf_head; tconnp != NULL; 1781 tconnp = tconnp->conn_next) { 1782 1783 tcp = tconnp->conn_tcp; 1784 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 1785 ip6h->ip6_dst, ip6h->ip6_src, ports) && 1786 tcp->tcp_state >= min_state && 1787 (tcp->tcp_bound_if == 0 || 1788 tcp->tcp_bound_if == ifindex)) { 1789 1790 CONN_INC_REF(tconnp); 1791 mutex_exit(&connfp->connf_lock); 1792 return (tconnp); 1793 } 1794 } 1795 mutex_exit(&connfp->connf_lock); 1796 return (NULL); 1797 } 1798 1799 /* 1800 * To find a TCP listening connection matching the incoming segment. 1801 */ 1802 conn_t * 1803 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) 1804 { 1805 connf_t *bind_connfp; 1806 conn_t *connp; 1807 tcp_t *tcp; 1808 1809 /* 1810 * Avoid false matches for packets sent to an IP destination of 1811 * all zeros. 
1812 */ 1813 if (laddr == 0) 1814 return (NULL); 1815 1816 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1817 mutex_enter(&bind_connfp->connf_lock); 1818 for (connp = bind_connfp->connf_head; connp != NULL; 1819 connp = connp->conn_next) { 1820 tcp = connp->conn_tcp; 1821 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 1822 connp->conn_zoneid == zoneid && 1823 (tcp->tcp_listener == NULL)) { 1824 CONN_INC_REF(connp); 1825 mutex_exit(&bind_connfp->connf_lock); 1826 return (connp); 1827 } 1828 } 1829 mutex_exit(&bind_connfp->connf_lock); 1830 return (NULL); 1831 } 1832 1833 1834 conn_t * 1835 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 1836 zoneid_t zoneid) 1837 { 1838 connf_t *bind_connfp; 1839 conn_t *connp = NULL; 1840 tcp_t *tcp; 1841 1842 /* 1843 * Avoid false matches for packets sent to an IP destination of 1844 * all zeros. 1845 */ 1846 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 1847 return (NULL); 1848 1849 1850 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1851 mutex_enter(&bind_connfp->connf_lock); 1852 for (connp = bind_connfp->connf_head; connp != NULL; 1853 connp = connp->conn_next) { 1854 tcp = connp->conn_tcp; 1855 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 1856 connp->conn_zoneid == zoneid && 1857 (tcp->tcp_bound_if == 0 || 1858 tcp->tcp_bound_if == ifindex) && 1859 tcp->tcp_listener == NULL) { 1860 CONN_INC_REF(connp); 1861 mutex_exit(&bind_connfp->connf_lock); 1862 return (connp); 1863 } 1864 } 1865 mutex_exit(&bind_connfp->connf_lock); 1866 return (NULL); 1867 } 1868 1869 /* 1870 * ipcl_get_next_conn 1871 * get the next entry in the conn global list 1872 * and put a reference on the next_conn. 1873 * decrement the reference on the current conn. 1874 * 1875 * This is an iterator based walker function that also provides for 1876 * some selection by the caller. 
It walks through the conn_hash bucket
 * searching for the next valid connp in the list, and selects connections
 * that are neither closed nor condemned. It also REFHOLDS the conn
 * thus ensuring that the conn exists when the caller uses the conn.
 */
conn_t *
ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
{
	conn_t	*cand;

	if (connfp == NULL)
		return (NULL);

	mutex_enter(&connfp->connf_lock);

	/* Start at the bucket head, or just after the caller's conn. */
	if (connp == NULL)
		cand = connfp->connf_head;
	else
		cand = connp->conn_g_next;

	for (; cand != NULL; cand = cand->conn_g_next) {
		mutex_enter(&cand->conn_lock);
		/*
		 * Select the first conn that has at least one of the
		 * requested conn_flags set and is neither condemned nor
		 * incipient; everything else is passed over.
		 */
		if ((cand->conn_flags & conn_flags) != 0 &&
		    (cand->conn_state_flags &
		    (CONN_CONDEMNED | CONN_INCIPIENT)) == 0) {
			CONN_INC_REF_LOCKED(cand);
			mutex_exit(&cand->conn_lock);
			break;
		}
		mutex_exit(&cand->conn_lock);
	}

	mutex_exit(&connfp->connf_lock);

	/* The reference on the caller's current conn is released here. */
	if (connp != NULL) {
		CONN_DEC_REF(connp);
	}

	return (cand);
}

#ifdef CONN_DEBUG
/*
 * Record the current call stack in the next slot of the conn's trace
 * buffer, which holds CONN_TRACE_MAX entries and wraps around. The caller
 * must hold conn_lock. Always returns 1.
 */
int
conn_trace_ref(conn_t *connp)
{
	conn_trace_t	*ctb;
	int		slot;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/* Advance to the next slot, wrapping at the end of the buffer. */
	slot = connp->conn_trace_last + 1;
	if (slot == CONN_TRACE_MAX)
		slot = 0;

	ctb = &connp->conn_trace_buf[slot];
	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
	connp->conn_trace_last = slot;
	return (1);
}

/*
 * Same recording as conn_trace_ref(), into the same trace buffer. The
 * caller must hold conn_lock. Always returns 1.
 */
int
conn_untrace_ref(conn_t *connp)
{
	conn_trace_t	*ctb;
	int		slot;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/* Advance to the next slot, wrapping at the end of the buffer. */
	slot = connp->conn_trace_last + 1;
	if (slot == CONN_TRACE_MAX)
		slot = 0;

	ctb = &connp->conn_trace_buf[slot];
	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
	connp->conn_trace_last = slot;
	return (1);
}
#endif