/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/* Version identification string for this source file. */
const char ipclassifier_version[] = "@(#)ipclassifier.c 1.6 04/03/31 SMI";

/*
 * IP PACKET CLASSIFIER
 *
 * The IP packet classifier provides mapping between IP packets and persistent
 * connection state for connection-oriented protocols. It also provides
 * an interface for managing connection states.
 *
 * The connection state is kept in conn_t data structure and contains, among
 * other things:
 *
 *	o local/remote address and ports
 *	o Transport protocol
 *	o squeue for the connection (for TCP only)
 *	o reference counter
 *	o Connection state
 *	o hash table linkage
 *	o interface/ire information
 *	o credentials
 *	o ipsec policy
 *	o send and receive functions.
 *	o mutex lock.
 *
 * Connections use a reference counting scheme. They are freed when the
 * reference counter drops to zero.
A reference is incremented when connection
 * is placed in a list or table, when incoming packet for the connection arrives
 * and when connection is processed via squeue (squeue processing may be
 * asynchronous and the reference protects the connection from being destroyed
 * before its processing is finished).
 *
 * send and receive functions are currently used for TCP only. The send function
 * determines the IP entry point for the packet once it leaves TCP to be sent to
 * the destination address. The receive function is used by IP when the packet
 * should be passed for TCP processing. When a new connection is created these
 * are set to ip_output() and tcp_input() respectively. During the lifetime of
 * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
 * tcp_conn_request(). This allows incoming SYNs to go directly into the
 * listener SYN processing function without going to tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any).
This reference should be dropped
 * when the caller has finished processing the connection.
 *
 *
 * INTERFACES:
 * ===========
 *
 * Connection Lookup:
 * ------------------
 *
 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid)
 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid)
 *
 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
 * it can't find any associated connection. If the connection is found, its
 * reference counter is incremented.
 *
 *	mp:	mblock, containing packet header. The full header should fit
 *		into a single mblock. It should also contain at least full IP
 *		and TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
 *		 the packet.
 *
 *	zoneid: The zone in which the returned connection must be.
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, local port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * conn_t	*ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int);
 * conn_t	*ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_IPCCONN	indicates all non-TCP connections.
 *
 * void ipcl_conn_destroy(connp)
 *
 *	Destroys the connection state, removes it from the global
 *	connection hash table and frees its memory.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#define _SUN_TPI_VERSION 2
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/isa_defs.h>
#include <inet/common.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>

#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/tcp_trace.h>
#include <inet/ip_multi.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_rts.h>
#include <inet/optcom.h>
#include <inet/ip_ndp.h>
#include <inet/sctp_ip.h>

#include <sys/ethernet.h>
#include <net/if_types.h>
#include <sys/cpuvar.h>

#include <inet/mi.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>

/* Classifier debug output is compiled in only for DEBUG builds. */
#ifdef DEBUG
#define IPCL_DEBUG
#else
#undef IPCL_DEBUG
#endif

/*
 * IPCL_DEBUG_LVL(level, args): when IPCL_DEBUG is defined, calls
 * printf with 'args' if any bit of 'level' is set in ipcl_debug_level;
 * 'args' must be a complete parenthesized printf argument list.
 * Otherwise it expands to an empty block.
 *
 * NOTE(review): the debug form expands to a bare 'if' statement, so it
 * must not be used as the body of an un-braced if/else.
 */
#ifdef IPCL_DEBUG
int ipcl_debug_level = 0;
#define IPCL_DEBUG_LVL(level, args) \
	if (ipcl_debug_level & level) { printf args; }
#else
#define IPCL_DEBUG_LVL(level, args) {; }
#endif
/*
 * Fanout (hash) tables.  The pointer tables are allocated, and all
 * per-bucket locks initialized, in ipcl_init().
 */
connf_t *ipcl_conn_fanout;
connf_t *ipcl_bind_fanout;
connf_t ipcl_proto_fanout[IPPROTO_MAX + 1];
connf_t ipcl_proto_fanout_v6[IPPROTO_MAX + 1];
connf_t *ipcl_udp_fanout;

/* A separate hash list for raw socket. */
connf_t *ipcl_raw_fanout;

connf_t rts_clients;

/* Old value for compatibility */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically.
 *
 * When both ipcl_conn_hash_size and tcp_conn_hash_size are zero,
 * ipcl_init() starts from (freemem * PAGESIZE) / ipcl_conn_hash_memfactor
 * and caps the result at ipcl_conn_hash_maxsize.
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* Number of buckets in ipcl_conn_fanout; computed by ipcl_init(). */
uint_t ipcl_conn_fanout_size = 0;


/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 256;

/* Raw socket fanout size. Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The initializer ends with a 0 entry; ipcl_init() treats a selected
 * value of 0 as out of range and falls back to entry 16.
 */

#define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \
		6143, 12281, 24571, 49139, 98299, 196597, 393209, \
		786431, 1572853, 3145721, 6291449, 12582893, 25165813, \
		50331599, 100663291, 201326557, 0}

/*
 * wrapper structure to ensure that conn+tcpb are aligned
 * on cache lines.
301 */ 302 typedef struct itc_s { 303 union { 304 conn_t itcu_conn; 305 char itcu_filler[CACHE_ALIGN(conn_s)]; 306 } itc_u; 307 tcp_t itc_tcp; 308 } itc_t; 309 310 #define itc_conn itc_u.itcu_conn 311 312 struct kmem_cache *ipcl_tcpconn_cache; 313 struct kmem_cache *ipcl_tcp_cache; 314 struct kmem_cache *ipcl_conn_cache; 315 extern struct kmem_cache *sctp_conn_cache; 316 extern struct kmem_cache *tcp_sack_info_cache; 317 extern struct kmem_cache *tcp_iphc_cache; 318 319 extern void tcp_timermp_free(tcp_t *); 320 extern mblk_t *tcp_timermp_alloc(int); 321 322 static int ipcl_tcpconn_constructor(void *, void *, int); 323 static void ipcl_tcpconn_destructor(void *, void *); 324 325 static int conn_g_index; 326 connf_t *ipcl_globalhash_fanout; 327 328 #ifdef IPCL_DEBUG 329 #define INET_NTOA_BUFSIZE 18 330 331 static char * 332 inet_ntoa_r(uint32_t in, char *b) 333 { 334 unsigned char *p; 335 336 p = (unsigned char *)∈ 337 (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 338 return (b); 339 } 340 #endif 341 342 /* 343 * ipclassifier intialization routine, sets up hash tables and 344 * conn caches. 345 */ 346 void 347 ipcl_init(void) 348 { 349 int i; 350 int sizes[] = P2Ps(); 351 352 ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", 353 sizeof (conn_t), CACHE_ALIGN_SIZE, 354 NULL, NULL, 355 NULL, NULL, NULL, 0); 356 357 ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache", 358 sizeof (itc_t), CACHE_ALIGN_SIZE, 359 ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, 360 NULL, NULL, NULL, 0); 361 362 /* 363 * Calculate size of conn fanout table. 
364 */ 365 if (ipcl_conn_hash_size != 0) { 366 ipcl_conn_fanout_size = ipcl_conn_hash_size; 367 } else if (tcp_conn_hash_size != 0) { 368 ipcl_conn_fanout_size = tcp_conn_hash_size; 369 } else { 370 extern pgcnt_t freemem; 371 372 ipcl_conn_fanout_size = 373 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 374 375 if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) 376 ipcl_conn_fanout_size = ipcl_conn_hash_maxsize; 377 } 378 379 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 380 if (sizes[i] >= ipcl_conn_fanout_size) { 381 break; 382 } 383 } 384 if ((ipcl_conn_fanout_size = sizes[i]) == 0) { 385 /* Out of range, use the 2^16 value */ 386 ipcl_conn_fanout_size = sizes[16]; 387 } 388 ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size * 389 sizeof (*ipcl_conn_fanout), KM_SLEEP); 390 391 for (i = 0; i < ipcl_conn_fanout_size; i++) { 392 mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL, 393 MUTEX_DEFAULT, NULL); 394 } 395 396 ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size * 397 sizeof (*ipcl_bind_fanout), KM_SLEEP); 398 399 for (i = 0; i < ipcl_bind_fanout_size; i++) { 400 mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL, 401 MUTEX_DEFAULT, NULL); 402 } 403 404 for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) { 405 mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL, 406 MUTEX_DEFAULT, NULL); 407 } 408 for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) { 409 mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL, 410 MUTEX_DEFAULT, NULL); 411 } 412 413 mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL); 414 415 ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size * 416 sizeof (*ipcl_udp_fanout), KM_SLEEP); 417 418 for (i = 0; i < ipcl_udp_fanout_size; i++) { 419 mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL, 420 MUTEX_DEFAULT, NULL); 421 } 422 423 ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size * 424 sizeof (*ipcl_raw_fanout), KM_SLEEP); 425 426 for (i = 0; i < ipcl_raw_fanout_size; i++) { 427 
mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL, 428 MUTEX_DEFAULT, NULL); 429 } 430 431 ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) * 432 CONN_G_HASH_SIZE, KM_SLEEP); 433 434 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 435 mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL, 436 MUTEX_DEFAULT, NULL); 437 } 438 } 439 440 void 441 ipcl_destroy(void) 442 { 443 int i; 444 kmem_cache_destroy(ipcl_conn_cache); 445 kmem_cache_destroy(ipcl_tcpconn_cache); 446 for (i = 0; i < ipcl_conn_fanout_size; i++) 447 mutex_destroy(&ipcl_conn_fanout[i].connf_lock); 448 kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size * 449 sizeof (*ipcl_conn_fanout)); 450 for (i = 0; i < ipcl_bind_fanout_size; i++) 451 mutex_destroy(&ipcl_bind_fanout[i].connf_lock); 452 kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size * 453 sizeof (*ipcl_bind_fanout)); 454 455 for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) 456 mutex_destroy(&ipcl_proto_fanout[i].connf_lock); 457 for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) 458 mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock); 459 460 for (i = 0; i < ipcl_udp_fanout_size; i++) 461 mutex_destroy(&ipcl_udp_fanout[i].connf_lock); 462 kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size * 463 sizeof (*ipcl_udp_fanout)); 464 465 for (i = 0; i < ipcl_raw_fanout_size; i++) 466 mutex_destroy(&ipcl_raw_fanout[i].connf_lock); 467 kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size * 468 sizeof (*ipcl_raw_fanout)); 469 470 kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE); 471 mutex_destroy(&rts_clients.connf_lock); 472 } 473 474 /* 475 * conn creation routine. initialize the conn, sets the reference 476 * and inserts it in the global hash table. 
477 */ 478 conn_t * 479 ipcl_conn_create(uint32_t type, int sleep) 480 { 481 itc_t *itc; 482 conn_t *connp; 483 484 switch (type) { 485 case IPCL_TCPCONN: 486 if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache, 487 sleep)) == NULL) 488 return (NULL); 489 connp = &itc->itc_conn; 490 connp->conn_ref = 1; 491 IPCL_DEBUG_LVL(1, 492 ("ipcl_conn_create: connp = %p tcp (%p)", 493 (void *)connp, (void *)connp->conn_tcp)); 494 ipcl_globalhash_insert(connp); 495 break; 496 case IPCL_SCTPCONN: 497 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 498 return (NULL); 499 connp->conn_flags = IPCL_SCTPCONN; 500 break; 501 case IPCL_IPCCONN: 502 connp = kmem_cache_alloc(ipcl_conn_cache, sleep); 503 if (connp == NULL) 504 return (connp); 505 bzero(connp, sizeof (conn_t)); 506 mutex_init(&connp->conn_lock, NULL, 507 MUTEX_DEFAULT, NULL); 508 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 509 connp->conn_flags |= IPCL_IPCCONN; 510 connp->conn_ref = 1; 511 IPCL_DEBUG_LVL(1, 512 ("ipcl_conn_create: connp = %p\n", (void *)connp)); 513 ipcl_globalhash_insert(connp); 514 break; 515 } 516 517 return (connp); 518 } 519 520 void 521 ipcl_conn_destroy(conn_t *connp) 522 { 523 mblk_t *mp; 524 tcp_t *tcp = connp->conn_tcp; 525 526 ASSERT(!MUTEX_HELD(&connp->conn_lock)); 527 ASSERT(connp->conn_ref == 0); 528 ASSERT(connp->conn_ire_cache == NULL); 529 530 ipcl_globalhash_remove(connp); 531 532 cv_destroy(&connp->conn_cv); 533 if (connp->conn_flags & IPCL_TCPCONN) { 534 mutex_destroy(&connp->conn_lock); 535 ASSERT(connp->conn_tcp != NULL); 536 tcp_free(tcp); 537 mp = tcp->tcp_timercache; 538 539 if (tcp->tcp_sack_info != NULL) { 540 bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 541 kmem_cache_free(tcp_sack_info_cache, 542 tcp->tcp_sack_info); 543 } 544 if (tcp->tcp_iphc != NULL) { 545 if (tcp->tcp_hdr_grown) { 546 kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 547 } else { 548 bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 549 kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 550 } 551 
tcp->tcp_iphc_len = 0; 552 } 553 ASSERT(tcp->tcp_iphc_len == 0); 554 555 if (connp->conn_latch != NULL) 556 IPLATCH_REFRELE(connp->conn_latch); 557 if (connp->conn_policy != NULL) 558 IPPH_REFRELE(connp->conn_policy); 559 bzero(connp, sizeof (itc_t)); 560 561 tcp->tcp_timercache = mp; 562 connp->conn_tcp = tcp; 563 connp->conn_flags = IPCL_TCPCONN; 564 connp->conn_ulp = IPPROTO_TCP; 565 tcp->tcp_connp = connp; 566 kmem_cache_free(ipcl_tcpconn_cache, connp); 567 } else if (connp->conn_flags & IPCL_SCTPCONN) { 568 sctp_free(connp); 569 } else { 570 mutex_destroy(&connp->conn_lock); 571 kmem_cache_free(ipcl_conn_cache, connp); 572 } 573 } 574 575 /* 576 * Running in cluster mode - deregister listener information 577 */ 578 579 static void 580 ipcl_conn_unlisten(conn_t *connp) 581 { 582 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 583 ASSERT(connp->conn_lport != 0); 584 585 if (cl_inet_unlisten != NULL) { 586 sa_family_t addr_family; 587 uint8_t *laddrp; 588 589 if (connp->conn_pkt_isv6) { 590 addr_family = AF_INET6; 591 laddrp = (uint8_t *)&connp->conn_bound_source_v6; 592 } else { 593 addr_family = AF_INET; 594 laddrp = (uint8_t *)&connp->conn_bound_source; 595 } 596 (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 597 connp->conn_lport); 598 } 599 connp->conn_flags &= ~IPCL_CL_LISTENER; 600 } 601 602 /* 603 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 604 * which table the conn belonged to). So for debugging we can see which hash 605 * table this connection was in. 
606 */ 607 #define IPCL_HASH_REMOVE(connp) { \ 608 connf_t *connfp = (connp)->conn_fanout; \ 609 ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 610 if (connfp != NULL) { \ 611 IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p", \ 612 (void *)(connp))); \ 613 mutex_enter(&connfp->connf_lock); \ 614 if ((connp)->conn_next != NULL) \ 615 (connp)->conn_next->conn_prev = \ 616 (connp)->conn_prev; \ 617 if ((connp)->conn_prev != NULL) \ 618 (connp)->conn_prev->conn_next = \ 619 (connp)->conn_next; \ 620 else \ 621 connfp->connf_head = (connp)->conn_next; \ 622 (connp)->conn_fanout = NULL; \ 623 (connp)->conn_next = NULL; \ 624 (connp)->conn_prev = NULL; \ 625 (connp)->conn_flags |= IPCL_REMOVED; \ 626 if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 627 ipcl_conn_unlisten((connp)); \ 628 CONN_DEC_REF((connp)); \ 629 mutex_exit(&connfp->connf_lock); \ 630 } \ 631 } 632 633 void 634 ipcl_hash_remove(conn_t *connp) 635 { 636 IPCL_HASH_REMOVE(connp); 637 } 638 639 /* 640 * The whole purpose of this function is allow removal of 641 * a conn_t from the connected hash for timewait reclaim. 642 * This is essentially a TW reclaim fastpath where timewait 643 * collector checks under fanout lock (so no one else can 644 * get access to the conn_t) that refcnt is 2 i.e. one for 645 * TCP and one for the classifier hash list. If ref count 646 * is indeed 2, we can just remove the conn under lock and 647 * avoid cleaning up the conn under squeue. This gives us 648 * improved performance. 
649 */ 650 void 651 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 652 { 653 ASSERT(MUTEX_HELD(&connfp->connf_lock)); 654 ASSERT(MUTEX_HELD(&connp->conn_lock)); 655 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 656 657 if ((connp)->conn_next != NULL) { 658 (connp)->conn_next->conn_prev = 659 (connp)->conn_prev; 660 } 661 if ((connp)->conn_prev != NULL) { 662 (connp)->conn_prev->conn_next = 663 (connp)->conn_next; 664 } else { 665 connfp->connf_head = (connp)->conn_next; 666 } 667 (connp)->conn_fanout = NULL; 668 (connp)->conn_next = NULL; 669 (connp)->conn_prev = NULL; 670 (connp)->conn_flags |= IPCL_REMOVED; 671 ASSERT((connp)->conn_ref == 2); 672 (connp)->conn_ref--; 673 } 674 675 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 676 ASSERT((connp)->conn_fanout == NULL); \ 677 ASSERT((connp)->conn_next == NULL); \ 678 ASSERT((connp)->conn_prev == NULL); \ 679 if ((connfp)->connf_head != NULL) { \ 680 (connfp)->connf_head->conn_prev = (connp); \ 681 (connp)->conn_next = (connfp)->connf_head; \ 682 } \ 683 (connp)->conn_fanout = (connfp); \ 684 (connfp)->connf_head = (connp); \ 685 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 686 IPCL_CONNECTED; \ 687 CONN_INC_REF(connp); \ 688 } 689 690 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 691 IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 692 "connp %p", (void *)(connfp), (void *)(connp))); \ 693 IPCL_HASH_REMOVE((connp)); \ 694 mutex_enter(&(connfp)->connf_lock); \ 695 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 696 mutex_exit(&(connfp)->connf_lock); \ 697 } 698 699 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 700 conn_t *pconnp = NULL, *nconnp; \ 701 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 702 "connp %p", (void *)connfp, (void *)(connp))); \ 703 IPCL_HASH_REMOVE((connp)); \ 704 mutex_enter(&(connfp)->connf_lock); \ 705 nconnp = (connfp)->connf_head; \ 706 if ((connp)->conn_flags & IPCL_ISV6) { \ 707 while (nconnp != 
NULL && \ 708 !IN6_IS_ADDR_UNSPECIFIED(&nconnp->conn_srcv6)) { \ 709 pconnp = nconnp; \ 710 nconnp = nconnp->conn_next; \ 711 } \ 712 } else { \ 713 while (nconnp != NULL && \ 714 !IN6_IS_ADDR_V4MAPPED_ANY(&nconnp->conn_srcv6)) { \ 715 pconnp = nconnp; \ 716 nconnp = nconnp->conn_next; \ 717 } \ 718 } \ 719 if (pconnp != NULL) { \ 720 pconnp->conn_next = (connp); \ 721 (connp)->conn_prev = pconnp; \ 722 } else { \ 723 (connfp)->connf_head = (connp); \ 724 } \ 725 if (nconnp != NULL) { \ 726 (connp)->conn_next = nconnp; \ 727 nconnp->conn_prev = (connp); \ 728 } \ 729 (connp)->conn_fanout = (connfp); \ 730 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 731 IPCL_BOUND; \ 732 CONN_INC_REF(connp); \ 733 mutex_exit(&(connfp)->connf_lock); \ 734 } 735 736 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 737 conn_t **list, *prev, *next; \ 738 boolean_t isv4mapped = \ 739 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 740 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 741 "connp %p", (void *)(connfp), (void *)(connp))); \ 742 IPCL_HASH_REMOVE((connp)); \ 743 mutex_enter(&(connfp)->connf_lock); \ 744 list = &(connfp)->connf_head; \ 745 prev = NULL; \ 746 while ((next = *list) != NULL) { \ 747 if (isv4mapped && \ 748 IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 749 connp->conn_zoneid == next->conn_zoneid) { \ 750 (connp)->conn_next = next; \ 751 if (prev != NULL) \ 752 prev = next->conn_prev; \ 753 next->conn_prev = (connp); \ 754 break; \ 755 } \ 756 list = &next->conn_next; \ 757 prev = next; \ 758 } \ 759 (connp)->conn_prev = prev; \ 760 *list = (connp); \ 761 (connp)->conn_fanout = (connfp); \ 762 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 763 IPCL_BOUND; \ 764 CONN_INC_REF((connp)); \ 765 mutex_exit(&(connfp)->connf_lock); \ 766 } 767 768 void 769 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 770 { 771 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 772 } 773 774 void 775 ipcl_proto_insert(conn_t *connp, 
uint8_t protocol) 776 { 777 connf_t *connfp; 778 779 ASSERT(connp != NULL); 780 781 connp->conn_ulp = protocol; 782 783 /* Insert it in the protocol hash */ 784 connfp = &ipcl_proto_fanout[protocol]; 785 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 786 } 787 788 void 789 ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 790 { 791 connf_t *connfp; 792 793 ASSERT(connp != NULL); 794 795 connp->conn_ulp = protocol; 796 797 /* Insert it in the Bind Hash */ 798 connfp = &ipcl_proto_fanout_v6[protocol]; 799 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 800 } 801 802 /* 803 * This function is used only for inserting SCTP raw socket now. 804 * This may change later. 805 * 806 * Note that only one raw socket can be bound to a port. The param 807 * lport is in network byte order. 808 */ 809 static int 810 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 811 { 812 connf_t *connfp; 813 conn_t *oconnp; 814 815 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 816 817 /* Check for existing raw socket already bound to the port. 
*/ 818 mutex_enter(&connfp->connf_lock); 819 for (oconnp = connfp->connf_head; oconnp != NULL; 820 oconnp = connp->conn_next) { 821 if (oconnp->conn_lport == lport && 822 oconnp->conn_zoneid == connp->conn_zoneid && 823 oconnp->conn_af_isv6 == connp->conn_af_isv6 && 824 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 825 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 826 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 827 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 828 IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 829 &connp->conn_srcv6))) { 830 break; 831 } 832 } 833 mutex_exit(&connfp->connf_lock); 834 if (oconnp != NULL) 835 return (EADDRNOTAVAIL); 836 837 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 838 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 839 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 840 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 841 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 842 } else { 843 IPCL_HASH_INSERT_BOUND(connfp, connp); 844 } 845 } else { 846 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 847 } 848 return (0); 849 } 850 851 /* 852 * (v4, v6) bind hash insertion routines 853 */ 854 int 855 ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 856 { 857 connf_t *connfp; 858 #ifdef IPCL_DEBUG 859 char buf[INET_NTOA_BUFSIZE]; 860 #endif 861 int ret = 0; 862 863 ASSERT(connp); 864 865 IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 866 "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 867 868 connp->conn_ulp = protocol; 869 IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 870 connp->conn_lport = lport; 871 872 switch (protocol) { 873 case IPPROTO_UDP: 874 default: 875 if (protocol == IPPROTO_UDP) { 876 IPCL_DEBUG_LVL(64, 877 ("ipcl_bind_insert: connp %p - udp\n", 878 (void *)connp)); 879 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 880 } else { 881 IPCL_DEBUG_LVL(64, 882 ("ipcl_bind_insert: connp %p - protocol\n", 883 (void *)connp)); 884 connfp = 
&ipcl_proto_fanout[protocol]; 885 } 886 887 if (connp->conn_rem != INADDR_ANY) { 888 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 889 } else if (connp->conn_src != INADDR_ANY) { 890 IPCL_HASH_INSERT_BOUND(connfp, connp); 891 } else { 892 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 893 } 894 break; 895 896 case IPPROTO_TCP: 897 898 /* Insert it in the Bind Hash */ 899 connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 900 if (connp->conn_src != INADDR_ANY) { 901 IPCL_HASH_INSERT_BOUND(connfp, connp); 902 } else { 903 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 904 } 905 if (cl_inet_listen != NULL) { 906 ASSERT(!connp->conn_pkt_isv6); 907 connp->conn_flags |= IPCL_CL_LISTENER; 908 (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 909 (uint8_t *)&connp->conn_bound_source, lport); 910 } 911 break; 912 913 case IPPROTO_SCTP: 914 ret = ipcl_sctp_hash_insert(connp, lport); 915 break; 916 } 917 918 return (ret); 919 } 920 921 int 922 ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 923 uint16_t lport) 924 { 925 connf_t *connfp; 926 int ret = 0; 927 928 ASSERT(connp); 929 930 connp->conn_ulp = protocol; 931 connp->conn_srcv6 = *src; 932 connp->conn_lport = lport; 933 934 switch (protocol) { 935 case IPPROTO_UDP: 936 default: 937 if (protocol == IPPROTO_UDP) { 938 IPCL_DEBUG_LVL(128, 939 ("ipcl_bind_insert_v6: connp %p - udp\n", 940 (void *)connp)); 941 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 942 } else { 943 IPCL_DEBUG_LVL(128, 944 ("ipcl_bind_insert_v6: connp %p - protocol\n", 945 (void *)connp)); 946 connfp = &ipcl_proto_fanout_v6[protocol]; 947 } 948 949 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 950 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 951 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 952 IPCL_HASH_INSERT_BOUND(connfp, connp); 953 } else { 954 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 955 } 956 break; 957 958 case IPPROTO_TCP: 959 /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? 
*/ 960 961 /* Insert it in the Bind Hash */ 962 connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 963 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 964 IPCL_HASH_INSERT_BOUND(connfp, connp); 965 } else { 966 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 967 } 968 if (cl_inet_listen != NULL) { 969 sa_family_t addr_family; 970 uint8_t *laddrp; 971 972 if (connp->conn_pkt_isv6) { 973 addr_family = AF_INET6; 974 laddrp = 975 (uint8_t *)&connp->conn_bound_source_v6; 976 } else { 977 addr_family = AF_INET; 978 laddrp = (uint8_t *)&connp->conn_bound_source; 979 } 980 connp->conn_flags |= IPCL_CL_LISTENER; 981 (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 982 lport); 983 } 984 break; 985 986 case IPPROTO_SCTP: 987 ret = ipcl_sctp_hash_insert(connp, lport); 988 break; 989 } 990 991 return (ret); 992 } 993 994 /* 995 * ipcl_conn_hash insertion routines. 996 */ 997 int 998 ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, 999 ipaddr_t rem, uint32_t ports) 1000 { 1001 connf_t *connfp; 1002 uint16_t *up; 1003 conn_t *tconnp; 1004 #ifdef IPCL_DEBUG 1005 char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; 1006 #endif 1007 in_port_t lport; 1008 int ret = 0; 1009 1010 IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " 1011 "dst = %s, ports = %x, protocol = %x", (void *)connp, 1012 inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), 1013 ports, protocol)); 1014 1015 switch (protocol) { 1016 case IPPROTO_TCP: 1017 if (!(connp->conn_flags & IPCL_EAGER)) { 1018 /* 1019 * for a eager connection, i.e connections which 1020 * have just been created, the initialization is 1021 * already done in ip at conn_creation time, so 1022 * we can skip the checks here. 
1023 */ 1024 IPCL_CONN_INIT(connp, protocol, src, rem, ports); 1025 } 1026 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem, 1027 connp->conn_ports)]; 1028 mutex_enter(&connfp->connf_lock); 1029 for (tconnp = connfp->connf_head; tconnp != NULL; 1030 tconnp = tconnp->conn_next) { 1031 if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp, 1032 connp->conn_rem, connp->conn_src, 1033 connp->conn_ports)) { 1034 1035 /* Already have a conn. bail out */ 1036 mutex_exit(&connfp->connf_lock); 1037 return (EADDRINUSE); 1038 } 1039 } 1040 if (connp->conn_fanout != NULL) { 1041 /* 1042 * Probably a XTI/TLI application trying to do a 1043 * rebind. Let it happen. 1044 */ 1045 mutex_exit(&connfp->connf_lock); 1046 IPCL_HASH_REMOVE(connp); 1047 mutex_enter(&connfp->connf_lock); 1048 } 1049 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1050 mutex_exit(&connfp->connf_lock); 1051 break; 1052 1053 case IPPROTO_SCTP: 1054 lport = (uint16_t)(ntohl(ports) & 0xFFFF); 1055 ret = ipcl_sctp_hash_insert(connp, lport); 1056 break; 1057 1058 case IPPROTO_UDP: 1059 default: 1060 up = (uint16_t *)&ports; 1061 IPCL_CONN_INIT(connp, protocol, src, rem, ports); 1062 if (protocol == IPPROTO_UDP) { 1063 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; 1064 } else { 1065 connfp = &ipcl_proto_fanout[protocol]; 1066 } 1067 1068 if (connp->conn_rem != INADDR_ANY) { 1069 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1070 } else if (connp->conn_src != INADDR_ANY) { 1071 IPCL_HASH_INSERT_BOUND(connfp, connp); 1072 } else { 1073 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1074 } 1075 break; 1076 } 1077 1078 return (ret); 1079 } 1080 1081 int 1082 ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 1083 const in6_addr_t *rem, uint32_t ports, uint_t ifindex) 1084 { 1085 connf_t *connfp; 1086 uint16_t *up; 1087 conn_t *tconnp; 1088 in_port_t lport; 1089 int ret = 0; 1090 1091 switch (protocol) { 1092 case IPPROTO_TCP: 1093 /* Just need to insert a conn struct */ 1094 if 
(!(connp->conn_flags & IPCL_EAGER)) { 1095 IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 1096 } 1097 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6, 1098 connp->conn_ports)]; 1099 mutex_enter(&connfp->connf_lock); 1100 for (tconnp = connfp->connf_head; tconnp != NULL; 1101 tconnp = tconnp->conn_next) { 1102 if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 1103 connp->conn_remv6, connp->conn_srcv6, 1104 connp->conn_ports) && 1105 (tconnp->conn_tcp->tcp_bound_if == 0 || 1106 tconnp->conn_tcp->tcp_bound_if == ifindex)) { 1107 /* Already have a conn. bail out */ 1108 mutex_exit(&connfp->connf_lock); 1109 return (EADDRINUSE); 1110 } 1111 } 1112 if (connp->conn_fanout != NULL) { 1113 /* 1114 * Probably a XTI/TLI application trying to do a 1115 * rebind. Let it happen. 1116 */ 1117 mutex_exit(&connfp->connf_lock); 1118 IPCL_HASH_REMOVE(connp); 1119 mutex_enter(&connfp->connf_lock); 1120 } 1121 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1122 mutex_exit(&connfp->connf_lock); 1123 break; 1124 1125 case IPPROTO_SCTP: 1126 lport = (uint16_t)(ntohl(ports) & 0xFFFF); 1127 ret = ipcl_sctp_hash_insert(connp, lport); 1128 break; 1129 1130 case IPPROTO_UDP: 1131 default: 1132 up = (uint16_t *)&ports; 1133 IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 1134 if (protocol == IPPROTO_UDP) { 1135 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; 1136 } else { 1137 connfp = &ipcl_proto_fanout_v6[protocol]; 1138 } 1139 1140 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 1141 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1142 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 1143 IPCL_HASH_INSERT_BOUND(connfp, connp); 1144 } else { 1145 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1146 } 1147 break; 1148 } 1149 1150 return (ret); 1151 } 1152 1153 /* 1154 * v4 packet classifying function. looks up the fanout table to 1155 * find the conn, the packet belongs to. returns the conn with 1156 * the reference held, null otherwise. 
1157 */ 1158 conn_t * 1159 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 1160 { 1161 ipha_t *ipha; 1162 connf_t *connfp, *bind_connfp; 1163 uint16_t lport; 1164 uint16_t fport; 1165 uint32_t ports; 1166 conn_t *connp; 1167 uint16_t *up; 1168 1169 ipha = (ipha_t *)mp->b_rptr; 1170 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 1171 1172 switch (protocol) { 1173 case IPPROTO_TCP: 1174 ports = *(uint32_t *)up; 1175 connfp = 1176 &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)]; 1177 mutex_enter(&connfp->connf_lock); 1178 for (connp = connfp->connf_head; connp != NULL; 1179 connp = connp->conn_next) { 1180 if (IPCL_CONN_MATCH(connp, protocol, 1181 ipha->ipha_src, ipha->ipha_dst, ports)) 1182 break; 1183 } 1184 1185 if (connp != NULL) { 1186 CONN_INC_REF(connp); 1187 mutex_exit(&connfp->connf_lock); 1188 return (connp); 1189 } 1190 1191 mutex_exit(&connfp->connf_lock); 1192 1193 lport = up[1]; 1194 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1195 mutex_enter(&bind_connfp->connf_lock); 1196 for (connp = bind_connfp->connf_head; connp != NULL; 1197 connp = connp->conn_next) { 1198 if (IPCL_BIND_MATCH(connp, protocol, 1199 ipha->ipha_dst, lport) && 1200 connp->conn_zoneid == zoneid) 1201 break; 1202 } 1203 1204 if (connp != NULL) { 1205 /* Have a listner at least */ 1206 CONN_INC_REF(connp); 1207 mutex_exit(&bind_connfp->connf_lock); 1208 return (connp); 1209 } 1210 1211 mutex_exit(&bind_connfp->connf_lock); 1212 1213 IPCL_DEBUG_LVL(512, 1214 ("ipcl_classify: couldn't classify mp = %p\n", 1215 (void *)mp)); 1216 break; 1217 1218 case IPPROTO_UDP: 1219 lport = up[1]; 1220 fport = up[0]; 1221 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 1222 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 1223 mutex_enter(&connfp->connf_lock); 1224 for (connp = connfp->connf_head; connp != NULL; 1225 connp = connp->conn_next) { 1226 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 1227 fport, 
ipha->ipha_src) && 1228 connp->conn_zoneid == zoneid) 1229 break; 1230 } 1231 1232 if (connp != NULL) { 1233 CONN_INC_REF(connp); 1234 mutex_exit(&connfp->connf_lock); 1235 return (connp); 1236 } 1237 1238 /* 1239 * We shouldn't come here for multicast/broadcast packets 1240 */ 1241 mutex_exit(&connfp->connf_lock); 1242 IPCL_DEBUG_LVL(512, 1243 ("ipcl_classify: cant find udp conn_t for ports : %x %x", 1244 lport, fport)); 1245 break; 1246 } 1247 1248 return (NULL); 1249 } 1250 1251 conn_t * 1252 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 1253 { 1254 ip6_t *ip6h; 1255 connf_t *connfp, *bind_connfp; 1256 uint16_t lport; 1257 uint16_t fport; 1258 tcph_t *tcph; 1259 uint32_t ports; 1260 conn_t *connp; 1261 uint16_t *up; 1262 1263 1264 ip6h = (ip6_t *)mp->b_rptr; 1265 1266 switch (protocol) { 1267 case IPPROTO_TCP: 1268 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 1269 up = (uint16_t *)tcph->th_lport; 1270 ports = *(uint32_t *)up; 1271 1272 connfp = 1273 &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)]; 1274 mutex_enter(&connfp->connf_lock); 1275 for (connp = connfp->connf_head; connp != NULL; 1276 connp = connp->conn_next) { 1277 if (IPCL_CONN_MATCH_V6(connp, protocol, 1278 ip6h->ip6_src, ip6h->ip6_dst, ports)) 1279 break; 1280 } 1281 1282 if (connp != NULL) { 1283 CONN_INC_REF(connp); 1284 mutex_exit(&connfp->connf_lock); 1285 return (connp); 1286 } 1287 1288 mutex_exit(&connfp->connf_lock); 1289 1290 lport = up[1]; 1291 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1292 mutex_enter(&bind_connfp->connf_lock); 1293 for (connp = bind_connfp->connf_head; connp != NULL; 1294 connp = connp->conn_next) { 1295 if (IPCL_BIND_MATCH_V6(connp, protocol, 1296 ip6h->ip6_dst, lport) && 1297 connp->conn_zoneid == zoneid) 1298 break; 1299 } 1300 1301 if (connp != NULL) { 1302 /* Have a listner at least */ 1303 CONN_INC_REF(connp); 1304 mutex_exit(&bind_connfp->connf_lock); 1305 IPCL_DEBUG_LVL(512, 1306 ("ipcl_classify_v6: found 
listner " 1307 "connp = %p\n", (void *)connp)); 1308 1309 return (connp); 1310 } 1311 1312 mutex_exit(&bind_connfp->connf_lock); 1313 1314 IPCL_DEBUG_LVL(512, 1315 ("ipcl_classify_v6: couldn't classify mp = %p\n", 1316 (void *)mp)); 1317 break; 1318 1319 case IPPROTO_UDP: 1320 up = (uint16_t *)&mp->b_rptr[hdr_len]; 1321 lport = up[1]; 1322 fport = up[0]; 1323 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 1324 fport)); 1325 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 1326 mutex_enter(&connfp->connf_lock); 1327 for (connp = connfp->connf_head; connp != NULL; 1328 connp = connp->conn_next) { 1329 if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 1330 fport, ip6h->ip6_src) && 1331 connp->conn_zoneid == zoneid) 1332 break; 1333 } 1334 1335 if (connp != NULL) { 1336 CONN_INC_REF(connp); 1337 mutex_exit(&connfp->connf_lock); 1338 return (connp); 1339 } 1340 1341 /* 1342 * We shouldn't come here for multicast/broadcast packets 1343 */ 1344 mutex_exit(&connfp->connf_lock); 1345 IPCL_DEBUG_LVL(512, 1346 ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 1347 lport, fport)); 1348 break; 1349 } 1350 1351 1352 return (NULL); 1353 } 1354 1355 /* 1356 * wrapper around ipcl_classify_(v4,v6) routines. 
1357 */ 1358 conn_t * 1359 ipcl_classify(mblk_t *mp, zoneid_t zoneid) 1360 { 1361 uint16_t hdr_len; 1362 ipha_t *ipha; 1363 uint8_t *nexthdrp; 1364 1365 if (MBLKL(mp) < sizeof (ipha_t)) 1366 return (NULL); 1367 1368 switch (IPH_HDR_VERSION(mp->b_rptr)) { 1369 case IPV4_VERSION: 1370 ipha = (ipha_t *)mp->b_rptr; 1371 hdr_len = IPH_HDR_LENGTH(ipha); 1372 return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 1373 zoneid)); 1374 case IPV6_VERSION: 1375 if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 1376 &hdr_len, &nexthdrp)) 1377 return (NULL); 1378 1379 return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid)); 1380 } 1381 1382 return (NULL); 1383 } 1384 1385 conn_t * 1386 ipcl_classify_raw(uint8_t protocol, zoneid_t zoneid, uint32_t ports, 1387 ipha_t *hdr) 1388 { 1389 struct connf_s *connfp; 1390 conn_t *connp; 1391 in_port_t lport; 1392 int af; 1393 1394 lport = ((uint16_t *)&ports)[1]; 1395 af = IPH_HDR_VERSION(hdr); 1396 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 1397 1398 mutex_enter(&connfp->connf_lock); 1399 for (connp = connfp->connf_head; connp != NULL; 1400 connp = connp->conn_next) { 1401 /* We don't allow v4 fallback for v6 raw socket. */ 1402 if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 1403 IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { 1404 continue; 1405 } 1406 if (connp->conn_fully_bound) { 1407 if (af == IPV4_VERSION) { 1408 if (IPCL_CONN_MATCH(connp, protocol, 1409 hdr->ipha_src, hdr->ipha_dst, ports)) { 1410 break; 1411 } 1412 } else { 1413 if (IPCL_CONN_MATCH_V6(connp, protocol, 1414 ((ip6_t *)hdr)->ip6_src, 1415 ((ip6_t *)hdr)->ip6_dst, ports)) { 1416 break; 1417 } 1418 } 1419 } else { 1420 if (af == IPV4_VERSION) { 1421 if (IPCL_BIND_MATCH(connp, protocol, 1422 hdr->ipha_dst, lport)) { 1423 break; 1424 } 1425 } else { 1426 if (IPCL_BIND_MATCH_V6(connp, protocol, 1427 ((ip6_t *)hdr)->ip6_dst, lport)) { 1428 break; 1429 } 1430 } 1431 } 1432 } 1433 if (connp != NULL) { 1434 CONN_INC_REF(connp); 1435 mutex_exit(&connfp->connf_lock); 1436 return (connp); 1437 } 1438 mutex_exit(&connfp->connf_lock); 1439 return (NULL); 1440 } 1441 1442 /* ARGSUSED */ 1443 static int 1444 ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) 1445 { 1446 itc_t *itc = (itc_t *)buf; 1447 conn_t *connp = &itc->itc_conn; 1448 tcp_t *tcp = &itc->itc_tcp; 1449 bzero(itc, sizeof (itc_t)); 1450 tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 1451 connp->conn_tcp = tcp; 1452 connp->conn_flags = IPCL_TCPCONN; 1453 connp->conn_ulp = IPPROTO_TCP; 1454 tcp->tcp_connp = connp; 1455 return (0); 1456 } 1457 1458 /* ARGSUSED */ 1459 static void 1460 ipcl_tcpconn_destructor(void *buf, void *cdrarg) 1461 { 1462 tcp_timermp_free(((conn_t *)buf)->conn_tcp); 1463 } 1464 1465 /* 1466 * All conns are inserted in a global multi-list for the benefit of 1467 * walkers. The walk is guaranteed to walk all open conns at the time 1468 * of the start of the walk exactly once. This property is needed to 1469 * achieve some cleanups during unplumb of interfaces. This is achieved 1470 * as follows. 
1471 * 1472 * ipcl_conn_create and ipcl_conn_destroy are the only functions that 1473 * call the insert and delete functions below at creation and deletion 1474 * time respectively. The conn never moves or changes its position in this 1475 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 1476 * won't increase due to walkers, once the conn deletion has started. Note 1477 * that we can't remove the conn from the global list and then wait for 1478 * the refcnt to drop to zero, since walkers would then see a truncated 1479 * list. CONN_INCIPIENT ensures that walkers don't start looking at 1480 * conns until ip_open is ready to make them globally visible. 1481 * The global round robin multi-list locks are held only to get the 1482 * next member/insertion/deletion and contention should be negligible 1483 * if the multi-list is much greater than the number of cpus. 1484 */ 1485 void 1486 ipcl_globalhash_insert(conn_t *connp) 1487 { 1488 int index; 1489 1490 /* 1491 * No need for atomic here. Approximate even distribution 1492 * in the global lists is sufficient. 1493 */ 1494 conn_g_index++; 1495 index = conn_g_index & (CONN_G_HASH_SIZE - 1); 1496 1497 connp->conn_g_prev = NULL; 1498 /* 1499 * Mark as INCIPIENT, so that walkers will ignore this 1500 * for now, till ip_open is ready to make it visible globally. 
1501 */ 1502 connp->conn_state_flags |= CONN_INCIPIENT; 1503 1504 /* Insert at the head of the list */ 1505 mutex_enter(&ipcl_globalhash_fanout[index].connf_lock); 1506 connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head; 1507 if (connp->conn_g_next != NULL) 1508 connp->conn_g_next->conn_g_prev = connp; 1509 ipcl_globalhash_fanout[index].connf_head = connp; 1510 1511 /* The fanout bucket this conn points to */ 1512 connp->conn_g_fanout = &ipcl_globalhash_fanout[index]; 1513 1514 mutex_exit(&ipcl_globalhash_fanout[index].connf_lock); 1515 } 1516 1517 void 1518 ipcl_globalhash_remove(conn_t *connp) 1519 { 1520 /* 1521 * We were never inserted in the global multi list. 1522 * IPCL_NONE variety is never inserted in the global multilist 1523 * since it is presumed to not need any cleanup and is transient. 1524 */ 1525 if (connp->conn_g_fanout == NULL) 1526 return; 1527 1528 mutex_enter(&connp->conn_g_fanout->connf_lock); 1529 if (connp->conn_g_prev != NULL) 1530 connp->conn_g_prev->conn_g_next = connp->conn_g_next; 1531 else 1532 connp->conn_g_fanout->connf_head = connp->conn_g_next; 1533 if (connp->conn_g_next != NULL) 1534 connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 1535 mutex_exit(&connp->conn_g_fanout->connf_lock); 1536 1537 /* Better to stumble on a null pointer than to corrupt memory */ 1538 connp->conn_g_next = NULL; 1539 connp->conn_g_prev = NULL; 1540 } 1541 1542 /* 1543 * Walk the list of all conn_t's in the system, calling the function provided 1544 * with the specified argument for each. 1545 * Applies to both IPv4 and IPv6. 1546 * 1547 * IPCs may hold pointers to ipif/ill. To guard against stale pointers 1548 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 1549 * unplumbed or removed. New conn_t's that are created while we are walking 1550 * may be missed by this walk, because they are not necessarily inserted 1551 * at the tail of the list. 
They are new conn_t's and thus don't have any 1552 * stale pointers. The CONN_CLOSING flag ensures that no new reference 1553 * is created to the struct that is going away. 1554 */ 1555 void 1556 ipcl_walk(pfv_t func, void *arg) 1557 { 1558 int i; 1559 conn_t *connp; 1560 conn_t *prev_connp; 1561 1562 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 1563 mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 1564 prev_connp = NULL; 1565 connp = ipcl_globalhash_fanout[i].connf_head; 1566 while (connp != NULL) { 1567 mutex_enter(&connp->conn_lock); 1568 if (connp->conn_state_flags & 1569 (CONN_CONDEMNED | CONN_INCIPIENT)) { 1570 mutex_exit(&connp->conn_lock); 1571 connp = connp->conn_g_next; 1572 continue; 1573 } 1574 CONN_INC_REF_LOCKED(connp); 1575 mutex_exit(&connp->conn_lock); 1576 mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 1577 (*func)(connp, arg); 1578 if (prev_connp != NULL) 1579 CONN_DEC_REF(prev_connp); 1580 mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 1581 prev_connp = connp; 1582 connp = connp->conn_g_next; 1583 } 1584 mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 1585 if (prev_connp != NULL) 1586 CONN_DEC_REF(prev_connp); 1587 } 1588 } 1589 1590 /* 1591 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 1592 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 1593 * held; caller must call CONN_DEC_REF. Only checks for connected entries 1594 * (peer tcp in at least ESTABLISHED state). 1595 */ 1596 conn_t * 1597 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) 1598 { 1599 uint32_t ports; 1600 uint16_t *pports = (uint16_t *)&ports; 1601 connf_t *connfp; 1602 conn_t *tconnp; 1603 boolean_t zone_chk; 1604 1605 /* 1606 * If either the source of destination address is loopback, then 1607 * both endpoints must be in the same Zone. Otherwise, both of 1608 * the addresses are system-wide unique (tcp is in ESTABLISHED 1609 * state) and the endpoints may reside in different Zones. 
1610 */ 1611 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 1612 ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 1613 1614 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1615 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1616 1617 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 1618 1619 mutex_enter(&connfp->connf_lock); 1620 for (tconnp = connfp->connf_head; tconnp != NULL; 1621 tconnp = tconnp->conn_next) { 1622 1623 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 1624 ipha->ipha_dst, ipha->ipha_src, ports) && 1625 tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 1626 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 1627 1628 ASSERT(tconnp != connp); 1629 CONN_INC_REF(tconnp); 1630 mutex_exit(&connfp->connf_lock); 1631 return (tconnp); 1632 } 1633 } 1634 mutex_exit(&connfp->connf_lock); 1635 return (NULL); 1636 } 1637 1638 /* 1639 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 1640 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 1641 * held; caller must call CONN_DEC_REF. Only checks for connected entries 1642 * (peer tcp in at least ESTABLISHED state). 1643 */ 1644 conn_t * 1645 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) 1646 { 1647 uint32_t ports; 1648 uint16_t *pports = (uint16_t *)&ports; 1649 connf_t *connfp; 1650 conn_t *tconnp; 1651 boolean_t zone_chk; 1652 1653 /* 1654 * If either the source of destination address is loopback, then 1655 * both endpoints must be in the same Zone. Otherwise, both of 1656 * the addresses are system-wide unique (tcp is in ESTABLISHED 1657 * state) and the endpoints may reside in different Zones. We 1658 * don't do Zone check for link local address(es) because the 1659 * current Zone implementation treats each link local address as 1660 * being unique per system node, i.e. they belong to global Zone. 
1661 */ 1662 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 1663 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 1664 1665 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1666 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1667 1668 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 1669 1670 mutex_enter(&connfp->connf_lock); 1671 for (tconnp = connfp->connf_head; tconnp != NULL; 1672 tconnp = tconnp->conn_next) { 1673 1674 /* We skip tcp_bound_if check here as this is loopback tcp */ 1675 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 1676 ip6h->ip6_dst, ip6h->ip6_src, ports) && 1677 tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 1678 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 1679 1680 ASSERT(tconnp != connp); 1681 CONN_INC_REF(tconnp); 1682 mutex_exit(&connfp->connf_lock); 1683 return (tconnp); 1684 } 1685 } 1686 mutex_exit(&connfp->connf_lock); 1687 return (NULL); 1688 } 1689 1690 /* 1691 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 1692 * Returns with conn reference held. Caller must call CONN_DEC_REF. 1693 * Only checks for connected entries i.e. no INADDR_ANY checks. 
1694 */ 1695 conn_t * 1696 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) 1697 { 1698 uint32_t ports; 1699 uint16_t *pports; 1700 connf_t *connfp; 1701 conn_t *tconnp; 1702 1703 pports = (uint16_t *)&ports; 1704 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1705 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1706 1707 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 1708 1709 mutex_enter(&connfp->connf_lock); 1710 for (tconnp = connfp->connf_head; tconnp != NULL; 1711 tconnp = tconnp->conn_next) { 1712 1713 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 1714 ipha->ipha_dst, ipha->ipha_src, ports) && 1715 tconnp->conn_tcp->tcp_state >= min_state) { 1716 1717 CONN_INC_REF(tconnp); 1718 mutex_exit(&connfp->connf_lock); 1719 return (tconnp); 1720 } 1721 } 1722 mutex_exit(&connfp->connf_lock); 1723 return (NULL); 1724 } 1725 1726 /* 1727 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 1728 * Returns with conn reference held. Caller must call CONN_DEC_REF. 1729 * Only checks for connected entries i.e. no INADDR_ANY checks. 1730 * Match on ifindex in addition to addresses. 
1731 */ 1732 conn_t * 1733 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 1734 uint_t ifindex) 1735 { 1736 tcp_t *tcp; 1737 uint32_t ports; 1738 uint16_t *pports; 1739 connf_t *connfp; 1740 conn_t *tconnp; 1741 1742 pports = (uint16_t *)&ports; 1743 pports[0] = tcpha->tha_fport; 1744 pports[1] = tcpha->tha_lport; 1745 1746 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 1747 1748 mutex_enter(&connfp->connf_lock); 1749 for (tconnp = connfp->connf_head; tconnp != NULL; 1750 tconnp = tconnp->conn_next) { 1751 1752 tcp = tconnp->conn_tcp; 1753 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 1754 ip6h->ip6_dst, ip6h->ip6_src, ports) && 1755 tcp->tcp_state >= min_state && 1756 (tcp->tcp_bound_if == 0 || 1757 tcp->tcp_bound_if == ifindex)) { 1758 1759 CONN_INC_REF(tconnp); 1760 mutex_exit(&connfp->connf_lock); 1761 return (tconnp); 1762 } 1763 } 1764 mutex_exit(&connfp->connf_lock); 1765 return (NULL); 1766 } 1767 1768 /* 1769 * To find a TCP listening connection matching the incoming segment. 1770 */ 1771 conn_t * 1772 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) 1773 { 1774 connf_t *bind_connfp; 1775 conn_t *connp; 1776 tcp_t *tcp; 1777 1778 /* 1779 * Avoid false matches for packets sent to an IP destination of 1780 * all zeros. 
1781 */ 1782 if (laddr == 0) 1783 return (NULL); 1784 1785 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1786 mutex_enter(&bind_connfp->connf_lock); 1787 for (connp = bind_connfp->connf_head; connp != NULL; 1788 connp = connp->conn_next) { 1789 tcp = connp->conn_tcp; 1790 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 1791 connp->conn_zoneid == zoneid && 1792 (tcp->tcp_listener == NULL)) { 1793 CONN_INC_REF(connp); 1794 mutex_exit(&bind_connfp->connf_lock); 1795 return (connp); 1796 } 1797 } 1798 mutex_exit(&bind_connfp->connf_lock); 1799 return (NULL); 1800 } 1801 1802 1803 conn_t * 1804 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 1805 zoneid_t zoneid) 1806 { 1807 connf_t *bind_connfp; 1808 conn_t *connp = NULL; 1809 tcp_t *tcp; 1810 1811 /* 1812 * Avoid false matches for packets sent to an IP destination of 1813 * all zeros. 1814 */ 1815 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 1816 return (NULL); 1817 1818 1819 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1820 mutex_enter(&bind_connfp->connf_lock); 1821 for (connp = bind_connfp->connf_head; connp != NULL; 1822 connp = connp->conn_next) { 1823 tcp = connp->conn_tcp; 1824 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 1825 connp->conn_zoneid == zoneid && 1826 (tcp->tcp_bound_if == 0 || 1827 tcp->tcp_bound_if == ifindex) && 1828 tcp->tcp_listener == NULL) { 1829 CONN_INC_REF(connp); 1830 mutex_exit(&bind_connfp->connf_lock); 1831 return (connp); 1832 } 1833 } 1834 mutex_exit(&bind_connfp->connf_lock); 1835 return (NULL); 1836 } 1837 1838 #ifdef CONN_DEBUG 1839 /* 1840 * Trace of the last NBUF refhold/refrele 1841 */ 1842 int 1843 conn_trace_ref(conn_t *connp) 1844 { 1845 int last; 1846 conn_trace_t *ctb; 1847 1848 ASSERT(MUTEX_HELD(&connp->conn_lock)); 1849 last = connp->conn_trace_last; 1850 last++; 1851 if (last == CONN_TRACE_MAX) 1852 last = 0; 1853 1854 ctb = &connp->conn_trace_buf[last]; 1855 ctb->ctb_depth = getpcstack(ctb->ctb_stack, 
IP_STACK_DEPTH); 1856 connp->conn_trace_last = last; 1857 return (1); 1858 } 1859 1860 int 1861 conn_untrace_ref(conn_t *connp) 1862 { 1863 int last; 1864 conn_trace_t *ctb; 1865 1866 ASSERT(MUTEX_HELD(&connp->conn_lock)); 1867 last = connp->conn_trace_last; 1868 last++; 1869 if (last == CONN_TRACE_MAX) 1870 last = 0; 1871 1872 ctb = &connp->conn_trace_buf[last]; 1873 ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 1874 connp->conn_trace_last = last; 1875 return (1); 1876 } 1877 #endif 1878