/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

const char ipclassifier_version[] = "@(#)ipclassifier.c	1.6	04/03/31 SMI";

/*
 * IP PACKET CLASSIFIER
 *
 * The IP packet classifier provides a mapping between IP packets and
 * persistent connection state for connection-oriented protocols. It also
 * provides an interface for managing connection state.
 *
 * The connection state is kept in the conn_t data structure and contains,
 * among other things:
 *
 *	o local/remote address and ports
 *	o transport protocol
 *	o squeue for the connection (for TCP only)
 *	o reference counter
 *	o connection state
 *	o hash table linkage
 *	o interface/ire information
 *	o credentials
 *	o ipsec policy
 *	o send and receive functions
 *	o mutex lock
 *
 * Connections use a reference counting scheme. They are freed when the
 * reference counter drops to zero. A reference is incremented when a
 * connection is placed in a list or table, when an incoming packet for the
 * connection arrives, and when the connection is processed via an squeue
 * (squeue processing may be asynchronous and the reference protects the
 * connection from being destroyed before its processing is finished).
 *
 * The send and receive functions are currently used for TCP only. The send
 * function determines the IP entry point for the packet once it leaves TCP
 * to be sent to the destination address. The receive function is used by IP
 * when the packet should be passed for TCP processing. When a new connection
 * is created these are set to ip_output() and tcp_input() respectively.
 * During the lifetime of the connection the send and receive functions may
 * change depending on changes in the connection state. For example, once the
 * connection is bound to an address, the receive function for this connection
 * is set to tcp_conn_request(). This allows incoming SYNs to go directly into
 * the listener SYN processing function without going to tcp_input() first.
 *
 * The classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and
 * Clustering) which need to view all existing connections.
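 *
 * Typical inbound use, shown as an illustrative sketch only (the surrounding
 * IP receive logic is omitted and the variable names are hypothetical):
 *
 *	conn_t *connp = ipcl_classify(mp, zoneid);
 *
 *	if (connp == NULL) {
 *		... no match; IP handles the packet itself ...
 *	} else {
 *		... hand mp to connp's receive function ...
 *		CONN_DEC_REF(connp);	(drop the reference from the lookup)
 *	}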
 *
 * All tables are protected by per-bucket locks. When both a per-bucket lock
 * and a connection lock need to be held, the per-bucket lock should be
 * acquired first, followed by the connection lock.
 *
 * All functions that search one of these tables increment the reference
 * counter of the connection found (if any). This reference should be dropped
 * when the caller has finished processing the connection.
 *
 *
 * INTERFACES:
 * ===========
 *
 * Connection Lookup:
 * ------------------
 *
 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid)
 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid)
 *
 *	Finds the connection for an incoming IPv4 or IPv6 packet. Returns
 *	NULL if it can't find any associated connection. If the connection
 *	is found, its reference counter is incremented.
 *
 *	mp:	mblk containing the packet header. The full header should fit
 *		into a single mblk, which should also contain at least the
 *		full IP header and the TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of the IP header. It is used to find the TCP or
 *		 UDP header in the packet.
 *
 *	zoneid: The zone in which the returned connection must be.
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in the ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in the
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that these
 *	interfaces do not handle the case where a packet belongs to multiple
 *	UDP clients; that case is handled in IP itself.
 *
 * conn_t	*ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int);
 * conn_t	*ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t);
 *
 *	Lookup routines to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The addresses
 *	and ports are read from the IP and TCP headers respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex);
 *
 *	Lookup routines to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter, the interface index, is also compared.
 *
 * void ipcl_walk(func, arg)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic, so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments:
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value:
 *		0		if connp was inserted
 *		EADDRINUSE	if a connection with the same tuple
 *				already exists.
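 *
 *	A hypothetical caller (sketch only; how the tuple was chosen and how
 *	the error is propagated are omitted):
 *
 *		error = ipcl_conn_insert(connp, IPPROTO_TCP, src, dst, ports);
 *		if (error == EADDRINUSE) {
 *			... the 5-tuple is already taken ...
 *		}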
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments:
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address the connection wants
 *				to bind to
 *		lport		local port the connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep)
 *
 *	Creates a new conn based on the type flag and inserts it into the
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_IPCCONN	indicates all non-TCP connections.
 *
 * void ipcl_conn_destroy(connp)
 *
 *	Destroys the connection state, removes it from the global
 *	connection hash table and frees its memory.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <sys/strsubr.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#define	_SUN_TPI_VERSION 2
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>

#include <sys/systm.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/isa_defs.h>
#include <inet/common.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>

#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <inet/tcp_trace.h>
#include <inet/ip_multi.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_rts.h>
#include <inet/optcom.h>
#include <inet/ip_ndp.h>
#include <inet/sctp_ip.h>

#include <sys/ethernet.h>
#include <net/if_types.h>
#include <sys/cpuvar.h>

#include <inet/mi.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>

#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

#ifdef	IPCL_DEBUG
int	ipcl_debug_level = 0;
#define	IPCL_DEBUG_LVL(level, args)	\
	if (ipcl_debug_level & level) { printf args; }
#else
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif

connf_t	*ipcl_conn_fanout;
connf_t	*ipcl_bind_fanout;
connf_t	ipcl_proto_fanout[IPPROTO_MAX + 1];
connf_t	ipcl_proto_fanout_v6[IPPROTO_MAX + 1];
connf_t	*ipcl_udp_fanout;

/* A separate hash list for raw sockets. */
connf_t	*ipcl_raw_fanout;

connf_t	rts_clients;

/* Old value for compatibility */
uint_t tcp_conn_hash_size = 0;

/* New value. Zero means choose automatically. */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

uint_t ipcl_conn_fanout_size = 0;


/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 256;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * Primes useful for power-of-two hash table sizes, for N of 0-28;
 * these primes are the nearest prime <= 2^N - 2^(N-2).
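 * For example, for N = 10: 2^10 - 2^8 = 768, and the nearest prime not
 * exceeding 768 is 761, the entry at index 10 in the table below.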
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}

/*
 * Wrapper structure to ensure that conn+tcpb are aligned
 * on cache lines.
 */
typedef struct itc_s {
	union {
		conn_t	itcu_conn;
		char	itcu_filler[CACHE_ALIGN(conn_s)];
	} itc_u;
	tcp_t	itc_tcp;
} itc_t;

#define	itc_conn	itc_u.itcu_conn

struct kmem_cache *ipcl_tcpconn_cache;
struct kmem_cache *ipcl_tcp_cache;
struct kmem_cache *ipcl_conn_cache;
extern struct kmem_cache *sctp_conn_cache;
extern struct kmem_cache *tcp_sack_info_cache;
extern struct kmem_cache *tcp_iphc_cache;

extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

static int ipcl_tcpconn_constructor(void *, void *, int);
static void ipcl_tcpconn_destructor(void *, void *);

static int conn_g_index;
connf_t	*ipcl_globalhash_fanout;

#ifdef IPCL_DEBUG
#define	INET_NTOA_BUFSIZE	18

static char *
inet_ntoa_r(uint32_t in, char *b)
{
	unsigned char	*p;

	p = (unsigned char *)&in;
	(void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
	return (b);
}
#endif

/*
 * ipclassifier initialization routine; sets up the hash tables and
 * conn caches.
 */
void
ipcl_init(void)
{
	int i;
	int sizes[] = P2Ps();

	ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache",
	    sizeof (conn_t), CACHE_ALIGN_SIZE,
	    NULL, NULL,
	    NULL, NULL, NULL, 0);

	ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache",
	    sizeof (itc_t), CACHE_ALIGN_SIZE,
	    ipcl_tcpconn_constructor, ipcl_tcpconn_destructor,
	    NULL, NULL, NULL, 0);

	/*
	 * Calculate the size of the conn fanout table.
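	 * If neither ipcl_conn_hash_size nor tcp_conn_hash_size is set, the
	 * size is derived from free memory, capped at ipcl_conn_hash_maxsize,
	 * and then rounded up to a prime from the P2Ps() table. A purely
	 * illustrative example (the numbers are hypothetical, not defaults):
	 * with 512MB free and a 4K PAGESIZE, freemem * PAGESIZE /
	 * ipcl_conn_hash_memfactor = 65536, which is below the cap and rounds
	 * up to sizes[17] = 98299 buckets.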
364 */ 365 if (ipcl_conn_hash_size != 0) { 366 ipcl_conn_fanout_size = ipcl_conn_hash_size; 367 } else if (tcp_conn_hash_size != 0) { 368 ipcl_conn_fanout_size = tcp_conn_hash_size; 369 } else { 370 extern pgcnt_t freemem; 371 372 ipcl_conn_fanout_size = 373 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 374 375 if (ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) 376 ipcl_conn_fanout_size = ipcl_conn_hash_maxsize; 377 } 378 379 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 380 if (sizes[i] >= ipcl_conn_fanout_size) { 381 break; 382 } 383 } 384 if ((ipcl_conn_fanout_size = sizes[i]) == 0) { 385 /* Out of range, use the 2^16 value */ 386 ipcl_conn_fanout_size = sizes[16]; 387 } 388 ipcl_conn_fanout = (connf_t *)kmem_zalloc(ipcl_conn_fanout_size * 389 sizeof (*ipcl_conn_fanout), KM_SLEEP); 390 391 for (i = 0; i < ipcl_conn_fanout_size; i++) { 392 mutex_init(&ipcl_conn_fanout[i].connf_lock, NULL, 393 MUTEX_DEFAULT, NULL); 394 } 395 396 ipcl_bind_fanout = (connf_t *)kmem_zalloc(ipcl_bind_fanout_size * 397 sizeof (*ipcl_bind_fanout), KM_SLEEP); 398 399 for (i = 0; i < ipcl_bind_fanout_size; i++) { 400 mutex_init(&ipcl_bind_fanout[i].connf_lock, NULL, 401 MUTEX_DEFAULT, NULL); 402 } 403 404 for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) { 405 mutex_init(&ipcl_proto_fanout[i].connf_lock, NULL, 406 MUTEX_DEFAULT, NULL); 407 } 408 for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) { 409 mutex_init(&ipcl_proto_fanout_v6[i].connf_lock, NULL, 410 MUTEX_DEFAULT, NULL); 411 } 412 413 mutex_init(&rts_clients.connf_lock, NULL, MUTEX_DEFAULT, NULL); 414 415 ipcl_udp_fanout = (connf_t *)kmem_zalloc(ipcl_udp_fanout_size * 416 sizeof (*ipcl_udp_fanout), KM_SLEEP); 417 418 for (i = 0; i < ipcl_udp_fanout_size; i++) { 419 mutex_init(&ipcl_udp_fanout[i].connf_lock, NULL, 420 MUTEX_DEFAULT, NULL); 421 } 422 423 ipcl_raw_fanout = (connf_t *)kmem_zalloc(ipcl_raw_fanout_size * 424 sizeof (*ipcl_raw_fanout), KM_SLEEP); 425 426 for (i = 0; i < ipcl_raw_fanout_size; i++) { 427 mutex_init(&ipcl_raw_fanout[i].connf_lock, NULL, 428 MUTEX_DEFAULT, NULL); 429 } 430 431 ipcl_globalhash_fanout = (connf_t *)kmem_zalloc(sizeof (connf_t) * 432 CONN_G_HASH_SIZE, KM_SLEEP); 433 434 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 435 mutex_init(&ipcl_globalhash_fanout[i].connf_lock, NULL, 436 MUTEX_DEFAULT, NULL); 437 } 438 } 439 440 void 441 ipcl_destroy(void) 442 { 443 int i; 444 kmem_cache_destroy(ipcl_conn_cache); 445 kmem_cache_destroy(ipcl_tcpconn_cache); 446 for (i = 0; i < ipcl_conn_fanout_size; i++) 447 mutex_destroy(&ipcl_conn_fanout[i].connf_lock); 448 kmem_free(ipcl_conn_fanout, ipcl_conn_fanout_size * 449 sizeof (*ipcl_conn_fanout)); 450 for (i = 0; i < ipcl_bind_fanout_size; i++) 451 mutex_destroy(&ipcl_bind_fanout[i].connf_lock); 452 kmem_free(ipcl_bind_fanout, ipcl_bind_fanout_size * 453 sizeof (*ipcl_bind_fanout)); 454 455 for (i = 0; i < A_CNT(ipcl_proto_fanout); i++) 456 mutex_destroy(&ipcl_proto_fanout[i].connf_lock); 457 for (i = 0; i < A_CNT(ipcl_proto_fanout_v6); i++) 458 mutex_destroy(&ipcl_proto_fanout_v6[i].connf_lock); 459 460 for (i = 0; i < ipcl_udp_fanout_size; i++) 461 mutex_destroy(&ipcl_udp_fanout[i].connf_lock); 462 kmem_free(ipcl_udp_fanout, ipcl_udp_fanout_size * 463 sizeof (*ipcl_udp_fanout)); 464 465 for (i = 0; i < ipcl_raw_fanout_size; i++) 466 mutex_destroy(&ipcl_raw_fanout[i].connf_lock); 467 kmem_free(ipcl_raw_fanout, ipcl_raw_fanout_size * 468 sizeof (*ipcl_raw_fanout)); 469 470 kmem_free(ipcl_globalhash_fanout, sizeof (connf_t) * CONN_G_HASH_SIZE); 471 
mutex_destroy(&rts_clients.connf_lock); 472 } 473 474 /* 475 * conn creation routine. initialize the conn, sets the reference 476 * and inserts it in the global hash table. 477 */ 478 conn_t * 479 ipcl_conn_create(uint32_t type, int sleep) 480 { 481 itc_t *itc; 482 conn_t *connp; 483 484 switch (type) { 485 case IPCL_TCPCONN: 486 if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache, 487 sleep)) == NULL) 488 return (NULL); 489 connp = &itc->itc_conn; 490 connp->conn_ref = 1; 491 IPCL_DEBUG_LVL(1, 492 ("ipcl_conn_create: connp = %p tcp (%p)", 493 (void *)connp, (void *)connp->conn_tcp)); 494 ipcl_globalhash_insert(connp); 495 break; 496 case IPCL_SCTPCONN: 497 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 498 return (NULL); 499 connp->conn_flags = IPCL_SCTPCONN; 500 break; 501 case IPCL_IPCCONN: 502 connp = kmem_cache_alloc(ipcl_conn_cache, sleep); 503 if (connp == NULL) 504 return (connp); 505 bzero(connp, sizeof (conn_t)); 506 mutex_init(&connp->conn_lock, NULL, 507 MUTEX_DEFAULT, NULL); 508 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 509 connp->conn_flags |= IPCL_IPCCONN; 510 connp->conn_ref = 1; 511 IPCL_DEBUG_LVL(1, 512 ("ipcl_conn_create: connp = %p\n", (void *)connp)); 513 ipcl_globalhash_insert(connp); 514 break; 515 } 516 517 return (connp); 518 } 519 520 void 521 ipcl_conn_destroy(conn_t *connp) 522 { 523 mblk_t *mp; 524 tcp_t *tcp = connp->conn_tcp; 525 526 ASSERT(!MUTEX_HELD(&connp->conn_lock)); 527 ASSERT(connp->conn_ref == 0); 528 ASSERT(connp->conn_ire_cache == NULL); 529 530 ipcl_globalhash_remove(connp); 531 532 cv_destroy(&connp->conn_cv); 533 if (connp->conn_flags & IPCL_TCPCONN) { 534 mutex_destroy(&connp->conn_lock); 535 ASSERT(connp->conn_tcp != NULL); 536 tcp_free(tcp); 537 mp = tcp->tcp_timercache; 538 539 if (tcp->tcp_sack_info != NULL) { 540 bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 541 kmem_cache_free(tcp_sack_info_cache, 542 tcp->tcp_sack_info); 543 } 544 if (tcp->tcp_iphc != NULL) { 545 if (tcp->tcp_hdr_grown) { 546 kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 547 } else { 548 bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 549 kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 550 } 551 tcp->tcp_iphc_len = 0; 552 } 553 ASSERT(tcp->tcp_iphc_len == 0); 554 555 if (connp->conn_latch != NULL) 556 IPLATCH_REFRELE(connp->conn_latch); 557 if (connp->conn_policy != NULL) 558 IPPH_REFRELE(connp->conn_policy); 559 bzero(connp, sizeof (itc_t)); 560 561 tcp->tcp_timercache = mp; 562 connp->conn_tcp = tcp; 563 connp->conn_flags = IPCL_TCPCONN; 564 connp->conn_ulp = IPPROTO_TCP; 565 tcp->tcp_connp = connp; 566 kmem_cache_free(ipcl_tcpconn_cache, connp); 567 } else if (connp->conn_flags & IPCL_SCTPCONN) { 568 sctp_free(connp); 569 } else { 570 mutex_destroy(&connp->conn_lock); 571 kmem_cache_free(ipcl_conn_cache, connp); 572 } 573 } 574 575 /* 576 * Running in cluster mode - deregister listener information 577 */ 578 579 static void 580 ipcl_conn_unlisten(conn_t *connp) 581 { 582 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 583 ASSERT(connp->conn_lport != 0); 584 585 if (cl_inet_unlisten != NULL) { 586 sa_family_t addr_family; 587 uint8_t *laddrp; 588 589 if (connp->conn_pkt_isv6) { 590 addr_family = AF_INET6; 591 laddrp = (uint8_t *)&connp->conn_bound_source_v6; 592 } else { 593 addr_family = AF_INET; 594 laddrp = (uint8_t *)&connp->conn_bound_source; 595 } 596 (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 597 connp->conn_lport); 598 } 599 connp->conn_flags &= ~IPCL_CL_LISTENER; 600 } 601 602 /* 603 * We set the IPCL_REMOVED flag (instead of 
clearing the flag indicating 604 * which table the conn belonged to). So for debugging we can see which hash 605 * table this connection was in. 606 */ 607 #define IPCL_HASH_REMOVE(connp) { \ 608 connf_t *connfp = (connp)->conn_fanout; \ 609 ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 610 if (connfp != NULL) { \ 611 IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p", \ 612 (void *)(connp))); \ 613 mutex_enter(&connfp->connf_lock); \ 614 if ((connp)->conn_next != NULL) \ 615 (connp)->conn_next->conn_prev = \ 616 (connp)->conn_prev; \ 617 if ((connp)->conn_prev != NULL) \ 618 (connp)->conn_prev->conn_next = \ 619 (connp)->conn_next; \ 620 else \ 621 connfp->connf_head = (connp)->conn_next; \ 622 (connp)->conn_fanout = NULL; \ 623 (connp)->conn_next = NULL; \ 624 (connp)->conn_prev = NULL; \ 625 (connp)->conn_flags |= IPCL_REMOVED; \ 626 if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 627 ipcl_conn_unlisten((connp)); \ 628 CONN_DEC_REF((connp)); \ 629 mutex_exit(&connfp->connf_lock); \ 630 } \ 631 } 632 633 void 634 ipcl_hash_remove(conn_t *connp) 635 { 636 IPCL_HASH_REMOVE(connp); 637 } 638 639 /* 640 * The whole purpose of this function is allow removal of 641 * a conn_t from the connected hash for timewait reclaim. 642 * This is essentially a TW reclaim fastpath where timewait 643 * collector checks under fanout lock (so no one else can 644 * get access to the conn_t) that refcnt is 2 i.e. one for 645 * TCP and one for the classifier hash list. If ref count 646 * is indeed 2, we can just remove the conn under lock and 647 * avoid cleaning up the conn under squeue. This gives us 648 * improved performance. 649 */ 650 void 651 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 652 { 653 ASSERT(MUTEX_HELD(&connfp->connf_lock)); 654 ASSERT(MUTEX_HELD(&connp->conn_lock)); 655 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 656 657 if ((connp)->conn_next != NULL) { 658 (connp)->conn_next->conn_prev = 659 (connp)->conn_prev; 660 } 661 if ((connp)->conn_prev != NULL) { 662 (connp)->conn_prev->conn_next = 663 (connp)->conn_next; 664 } else { 665 connfp->connf_head = (connp)->conn_next; 666 } 667 (connp)->conn_fanout = NULL; 668 (connp)->conn_next = NULL; 669 (connp)->conn_prev = NULL; 670 (connp)->conn_flags |= IPCL_REMOVED; 671 ASSERT((connp)->conn_ref == 2); 672 (connp)->conn_ref--; 673 } 674 675 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 676 ASSERT((connp)->conn_fanout == NULL); \ 677 ASSERT((connp)->conn_next == NULL); \ 678 ASSERT((connp)->conn_prev == NULL); \ 679 if ((connfp)->connf_head != NULL) { \ 680 (connfp)->connf_head->conn_prev = (connp); \ 681 (connp)->conn_next = (connfp)->connf_head; \ 682 } \ 683 (connp)->conn_fanout = (connfp); \ 684 (connfp)->connf_head = (connp); \ 685 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 686 IPCL_CONNECTED; \ 687 CONN_INC_REF(connp); \ 688 } 689 690 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 691 IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 692 "connp %p", (void *)(connfp), (void *)(connp))); \ 693 IPCL_HASH_REMOVE((connp)); \ 694 mutex_enter(&(connfp)->connf_lock); \ 695 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 696 mutex_exit(&(connfp)->connf_lock); \ 697 } 698 699 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 700 conn_t *pconnp = NULL, *nconnp; \ 701 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 702 "connp %p", (void *)connfp, (void *)(connp))); \ 703 IPCL_HASH_REMOVE((connp)); \ 704 mutex_enter(&(connfp)->connf_lock); \ 705 nconnp = 
(connfp)->connf_head; \ 706 while (nconnp != NULL && \ 707 !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 708 pconnp = nconnp; \ 709 nconnp = nconnp->conn_next; \ 710 } \ 711 if (pconnp != NULL) { \ 712 pconnp->conn_next = (connp); \ 713 (connp)->conn_prev = pconnp; \ 714 } else { \ 715 (connfp)->connf_head = (connp); \ 716 } \ 717 if (nconnp != NULL) { \ 718 (connp)->conn_next = nconnp; \ 719 nconnp->conn_prev = (connp); \ 720 } \ 721 (connp)->conn_fanout = (connfp); \ 722 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 723 IPCL_BOUND; \ 724 CONN_INC_REF(connp); \ 725 mutex_exit(&(connfp)->connf_lock); \ 726 } 727 728 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 729 conn_t **list, *prev, *next; \ 730 boolean_t isv4mapped = \ 731 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 732 IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 733 "connp %p", (void *)(connfp), (void *)(connp))); \ 734 IPCL_HASH_REMOVE((connp)); \ 735 mutex_enter(&(connfp)->connf_lock); \ 736 list = &(connfp)->connf_head; \ 737 prev = NULL; \ 738 while ((next = *list) != NULL) { \ 739 if (isv4mapped && \ 740 IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 741 connp->conn_zoneid == next->conn_zoneid) { \ 742 (connp)->conn_next = next; \ 743 if (prev != NULL) \ 744 prev = next->conn_prev; \ 745 next->conn_prev = (connp); \ 746 break; \ 747 } \ 748 list = &next->conn_next; \ 749 prev = next; \ 750 } \ 751 (connp)->conn_prev = prev; \ 752 *list = (connp); \ 753 (connp)->conn_fanout = (connfp); \ 754 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 755 IPCL_BOUND; \ 756 CONN_INC_REF((connp)); \ 757 mutex_exit(&(connfp)->connf_lock); \ 758 } 759 760 void 761 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 762 { 763 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 764 } 765 766 void 767 ipcl_proto_insert(conn_t *connp, uint8_t protocol) 768 { 769 connf_t *connfp; 770 771 ASSERT(connp != NULL); 772 773 connp->conn_ulp = protocol; 774 775 /* Insert it in the protocol hash */ 776 connfp = &ipcl_proto_fanout[protocol]; 777 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 778 } 779 780 void 781 ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 782 { 783 connf_t *connfp; 784 785 ASSERT(connp != NULL); 786 787 connp->conn_ulp = protocol; 788 789 /* Insert it in the Bind Hash */ 790 connfp = &ipcl_proto_fanout_v6[protocol]; 791 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 792 } 793 794 /* 795 * This function is used only for inserting SCTP raw socket now. 796 * This may change later. 797 * 798 * Note that only one raw socket can be bound to a port. The param 799 * lport is in network byte order. 800 */ 801 static int 802 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 803 { 804 connf_t *connfp; 805 conn_t *oconnp; 806 807 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 808 809 /* Check for existing raw socket already bound to the port. 
*/ 810 mutex_enter(&connfp->connf_lock); 811 for (oconnp = connfp->connf_head; oconnp != NULL; 812 oconnp = oconnp->conn_next) { 813 if (oconnp->conn_lport == lport && 814 oconnp->conn_zoneid == connp->conn_zoneid && 815 oconnp->conn_af_isv6 == connp->conn_af_isv6 && 816 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 817 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 818 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 819 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 820 IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 821 &connp->conn_srcv6))) { 822 break; 823 } 824 } 825 mutex_exit(&connfp->connf_lock); 826 if (oconnp != NULL) 827 return (EADDRNOTAVAIL); 828 829 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 830 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 831 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 832 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 833 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 834 } else { 835 IPCL_HASH_INSERT_BOUND(connfp, connp); 836 } 837 } else { 838 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 839 } 840 return (0); 841 } 842 843 /* 844 * (v4, v6) bind hash insertion routines 845 */ 846 int 847 ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 848 { 849 connf_t *connfp; 850 #ifdef IPCL_DEBUG 851 char buf[INET_NTOA_BUFSIZE]; 852 #endif 853 int ret = 0; 854 855 ASSERT(connp); 856 857 IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 858 "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 859 860 connp->conn_ulp = protocol; 861 IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 862 connp->conn_lport = lport; 863 864 switch (protocol) { 865 case IPPROTO_UDP: 866 default: 867 if (protocol == IPPROTO_UDP) { 868 IPCL_DEBUG_LVL(64, 869 ("ipcl_bind_insert: connp %p - udp\n", 870 (void *)connp)); 871 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 872 } else { 873 IPCL_DEBUG_LVL(64, 874 ("ipcl_bind_insert: connp %p - protocol\n", 875 (void *)connp)); 876 connfp = &ipcl_proto_fanout[protocol]; 877 } 878 879 if (connp->conn_rem != INADDR_ANY) { 880 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 881 } else if (connp->conn_src != INADDR_ANY) { 882 IPCL_HASH_INSERT_BOUND(connfp, connp); 883 } else { 884 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 885 } 886 break; 887 888 case IPPROTO_TCP: 889 890 /* Insert it in the Bind Hash */ 891 connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 892 if (connp->conn_src != INADDR_ANY) { 893 IPCL_HASH_INSERT_BOUND(connfp, connp); 894 } else { 895 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 896 } 897 if (cl_inet_listen != NULL) { 898 ASSERT(!connp->conn_pkt_isv6); 899 connp->conn_flags |= IPCL_CL_LISTENER; 900 (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 901 (uint8_t *)&connp->conn_bound_source, lport); 902 } 903 break; 904 905 case IPPROTO_SCTP: 906 ret = ipcl_sctp_hash_insert(connp, lport); 907 break; 908 } 909 910 return (ret); 911 } 912 913 int 914 ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 915 uint16_t lport) 916 { 917 connf_t *connfp; 918 int ret = 0; 919 920 ASSERT(connp); 921 922 connp->conn_ulp = protocol; 923 connp->conn_srcv6 = *src; 924 connp->conn_lport = lport; 925 926 switch (protocol) { 927 case IPPROTO_UDP: 928 default: 929 if (protocol == IPPROTO_UDP) { 930 IPCL_DEBUG_LVL(128, 931 ("ipcl_bind_insert_v6: connp %p - udp\n", 932 (void *)connp)); 933 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 934 } else { 935 IPCL_DEBUG_LVL(128, 936 ("ipcl_bind_insert_v6: connp %p - protocol\n", 937 (void *)connp)); 938 connfp = 
&ipcl_proto_fanout_v6[protocol]; 939 } 940 941 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 942 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 943 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 944 IPCL_HASH_INSERT_BOUND(connfp, connp); 945 } else { 946 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 947 } 948 break; 949 950 case IPPROTO_TCP: 951 /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */ 952 953 /* Insert it in the Bind Hash */ 954 connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 955 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 956 IPCL_HASH_INSERT_BOUND(connfp, connp); 957 } else { 958 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 959 } 960 if (cl_inet_listen != NULL) { 961 sa_family_t addr_family; 962 uint8_t *laddrp; 963 964 if (connp->conn_pkt_isv6) { 965 addr_family = AF_INET6; 966 laddrp = 967 (uint8_t *)&connp->conn_bound_source_v6; 968 } else { 969 addr_family = AF_INET; 970 laddrp = (uint8_t *)&connp->conn_bound_source; 971 } 972 connp->conn_flags |= IPCL_CL_LISTENER; 973 (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 974 lport); 975 } 976 break; 977 978 case IPPROTO_SCTP: 979 ret = ipcl_sctp_hash_insert(connp, lport); 980 break; 981 } 982 983 return (ret); 984 } 985 986 /* 987 * ipcl_conn_hash insertion routines. 988 */ 989 int 990 ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, 991 ipaddr_t rem, uint32_t ports) 992 { 993 connf_t *connfp; 994 uint16_t *up; 995 conn_t *tconnp; 996 #ifdef IPCL_DEBUG 997 char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; 998 #endif 999 in_port_t lport; 1000 int ret = 0; 1001 1002 IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " 1003 "dst = %s, ports = %x, protocol = %x", (void *)connp, 1004 inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), 1005 ports, protocol)); 1006 1007 switch (protocol) { 1008 case IPPROTO_TCP: 1009 if (!(connp->conn_flags & IPCL_EAGER)) { 1010 /* 1011 * for a eager connection, i.e connections which 1012 * have just been created, the initialization is 1013 * already done in ip at conn_creation time, so 1014 * we can skip the checks here. 1015 */ 1016 IPCL_CONN_INIT(connp, protocol, src, rem, ports); 1017 } 1018 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(connp->conn_rem, 1019 connp->conn_ports)]; 1020 mutex_enter(&connfp->connf_lock); 1021 for (tconnp = connfp->connf_head; tconnp != NULL; 1022 tconnp = tconnp->conn_next) { 1023 if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp, 1024 connp->conn_rem, connp->conn_src, 1025 connp->conn_ports)) { 1026 1027 /* Already have a conn. bail out */ 1028 mutex_exit(&connfp->connf_lock); 1029 return (EADDRINUSE); 1030 } 1031 } 1032 if (connp->conn_fanout != NULL) { 1033 /* 1034 * Probably a XTI/TLI application trying to do a 1035 * rebind. Let it happen. 1036 */ 1037 mutex_exit(&connfp->connf_lock); 1038 IPCL_HASH_REMOVE(connp); 1039 mutex_enter(&connfp->connf_lock); 1040 } 1041 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1042 mutex_exit(&connfp->connf_lock); 1043 break; 1044 1045 case IPPROTO_SCTP: 1046 /* 1047 * The raw socket may have already been bound, remove it 1048 * from the hash first. 
1049 */ 1050 IPCL_HASH_REMOVE(connp); 1051 lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 1052 ret = ipcl_sctp_hash_insert(connp, lport); 1053 break; 1054 1055 case IPPROTO_UDP: 1056 default: 1057 up = (uint16_t *)&ports; 1058 IPCL_CONN_INIT(connp, protocol, src, rem, ports); 1059 if (protocol == IPPROTO_UDP) { 1060 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; 1061 } else { 1062 connfp = &ipcl_proto_fanout[protocol]; 1063 } 1064 1065 if (connp->conn_rem != INADDR_ANY) { 1066 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1067 } else if (connp->conn_src != INADDR_ANY) { 1068 IPCL_HASH_INSERT_BOUND(connfp, connp); 1069 } else { 1070 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1071 } 1072 break; 1073 } 1074 1075 return (ret); 1076 } 1077 1078 int 1079 ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 1080 const in6_addr_t *rem, uint32_t ports, uint_t ifindex) 1081 { 1082 connf_t *connfp; 1083 uint16_t *up; 1084 conn_t *tconnp; 1085 in_port_t lport; 1086 int ret = 0; 1087 1088 switch (protocol) { 1089 case IPPROTO_TCP: 1090 /* Just need to insert a conn struct */ 1091 if (!(connp->conn_flags & IPCL_EAGER)) { 1092 IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 1093 } 1094 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(connp->conn_remv6, 1095 connp->conn_ports)]; 1096 mutex_enter(&connfp->connf_lock); 1097 for (tconnp = connfp->connf_head; tconnp != NULL; 1098 tconnp = tconnp->conn_next) { 1099 if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 1100 connp->conn_remv6, connp->conn_srcv6, 1101 connp->conn_ports) && 1102 (tconnp->conn_tcp->tcp_bound_if == 0 || 1103 tconnp->conn_tcp->tcp_bound_if == ifindex)) { 1104 /* Already have a conn. bail out */ 1105 mutex_exit(&connfp->connf_lock); 1106 return (EADDRINUSE); 1107 } 1108 } 1109 if (connp->conn_fanout != NULL) { 1110 /* 1111 * Probably a XTI/TLI application trying to do a 1112 * rebind. Let it happen. 1113 */ 1114 mutex_exit(&connfp->connf_lock); 1115 IPCL_HASH_REMOVE(connp); 1116 mutex_enter(&connfp->connf_lock); 1117 } 1118 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1119 mutex_exit(&connfp->connf_lock); 1120 break; 1121 1122 case IPPROTO_SCTP: 1123 IPCL_HASH_REMOVE(connp); 1124 lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 1125 ret = ipcl_sctp_hash_insert(connp, lport); 1126 break; 1127 1128 case IPPROTO_UDP: 1129 default: 1130 up = (uint16_t *)&ports; 1131 IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 1132 if (protocol == IPPROTO_UDP) { 1133 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(up[1])]; 1134 } else { 1135 connfp = &ipcl_proto_fanout_v6[protocol]; 1136 } 1137 1138 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 1139 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1140 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 1141 IPCL_HASH_INSERT_BOUND(connfp, connp); 1142 } else { 1143 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1144 } 1145 break; 1146 } 1147 1148 return (ret); 1149 } 1150 1151 /* 1152 * v4 packet classifying function. looks up the fanout table to 1153 * find the conn, the packet belongs to. returns the conn with 1154 * the reference held, null otherwise. 
1155 */ 1156 conn_t * 1157 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 1158 { 1159 ipha_t *ipha; 1160 connf_t *connfp, *bind_connfp; 1161 uint16_t lport; 1162 uint16_t fport; 1163 uint32_t ports; 1164 conn_t *connp; 1165 uint16_t *up; 1166 1167 ipha = (ipha_t *)mp->b_rptr; 1168 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 1169 1170 switch (protocol) { 1171 case IPPROTO_TCP: 1172 ports = *(uint32_t *)up; 1173 connfp = 1174 &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, ports)]; 1175 mutex_enter(&connfp->connf_lock); 1176 for (connp = connfp->connf_head; connp != NULL; 1177 connp = connp->conn_next) { 1178 if (IPCL_CONN_MATCH(connp, protocol, 1179 ipha->ipha_src, ipha->ipha_dst, ports)) 1180 break; 1181 } 1182 1183 if (connp != NULL) { 1184 CONN_INC_REF(connp); 1185 mutex_exit(&connfp->connf_lock); 1186 return (connp); 1187 } 1188 1189 mutex_exit(&connfp->connf_lock); 1190 1191 lport = up[1]; 1192 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1193 mutex_enter(&bind_connfp->connf_lock); 1194 for (connp = bind_connfp->connf_head; connp != NULL; 1195 connp = connp->conn_next) { 1196 if (IPCL_BIND_MATCH(connp, protocol, 1197 ipha->ipha_dst, lport) && 1198 connp->conn_zoneid == zoneid) 1199 break; 1200 } 1201 1202 if (connp != NULL) { 1203 /* Have a listner at least */ 1204 CONN_INC_REF(connp); 1205 mutex_exit(&bind_connfp->connf_lock); 1206 return (connp); 1207 } 1208 1209 mutex_exit(&bind_connfp->connf_lock); 1210 1211 IPCL_DEBUG_LVL(512, 1212 ("ipcl_classify: couldn't classify mp = %p\n", 1213 (void *)mp)); 1214 break; 1215 1216 case IPPROTO_UDP: 1217 lport = up[1]; 1218 fport = up[0]; 1219 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 1220 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 1221 mutex_enter(&connfp->connf_lock); 1222 for (connp = connfp->connf_head; connp != NULL; 1223 connp = connp->conn_next) { 1224 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 1225 fport, ipha->ipha_src) && 1226 connp->conn_zoneid == zoneid) 1227 break; 1228 } 1229 1230 if (connp != NULL) { 1231 CONN_INC_REF(connp); 1232 mutex_exit(&connfp->connf_lock); 1233 return (connp); 1234 } 1235 1236 /* 1237 * We shouldn't come here for multicast/broadcast packets 1238 */ 1239 mutex_exit(&connfp->connf_lock); 1240 IPCL_DEBUG_LVL(512, 1241 ("ipcl_classify: cant find udp conn_t for ports : %x %x", 1242 lport, fport)); 1243 break; 1244 } 1245 1246 return (NULL); 1247 } 1248 1249 conn_t * 1250 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid) 1251 { 1252 ip6_t *ip6h; 1253 connf_t *connfp, *bind_connfp; 1254 uint16_t lport; 1255 uint16_t fport; 1256 tcph_t *tcph; 1257 uint32_t ports; 1258 conn_t *connp; 1259 uint16_t *up; 1260 1261 1262 ip6h = (ip6_t *)mp->b_rptr; 1263 1264 switch (protocol) { 1265 case IPPROTO_TCP: 1266 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 1267 up = (uint16_t *)tcph->th_lport; 1268 ports = *(uint32_t *)up; 1269 1270 connfp = 1271 &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, ports)]; 1272 mutex_enter(&connfp->connf_lock); 1273 for (connp = connfp->connf_head; connp != NULL; 1274 connp = connp->conn_next) { 1275 if (IPCL_CONN_MATCH_V6(connp, protocol, 1276 ip6h->ip6_src, ip6h->ip6_dst, ports)) 1277 break; 1278 } 1279 1280 if (connp != NULL) { 1281 CONN_INC_REF(connp); 1282 mutex_exit(&connfp->connf_lock); 1283 return (connp); 1284 } 1285 1286 mutex_exit(&connfp->connf_lock); 1287 1288 lport = up[1]; 1289 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1290 
mutex_enter(&bind_connfp->connf_lock); 1291 for (connp = bind_connfp->connf_head; connp != NULL; 1292 connp = connp->conn_next) { 1293 if (IPCL_BIND_MATCH_V6(connp, protocol, 1294 ip6h->ip6_dst, lport) && 1295 connp->conn_zoneid == zoneid) 1296 break; 1297 } 1298 1299 if (connp != NULL) { 1300 /* Have a listner at least */ 1301 CONN_INC_REF(connp); 1302 mutex_exit(&bind_connfp->connf_lock); 1303 IPCL_DEBUG_LVL(512, 1304 ("ipcl_classify_v6: found listner " 1305 "connp = %p\n", (void *)connp)); 1306 1307 return (connp); 1308 } 1309 1310 mutex_exit(&bind_connfp->connf_lock); 1311 1312 IPCL_DEBUG_LVL(512, 1313 ("ipcl_classify_v6: couldn't classify mp = %p\n", 1314 (void *)mp)); 1315 break; 1316 1317 case IPPROTO_UDP: 1318 up = (uint16_t *)&mp->b_rptr[hdr_len]; 1319 lport = up[1]; 1320 fport = up[0]; 1321 IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 1322 fport)); 1323 connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(lport)]; 1324 mutex_enter(&connfp->connf_lock); 1325 for (connp = connfp->connf_head; connp != NULL; 1326 connp = connp->conn_next) { 1327 if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 1328 fport, ip6h->ip6_src) && 1329 connp->conn_zoneid == zoneid) 1330 break; 1331 } 1332 1333 if (connp != NULL) { 1334 CONN_INC_REF(connp); 1335 mutex_exit(&connfp->connf_lock); 1336 return (connp); 1337 } 1338 1339 /* 1340 * We shouldn't come here for multicast/broadcast packets 1341 */ 1342 mutex_exit(&connfp->connf_lock); 1343 IPCL_DEBUG_LVL(512, 1344 ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 1345 lport, fport)); 1346 break; 1347 } 1348 1349 1350 return (NULL); 1351 } 1352 1353 /* 1354 * wrapper around ipcl_classify_(v4,v6) routines. 1355 */ 1356 conn_t * 1357 ipcl_classify(mblk_t *mp, zoneid_t zoneid) 1358 { 1359 uint16_t hdr_len; 1360 ipha_t *ipha; 1361 uint8_t *nexthdrp; 1362 1363 if (MBLKL(mp) < sizeof (ipha_t)) 1364 return (NULL); 1365 1366 switch (IPH_HDR_VERSION(mp->b_rptr)) { 1367 case IPV4_VERSION: 1368 ipha = (ipha_t *)mp->b_rptr; 1369 hdr_len = IPH_HDR_LENGTH(ipha); 1370 return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 1371 zoneid)); 1372 case IPV6_VERSION: 1373 if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 1374 &hdr_len, &nexthdrp)) 1375 return (NULL); 1376 1377 return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid)); 1378 } 1379 1380 return (NULL); 1381 } 1382 1383 conn_t * 1384 ipcl_classify_raw(uint8_t protocol, zoneid_t zoneid, uint32_t ports, 1385 ipha_t *hdr) 1386 { 1387 struct connf_s *connfp; 1388 conn_t *connp; 1389 in_port_t lport; 1390 int af; 1391 1392 lport = ((uint16_t *)&ports)[1]; 1393 af = IPH_HDR_VERSION(hdr); 1394 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport))]; 1395 1396 mutex_enter(&connfp->connf_lock); 1397 for (connp = connfp->connf_head; connp != NULL; 1398 connp = connp->conn_next) { 1399 /* We don't allow v4 fallback for v6 raw socket. */ 1400 if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 1401 IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { 1402 continue; 1403 } 1404 if (connp->conn_fully_bound) { 1405 if (af == IPV4_VERSION) { 1406 if (IPCL_CONN_MATCH(connp, protocol, 1407 hdr->ipha_src, hdr->ipha_dst, ports)) { 1408 break; 1409 } 1410 } else { 1411 if (IPCL_CONN_MATCH_V6(connp, protocol, 1412 ((ip6_t *)hdr)->ip6_src, 1413 ((ip6_t *)hdr)->ip6_dst, ports)) { 1414 break; 1415 } 1416 } 1417 } else { 1418 if (af == IPV4_VERSION) { 1419 if (IPCL_BIND_MATCH(connp, protocol, 1420 hdr->ipha_dst, lport)) { 1421 break; 1422 } 1423 } else { 1424 if (IPCL_BIND_MATCH_V6(connp, protocol, 1425 ((ip6_t *)hdr)->ip6_dst, lport)) { 1426 break; 1427 } 1428 } 1429 } 1430 } 1431 1432 if (connp != NULL) 1433 goto found; 1434 mutex_exit(&connfp->connf_lock); 1435 1436 /* Try to look for a wildcard match. */ 1437 connfp = &ipcl_raw_fanout[IPCL_RAW_HASH(0)]; 1438 mutex_enter(&connfp->connf_lock); 1439 for (connp = connfp->connf_head; connp != NULL; 1440 connp = connp->conn_next) { 1441 /* We don't allow v4 fallback for v6 raw socket. */ 1442 if ((af == (connp->conn_af_isv6 ? IPV4_VERSION : 1443 IPV6_VERSION)) || (connp->conn_zoneid != zoneid)) { 1444 continue; 1445 } 1446 if (af == IPV4_VERSION) { 1447 if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 1448 break; 1449 } else { 1450 if (IPCL_RAW_MATCH_V6(connp, protocol, 1451 ((ip6_t *)hdr)->ip6_dst)) { 1452 break; 1453 } 1454 } 1455 } 1456 1457 if (connp != NULL) 1458 goto found; 1459 1460 mutex_exit(&connfp->connf_lock); 1461 return (NULL); 1462 1463 found: 1464 ASSERT(connp != NULL); 1465 CONN_INC_REF(connp); 1466 mutex_exit(&connfp->connf_lock); 1467 return (connp); 1468 } 1469 1470 /* ARGSUSED */ 1471 static int 1472 ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) 1473 { 1474 itc_t *itc = (itc_t *)buf; 1475 conn_t *connp = &itc->itc_conn; 1476 tcp_t *tcp = &itc->itc_tcp; 1477 bzero(itc, sizeof (itc_t)); 1478 tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 1479 connp->conn_tcp = tcp; 1480 connp->conn_flags = IPCL_TCPCONN; 1481 connp->conn_ulp = IPPROTO_TCP; 1482 tcp->tcp_connp = connp; 1483 return (0); 1484 } 1485 1486 /* ARGSUSED */ 1487 static void 1488 ipcl_tcpconn_destructor(void *buf, void *cdrarg) 1489 { 1490 tcp_timermp_free(((conn_t *)buf)->conn_tcp); 1491 } 1492 1493 /* 1494 * All conns are inserted in a global multi-list for the benefit of 1495 * walkers. The walk is guaranteed to walk all open conns at the time 1496 * of the start of the walk exactly once. This property is needed to 1497 * achieve some cleanups during unplumb of interfaces. This is achieved 1498 * as follows. 1499 * 1500 * ipcl_conn_create and ipcl_conn_destroy are the only functions that 1501 * call the insert and delete functions below at creation and deletion 1502 * time respectively. The conn never moves or changes its position in this 1503 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 1504 * won't increase due to walkers, once the conn deletion has started. Note 1505 * that we can't remove the conn from the global list and then wait for 1506 * the refcnt to drop to zero, since walkers would then see a truncated 1507 * list. CONN_INCIPIENT ensures that walkers don't start looking at 1508 * conns until ip_open is ready to make them globally visible. 1509 * The global round robin multi-list locks are held only to get the 1510 * next member/insertion/deletion and contention should be negligible 1511 * if the multi-list is much greater than the number of cpus. 
1512 */ 1513 void 1514 ipcl_globalhash_insert(conn_t *connp) 1515 { 1516 int index; 1517 1518 /* 1519 * No need for atomic here. Approximate even distribution 1520 * in the global lists is sufficient. 1521 */ 1522 conn_g_index++; 1523 index = conn_g_index & (CONN_G_HASH_SIZE - 1); 1524 1525 connp->conn_g_prev = NULL; 1526 /* 1527 * Mark as INCIPIENT, so that walkers will ignore this 1528 * for now, till ip_open is ready to make it visible globally. 1529 */ 1530 connp->conn_state_flags |= CONN_INCIPIENT; 1531 1532 /* Insert at the head of the list */ 1533 mutex_enter(&ipcl_globalhash_fanout[index].connf_lock); 1534 connp->conn_g_next = ipcl_globalhash_fanout[index].connf_head; 1535 if (connp->conn_g_next != NULL) 1536 connp->conn_g_next->conn_g_prev = connp; 1537 ipcl_globalhash_fanout[index].connf_head = connp; 1538 1539 /* The fanout bucket this conn points to */ 1540 connp->conn_g_fanout = &ipcl_globalhash_fanout[index]; 1541 1542 mutex_exit(&ipcl_globalhash_fanout[index].connf_lock); 1543 } 1544 1545 void 1546 ipcl_globalhash_remove(conn_t *connp) 1547 { 1548 /* 1549 * We were never inserted in the global multi list. 1550 * IPCL_NONE variety is never inserted in the global multilist 1551 * since it is presumed to not need any cleanup and is transient. 1552 */ 1553 if (connp->conn_g_fanout == NULL) 1554 return; 1555 1556 mutex_enter(&connp->conn_g_fanout->connf_lock); 1557 if (connp->conn_g_prev != NULL) 1558 connp->conn_g_prev->conn_g_next = connp->conn_g_next; 1559 else 1560 connp->conn_g_fanout->connf_head = connp->conn_g_next; 1561 if (connp->conn_g_next != NULL) 1562 connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 1563 mutex_exit(&connp->conn_g_fanout->connf_lock); 1564 1565 /* Better to stumble on a null pointer than to corrupt memory */ 1566 connp->conn_g_next = NULL; 1567 connp->conn_g_prev = NULL; 1568 } 1569 1570 /* 1571 * Walk the list of all conn_t's in the system, calling the function provided 1572 * with the specified argument for each. 1573 * Applies to both IPv4 and IPv6. 1574 * 1575 * IPCs may hold pointers to ipif/ill. To guard against stale pointers 1576 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 1577 * unplumbed or removed. New conn_t's that are created while we are walking 1578 * may be missed by this walk, because they are not necessarily inserted 1579 * at the tail of the list. They are new conn_t's and thus don't have any 1580 * stale pointers. The CONN_CLOSING flag ensures that no new reference 1581 * is created to the struct that is going away. 
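 *
 * A hypothetical walker callback, sketched here only to show the calling
 * convention ((*func)(connp, arg)); the callback name and its argument type
 * are illustrative and not part of this file:
 *
 *	static void
 *	my_conn_inspect(conn_t *connp, caddr_t arg)
 *	{
 *		... the walk holds a reference on connp for the duration
 *		    of the call ...
 *	}
 *
 *	ipcl_walk(my_conn_inspect, arg);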
1582 */ 1583 void 1584 ipcl_walk(pfv_t func, void *arg) 1585 { 1586 int i; 1587 conn_t *connp; 1588 conn_t *prev_connp; 1589 1590 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 1591 mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 1592 prev_connp = NULL; 1593 connp = ipcl_globalhash_fanout[i].connf_head; 1594 while (connp != NULL) { 1595 mutex_enter(&connp->conn_lock); 1596 if (connp->conn_state_flags & 1597 (CONN_CONDEMNED | CONN_INCIPIENT)) { 1598 mutex_exit(&connp->conn_lock); 1599 connp = connp->conn_g_next; 1600 continue; 1601 } 1602 CONN_INC_REF_LOCKED(connp); 1603 mutex_exit(&connp->conn_lock); 1604 mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 1605 (*func)(connp, arg); 1606 if (prev_connp != NULL) 1607 CONN_DEC_REF(prev_connp); 1608 mutex_enter(&ipcl_globalhash_fanout[i].connf_lock); 1609 prev_connp = connp; 1610 connp = connp->conn_g_next; 1611 } 1612 mutex_exit(&ipcl_globalhash_fanout[i].connf_lock); 1613 if (prev_connp != NULL) 1614 CONN_DEC_REF(prev_connp); 1615 } 1616 } 1617 1618 /* 1619 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 1620 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 1621 * held; caller must call CONN_DEC_REF. Only checks for connected entries 1622 * (peer tcp in at least ESTABLISHED state). 1623 */ 1624 conn_t * 1625 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph) 1626 { 1627 uint32_t ports; 1628 uint16_t *pports = (uint16_t *)&ports; 1629 connf_t *connfp; 1630 conn_t *tconnp; 1631 boolean_t zone_chk; 1632 1633 /* 1634 * If either the source of destination address is loopback, then 1635 * both endpoints must be in the same Zone. Otherwise, both of 1636 * the addresses are system-wide unique (tcp is in ESTABLISHED 1637 * state) and the endpoints may reside in different Zones. 1638 */ 1639 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 1640 ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 1641 1642 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1643 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1644 1645 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 1646 1647 mutex_enter(&connfp->connf_lock); 1648 for (tconnp = connfp->connf_head; tconnp != NULL; 1649 tconnp = tconnp->conn_next) { 1650 1651 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 1652 ipha->ipha_dst, ipha->ipha_src, ports) && 1653 tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 1654 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 1655 1656 ASSERT(tconnp != connp); 1657 CONN_INC_REF(tconnp); 1658 mutex_exit(&connfp->connf_lock); 1659 return (tconnp); 1660 } 1661 } 1662 mutex_exit(&connfp->connf_lock); 1663 return (NULL); 1664 } 1665 1666 /* 1667 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 1668 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 1669 * held; caller must call CONN_DEC_REF. Only checks for connected entries 1670 * (peer tcp in at least ESTABLISHED state). 1671 */ 1672 conn_t * 1673 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph) 1674 { 1675 uint32_t ports; 1676 uint16_t *pports = (uint16_t *)&ports; 1677 connf_t *connfp; 1678 conn_t *tconnp; 1679 boolean_t zone_chk; 1680 1681 /* 1682 * If either the source of destination address is loopback, then 1683 * both endpoints must be in the same Zone. Otherwise, both of 1684 * the addresses are system-wide unique (tcp is in ESTABLISHED 1685 * state) and the endpoints may reside in different Zones. 
We 1686 * don't do Zone check for link local address(es) because the 1687 * current Zone implementation treats each link local address as 1688 * being unique per system node, i.e. they belong to global Zone. 1689 */ 1690 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 1691 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 1692 1693 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1694 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1695 1696 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 1697 1698 mutex_enter(&connfp->connf_lock); 1699 for (tconnp = connfp->connf_head; tconnp != NULL; 1700 tconnp = tconnp->conn_next) { 1701 1702 /* We skip tcp_bound_if check here as this is loopback tcp */ 1703 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 1704 ip6h->ip6_dst, ip6h->ip6_src, ports) && 1705 tconnp->conn_tcp->tcp_state >= TCPS_ESTABLISHED && 1706 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 1707 1708 ASSERT(tconnp != connp); 1709 CONN_INC_REF(tconnp); 1710 mutex_exit(&connfp->connf_lock); 1711 return (tconnp); 1712 } 1713 } 1714 mutex_exit(&connfp->connf_lock); 1715 return (NULL); 1716 } 1717 1718 /* 1719 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 1720 * Returns with conn reference held. Caller must call CONN_DEC_REF. 1721 * Only checks for connected entries i.e. no INADDR_ANY checks. 1722 */ 1723 conn_t * 1724 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state) 1725 { 1726 uint32_t ports; 1727 uint16_t *pports; 1728 connf_t *connfp; 1729 conn_t *tconnp; 1730 1731 pports = (uint16_t *)&ports; 1732 bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 1733 bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 1734 1735 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, ports)]; 1736 1737 mutex_enter(&connfp->connf_lock); 1738 for (tconnp = connfp->connf_head; tconnp != NULL; 1739 tconnp = tconnp->conn_next) { 1740 1741 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 1742 ipha->ipha_dst, ipha->ipha_src, ports) && 1743 tconnp->conn_tcp->tcp_state >= min_state) { 1744 1745 CONN_INC_REF(tconnp); 1746 mutex_exit(&connfp->connf_lock); 1747 return (tconnp); 1748 } 1749 } 1750 mutex_exit(&connfp->connf_lock); 1751 return (NULL); 1752 } 1753 1754 /* 1755 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 1756 * Returns with conn reference held. Caller must call CONN_DEC_REF. 1757 * Only checks for connected entries i.e. no INADDR_ANY checks. 1758 * Match on ifindex in addition to addresses. 
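 *
 * Illustrative call only (the choice of min_state and the origin of the
 * ip6h/tcpha pointers are hypothetical):
 *
 *	tconnp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
 *	    ifindex);
 *	if (tconnp != NULL) {
 *		... use the connection ...
 *		CONN_DEC_REF(tconnp);
 *	}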
1759 */ 1760 conn_t * 1761 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 1762 uint_t ifindex) 1763 { 1764 tcp_t *tcp; 1765 uint32_t ports; 1766 uint16_t *pports; 1767 connf_t *connfp; 1768 conn_t *tconnp; 1769 1770 pports = (uint16_t *)&ports; 1771 pports[0] = tcpha->tha_fport; 1772 pports[1] = tcpha->tha_lport; 1773 1774 connfp = &ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, ports)]; 1775 1776 mutex_enter(&connfp->connf_lock); 1777 for (tconnp = connfp->connf_head; tconnp != NULL; 1778 tconnp = tconnp->conn_next) { 1779 1780 tcp = tconnp->conn_tcp; 1781 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 1782 ip6h->ip6_dst, ip6h->ip6_src, ports) && 1783 tcp->tcp_state >= min_state && 1784 (tcp->tcp_bound_if == 0 || 1785 tcp->tcp_bound_if == ifindex)) { 1786 1787 CONN_INC_REF(tconnp); 1788 mutex_exit(&connfp->connf_lock); 1789 return (tconnp); 1790 } 1791 } 1792 mutex_exit(&connfp->connf_lock); 1793 return (NULL); 1794 } 1795 1796 /* 1797 * To find a TCP listening connection matching the incoming segment. 1798 */ 1799 conn_t * 1800 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid) 1801 { 1802 connf_t *bind_connfp; 1803 conn_t *connp; 1804 tcp_t *tcp; 1805 1806 /* 1807 * Avoid false matches for packets sent to an IP destination of 1808 * all zeros. 1809 */ 1810 if (laddr == 0) 1811 return (NULL); 1812 1813 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1814 mutex_enter(&bind_connfp->connf_lock); 1815 for (connp = bind_connfp->connf_head; connp != NULL; 1816 connp = connp->conn_next) { 1817 tcp = connp->conn_tcp; 1818 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 1819 connp->conn_zoneid == zoneid && 1820 (tcp->tcp_listener == NULL)) { 1821 CONN_INC_REF(connp); 1822 mutex_exit(&bind_connfp->connf_lock); 1823 return (connp); 1824 } 1825 } 1826 mutex_exit(&bind_connfp->connf_lock); 1827 return (NULL); 1828 } 1829 1830 1831 conn_t * 1832 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 1833 zoneid_t zoneid) 1834 { 1835 connf_t *bind_connfp; 1836 conn_t *connp = NULL; 1837 tcp_t *tcp; 1838 1839 /* 1840 * Avoid false matches for packets sent to an IP destination of 1841 * all zeros. 
1842 */ 1843 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 1844 return (NULL); 1845 1846 1847 bind_connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(lport)]; 1848 mutex_enter(&bind_connfp->connf_lock); 1849 for (connp = bind_connfp->connf_head; connp != NULL; 1850 connp = connp->conn_next) { 1851 tcp = connp->conn_tcp; 1852 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 1853 connp->conn_zoneid == zoneid && 1854 (tcp->tcp_bound_if == 0 || 1855 tcp->tcp_bound_if == ifindex) && 1856 tcp->tcp_listener == NULL) { 1857 CONN_INC_REF(connp); 1858 mutex_exit(&bind_connfp->connf_lock); 1859 return (connp); 1860 } 1861 } 1862 mutex_exit(&bind_connfp->connf_lock); 1863 return (NULL); 1864 } 1865 1866 #ifdef CONN_DEBUG 1867 /* 1868 * Trace of the last NBUF refhold/refrele 1869 */ 1870 int 1871 conn_trace_ref(conn_t *connp) 1872 { 1873 int last; 1874 conn_trace_t *ctb; 1875 1876 ASSERT(MUTEX_HELD(&connp->conn_lock)); 1877 last = connp->conn_trace_last; 1878 last++; 1879 if (last == CONN_TRACE_MAX) 1880 last = 0; 1881 1882 ctb = &connp->conn_trace_buf[last]; 1883 ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 1884 connp->conn_trace_last = last; 1885 return (1); 1886 } 1887 1888 int 1889 conn_untrace_ref(conn_t *connp) 1890 { 1891 int last; 1892 conn_trace_t *ctb; 1893 1894 ASSERT(MUTEX_HELD(&connp->conn_lock)); 1895 last = connp->conn_trace_last; 1896 last++; 1897 if (last == CONN_TRACE_MAX) 1898 last = 0; 1899 1900 ctb = &connp->conn_trace_buf[last]; 1901 ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 1902 connp->conn_trace_last = last; 1903 return (1); 1904 } 1905 #endif 1906