1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 /* 28 * Copyright 2019 Joyent, Inc. 
29 * Copyright 2024 Oxide Computer Company 30 */ 31 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/strsun.h> 35 #include <sys/zone.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/atomic.h> 41 42 #include <sys/systm.h> 43 #include <sys/param.h> 44 #include <sys/kmem.h> 45 #include <sys/sdt.h> 46 #include <sys/socket.h> 47 #include <sys/mac.h> 48 #include <net/if.h> 49 #include <net/if_arp.h> 50 #include <net/route.h> 51 #include <sys/sockio.h> 52 #include <netinet/in.h> 53 #include <net/if_dl.h> 54 55 #include <inet/common.h> 56 #include <inet/mi.h> 57 #include <inet/mib2.h> 58 #include <inet/nd.h> 59 #include <inet/arp.h> 60 #include <inet/snmpcom.h> 61 #include <inet/kstatcom.h> 62 63 #include <netinet/igmp_var.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip6_asp.h> 72 #include <inet/tcp.h> 73 #include <inet/ip_multi.h> 74 #include <inet/ip_if.h> 75 #include <inet/ip_ire.h> 76 #include <inet/ip_ftable.h> 77 #include <inet/ip_rts.h> 78 #include <inet/optcom.h> 79 #include <inet/ip_ndp.h> 80 #include <inet/ip_listutils.h> 81 #include <netinet/igmp.h> 82 #include <netinet/ip_mroute.h> 83 #include <inet/ipp_common.h> 84 85 #include <net/pfkeyv2.h> 86 #include <inet/sadb.h> 87 #include <inet/ipsec_impl.h> 88 #include <inet/ipdrop.h> 89 #include <inet/ip_netinfo.h> 90 #include <sys/squeue_impl.h> 91 #include <sys/squeue.h> 92 93 #include <inet/ipclassifier.h> 94 #include <inet/sctp_ip.h> 95 #include <inet/sctp/sctp_impl.h> 96 #include <inet/udp_impl.h> 97 #include <sys/sunddi.h> 98 99 #include <sys/tsol/label.h> 100 #include <sys/tsol/tnet.h> 101 102 /* 103 * Release a reference on ip_xmit_attr. 
 * The reference is acquired by conn_get_ixa()
 *
 * This macro has a lowercase function-call version (ixa_refrele()) for
 * callers outside this file.
 */
#define	IXA_REFRELE(ixa)					\
{								\
	if (atomic_dec_32_nv(&(ixa)->ixa_refcnt) == 0)		\
		ixa_inactive(ixa);				\
}

/* Take an additional reference; the caller must already hold one. */
#define	IXA_REFHOLD(ixa)					\
{								\
	ASSERT3U((ixa)->ixa_refcnt, !=, 0);			\
	atomic_inc_32(&(ixa)->ixa_refcnt);			\
}

/*
 * When we need to handle a transmit side asynchronous operation, then we need
 * to save sufficient information so that we can call the fragment and postfrag
 * functions. That information is captured in an mblk containing this structure.
 * The mblk is tagged with db_type M_BREAK (see ip_xmit_attr_to_mblk()).
 *
 * Since this is currently only used for IPsec, we include information for
 * the kernel crypto framework.
 */
typedef struct ixamblk_s {
	boolean_t	ixm_inbound;	/* B_FALSE */
	iaflags_t	ixm_flags;	/* ixa_flags */
	netstackid_t	ixm_stackid;	/* Verify it didn't go away */
	uint_t		ixm_ifindex;	/* Used to find the nce */
	in6_addr_t	ixm_nceaddr_v6;	/* Used to find nce */
#define	ixm_nceaddr_v4	V4_PART_OF_V6(ixm_nceaddr_v6)
	uint32_t	ixm_fragsize;
	uint_t		ixm_pktlen;
	uint16_t	ixm_ip_hdr_length; /* Points to ULP header */
	uint8_t		ixm_protocol;	/* Protocol number for ULP cksum */
	pfirepostfrag_t	ixm_postfragfn;

	zoneid_t	ixm_zoneid;	/* Needed for ipobs */
	zoneid_t	ixm_no_loop_zoneid; /* IXAF_NO_LOOP_ZONEID_SET */

	uint_t		ixm_scopeid;	/* For IPv6 link-locals */

	uint32_t	ixm_ident;	/* For IPv6 fragment header */
	uint32_t	ixm_xmit_hint;

	uint64_t	ixm_conn_id;	/* Used by DTrace */
	cred_t		*ixm_cred;	/* For getpeerucred - refhold if set */
	pid_t		ixm_cpid;	/* For getpeerucred */

	ts_label_t	*ixm_tsl;	/* Refhold if set. */

	/*
	 * When the pointers below are set they have a refhold on the struct.
	 */
	ipsec_latch_t	*ixm_ipsec_latch;
	struct ipsa_s	*ixm_ipsec_ah_sa;	/* SA for AH */
	struct ipsa_s	*ixm_ipsec_esp_sa;	/* SA for ESP */
	struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? */
	struct ipsec_action_s *ixm_ipsec_action; /* For reflected packets */

	ipsa_ref_t	ixm_ipsec_ref[2];	/* Soft reference to SA */

	/* Need these while waiting for SA */
	uint16_t	ixm_ipsec_src_port;	/* Source port number of d-gram. */
	uint16_t	ixm_ipsec_dst_port;	/* Destination port number of d-gram. */
	uint8_t		ixm_ipsec_icmp_type;	/* ICMP type of d-gram */
	uint8_t		ixm_ipsec_icmp_code;	/* ICMP code of d-gram */

	sa_family_t	ixm_ipsec_inaf;		/* Inner address family */
	uint32_t	ixm_ipsec_insrc[IXA_MAX_ADDRLEN]; /* Inner src address */
	uint32_t	ixm_ipsec_indst[IXA_MAX_ADDRLEN]; /* Inner dest address */
	uint8_t		ixm_ipsec_insrcpfx;	/* Inner source prefix */
	uint8_t		ixm_ipsec_indstpfx;	/* Inner destination prefix */

	uint8_t		ixm_ipsec_proto;	/* IP protocol number for d-gram. */
} ixamblk_t;


/*
 * When we need to handle a receive side asynchronous operation, then we need
 * to save sufficient information so that we can call ip_fanout.
 * That information is captured in an mblk containing this structure.
 * The mblk is tagged with db_type M_BREAK (see ip_recv_attr_to_mblk()).
 *
 * Since this is currently only used for IPsec, we include information for
 * the kernel crypto framework.
 */
typedef struct iramblk_s {
	boolean_t	irm_inbound;	/* B_TRUE */
	iaflags_t	irm_flags;	/* ira_flags */
	netstackid_t	irm_stackid;	/* Verify it didn't go away */
	uint_t		irm_ifindex;	/* To find ira_ill */

	uint_t		irm_rifindex;	/* ira_rifindex */
	uint_t		irm_ruifindex;	/* ira_ruifindex */
	uint_t		irm_pktlen;
	uint16_t	irm_ip_hdr_length; /* Points to ULP header */
	uint8_t		irm_protocol;	/* Protocol number for ULP cksum */
	uint8_t		irm_ttl;	/* IP TTL, IPv6 hop limit */
	zoneid_t	irm_zoneid;	/* ALL_ZONES unless local delivery */

	squeue_t	*irm_sqp;
	ill_rx_ring_t	*irm_ring;

	ipaddr_t	irm_mroute_tunnel;	/* IRAF_MROUTE_TUNNEL_SET */
	zoneid_t	irm_no_loop_zoneid;	/* IRAF_NO_LOOP_ZONEID_SET */
	uint32_t	irm_esp_udp_ports;	/* IRAF_ESP_UDP_PORTS */

	char		irm_l2src[IRA_L2SRC_SIZE];	/* If IRAF_L2SRC_SET */

	cred_t		*irm_cred;	/* For getpeerucred - refhold if set */
	pid_t		irm_cpid;	/* For getpeerucred */

	ts_label_t	*irm_tsl;	/* Refhold if set. */

	/*
	 * When set these correspond to a refhold on the object.
	 */
	struct ipsa_s	*irm_ipsec_ah_sa;	/* SA for AH */
	struct ipsa_s	*irm_ipsec_esp_sa;	/* SA for ESP */
	struct ipsec_action_s *irm_ipsec_action; /* For reflected packets */
} iramblk_t;


/*
 * Take the information in ip_xmit_attr_t and stick it in an mblk
 * that can later be passed to ip_xmit_attr_from_mblk to recreate the
 * ip_xmit_attr_t.
 *
 * Returns NULL on memory allocation failure.
 */
mblk_t *
ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa)
{
	mblk_t		*ixamp;
	ixamblk_t	*ixm;
	nce_t		*nce = ixa->ixa_nce;

	ASSERT(nce != NULL);
	ixamp = allocb(sizeof (*ixm), BPRI_MED);
	if (ixamp == NULL)
		return (NULL);

	ixamp->b_datap->db_type = M_BREAK;
	ixamp->b_wptr += sizeof (*ixm);
	ixm = (ixamblk_t *)ixamp->b_rptr;

	bzero(ixm, sizeof (*ixm));
	ixm->ixm_inbound = B_FALSE;
	ixm->ixm_flags = ixa->ixa_flags;
	ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid;
	/* The ifindex + address pair let ip_xmit_attr_from_mblk refind nce */
	ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex;
	ixm->ixm_nceaddr_v6 = nce->nce_addr;
	ixm->ixm_fragsize = ixa->ixa_fragsize;
	ixm->ixm_pktlen = ixa->ixa_pktlen;
	ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length;
	ixm->ixm_protocol = ixa->ixa_protocol;
	ixm->ixm_postfragfn = ixa->ixa_postfragfn;
	ixm->ixm_zoneid = ixa->ixa_zoneid;
	ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid;
	ixm->ixm_scopeid = ixa->ixa_scopeid;
	ixm->ixm_ident = ixa->ixa_ident;
	ixm->ixm_xmit_hint = ixa->ixa_xmit_hint;

	/* The mblk carries its own references on the label and cred */
	if (ixa->ixa_tsl != NULL) {
		ixm->ixm_tsl = ixa->ixa_tsl;
		label_hold(ixm->ixm_tsl);
	}
	if (ixa->ixa_cred != NULL) {
		ixm->ixm_cred = ixa->ixa_cred;
		crhold(ixa->ixa_cred);
	}
	ixm->ixm_cpid = ixa->ixa_cpid;
	ixm->ixm_conn_id = ixa->ixa_conn_id;

	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
		/* Each saved IPsec pointer gets its own refhold */
		if (ixa->ixa_ipsec_ah_sa != NULL) {
			ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa;
			IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
		}
		if (ixa->ixa_ipsec_esp_sa != NULL) {
			ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa;
			IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
		}
		if (ixa->ixa_ipsec_policy != NULL) {
			ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy;
			IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
		}
		if (ixa->ixa_ipsec_action != NULL) {
			ixm->ixm_ipsec_action = ixa->ixa_ipsec_action;
			IPACT_REFHOLD(ixa->ixa_ipsec_action);
		}
		if (ixa->ixa_ipsec_latch != NULL) {
			ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch;
			IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
		}
		ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0];
		ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1];
		ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port;
		ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port;
		ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type;
		ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code;
		ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf;
		ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0];
		ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1];
		ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2];
		ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3];
		ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0];
		ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1];
		ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2];
		ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3];
		ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx;
		ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx;
		ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto;
	}
	return (ixamp);
}

/*
 * Extract the ip_xmit_attr_t from the mblk, checking that the
 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is
 * not the case.
 *
 * Otherwise ixa is updated.
 * Caller needs to release references on the ixa by calling ixa_refrele()
 * which will immediately call ixa_inactive to release the references.
330 */ 331 boolean_t 332 ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa) 333 { 334 ixamblk_t *ixm; 335 netstack_t *ns; 336 ip_stack_t *ipst; 337 ill_t *ill; 338 nce_t *nce; 339 340 /* We assume the caller hasn't initialized ixa */ 341 bzero(ixa, sizeof (*ixa)); 342 343 ASSERT(DB_TYPE(ixamp) == M_BREAK); 344 ASSERT(ixamp->b_cont == NULL); 345 346 ixm = (ixamblk_t *)ixamp->b_rptr; 347 ASSERT(!ixm->ixm_inbound); 348 349 /* Verify the netstack is still around */ 350 ns = netstack_find_by_stackid(ixm->ixm_stackid); 351 if (ns == NULL) { 352 /* Disappeared on us */ 353 (void) ip_xmit_attr_free_mblk(ixamp); 354 return (B_FALSE); 355 } 356 ipst = ns->netstack_ip; 357 358 /* Verify the ill is still around */ 359 ill = ill_lookup_on_ifindex(ixm->ixm_ifindex, 360 !(ixm->ixm_flags & IXAF_IS_IPV4), ipst); 361 362 /* We have the ill, hence the netstack can't go away */ 363 netstack_rele(ns); 364 if (ill == NULL) { 365 /* Disappeared on us */ 366 (void) ip_xmit_attr_free_mblk(ixamp); 367 return (B_FALSE); 368 } 369 /* 370 * Find the nce. We don't load-spread (only lookup nce's on the ill) 371 * because we want to find the same nce as the one we had when 372 * ip_xmit_attr_to_mblk was called. 373 */ 374 if (ixm->ixm_flags & IXAF_IS_IPV4) { 375 nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4); 376 } else { 377 nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6); 378 } 379 380 /* We have the nce, hence the ill can't go away */ 381 ill_refrele(ill); 382 if (nce == NULL) { 383 /* 384 * Since this is unusual and we don't know what type of 385 * nce it was, we drop the packet. 
386 */ 387 (void) ip_xmit_attr_free_mblk(ixamp); 388 return (B_FALSE); 389 } 390 391 ixa->ixa_flags = ixm->ixm_flags; 392 ixa->ixa_refcnt = 1; 393 ixa->ixa_ipst = ipst; 394 ixa->ixa_fragsize = ixm->ixm_fragsize; 395 ixa->ixa_pktlen = ixm->ixm_pktlen; 396 ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length; 397 ixa->ixa_protocol = ixm->ixm_protocol; 398 ixa->ixa_nce = nce; 399 ixa->ixa_postfragfn = ixm->ixm_postfragfn; 400 ixa->ixa_zoneid = ixm->ixm_zoneid; 401 ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid; 402 ixa->ixa_scopeid = ixm->ixm_scopeid; 403 ixa->ixa_ident = ixm->ixm_ident; 404 ixa->ixa_xmit_hint = ixm->ixm_xmit_hint; 405 406 if (ixm->ixm_tsl != NULL) { 407 ixa->ixa_tsl = ixm->ixm_tsl; 408 ixa->ixa_free_flags |= IXA_FREE_TSL; 409 ixm->ixm_tsl = NULL; 410 } 411 if (ixm->ixm_cred != NULL) { 412 ixa->ixa_cred = ixm->ixm_cred; 413 ixa->ixa_free_flags |= IXA_FREE_CRED; 414 ixm->ixm_cred = NULL; 415 } 416 ixa->ixa_cpid = ixm->ixm_cpid; 417 ixa->ixa_conn_id = ixm->ixm_conn_id; 418 419 ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa; 420 ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa; 421 ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy; 422 ixa->ixa_ipsec_action = ixm->ixm_ipsec_action; 423 ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch; 424 425 ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0]; 426 ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1]; 427 ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port; 428 ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port; 429 ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type; 430 ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code; 431 ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf; 432 ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0]; 433 ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1]; 434 ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2]; 435 ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3]; 436 ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0]; 437 ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1]; 438 ixa->ixa_ipsec_indst[2] = 
ixm->ixm_ipsec_indst[2]; 439 ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3]; 440 ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx; 441 ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx; 442 ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto; 443 444 freeb(ixamp); 445 return (B_TRUE); 446 } 447 448 /* 449 * Free the ixm mblk and any references it holds 450 * Returns b_cont. 451 */ 452 mblk_t * 453 ip_xmit_attr_free_mblk(mblk_t *ixamp) 454 { 455 ixamblk_t *ixm; 456 mblk_t *mp; 457 458 /* Consume mp */ 459 ASSERT(DB_TYPE(ixamp) == M_BREAK); 460 mp = ixamp->b_cont; 461 462 ixm = (ixamblk_t *)ixamp->b_rptr; 463 ASSERT(!ixm->ixm_inbound); 464 465 if (ixm->ixm_ipsec_ah_sa != NULL) { 466 IPSA_REFRELE(ixm->ixm_ipsec_ah_sa); 467 ixm->ixm_ipsec_ah_sa = NULL; 468 } 469 if (ixm->ixm_ipsec_esp_sa != NULL) { 470 IPSA_REFRELE(ixm->ixm_ipsec_esp_sa); 471 ixm->ixm_ipsec_esp_sa = NULL; 472 } 473 if (ixm->ixm_ipsec_policy != NULL) { 474 IPPOL_REFRELE(ixm->ixm_ipsec_policy); 475 ixm->ixm_ipsec_policy = NULL; 476 } 477 if (ixm->ixm_ipsec_action != NULL) { 478 IPACT_REFRELE(ixm->ixm_ipsec_action); 479 ixm->ixm_ipsec_action = NULL; 480 } 481 if (ixm->ixm_ipsec_latch) { 482 IPLATCH_REFRELE(ixm->ixm_ipsec_latch); 483 ixm->ixm_ipsec_latch = NULL; 484 } 485 486 if (ixm->ixm_tsl != NULL) { 487 label_rele(ixm->ixm_tsl); 488 ixm->ixm_tsl = NULL; 489 } 490 if (ixm->ixm_cred != NULL) { 491 crfree(ixm->ixm_cred); 492 ixm->ixm_cred = NULL; 493 } 494 freeb(ixamp); 495 return (mp); 496 } 497 498 /* 499 * Take the information in ip_recv_attr_t and stick it in an mblk 500 * that can later be passed to ip_recv_attr_from_mblk to recreate the 501 * ip_recv_attr_t. 502 * 503 * Returns NULL on memory allocation failure. 
 */
mblk_t *
ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
{
	mblk_t		*iramp;
	iramblk_t	*irm;
	ill_t		*ill = ira->ira_ill;

	ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);

	iramp = allocb(sizeof (*irm), BPRI_MED);
	if (iramp == NULL)
		return (NULL);

	iramp->b_datap->db_type = M_BREAK;
	iramp->b_wptr += sizeof (*irm);
	irm = (iramblk_t *)iramp->b_rptr;

	bzero(irm, sizeof (*irm));
	irm->irm_inbound = B_TRUE;
	irm->irm_flags = ira->ira_flags;
	if (ill != NULL) {
		/* Internal to IP - preserve ip_stack_t, ill and rill */
		irm->irm_stackid =
		    ill->ill_ipst->ips_netstack->netstack_stackid;
		irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
		ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
		    ira->ira_rifindex);
	} else {
		/* Let ip_recv_attr_from_mblk know there isn't one */
		irm->irm_stackid = -1;
	}
	irm->irm_rifindex = ira->ira_rifindex;
	irm->irm_ruifindex = ira->ira_ruifindex;
	irm->irm_pktlen = ira->ira_pktlen;
	irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
	irm->irm_protocol = ira->ira_protocol;
	irm->irm_ttl = ira->ira_ttl;

	irm->irm_sqp = ira->ira_sqp;
	irm->irm_ring = ira->ira_ring;

	irm->irm_zoneid = ira->ira_zoneid;
	irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
	irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
	irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;

	/* The mblk carries its own references on the label and cred */
	if (ira->ira_tsl != NULL) {
		irm->irm_tsl = ira->ira_tsl;
		label_hold(irm->irm_tsl);
	}
	if (ira->ira_cred != NULL) {
		irm->irm_cred = ira->ira_cred;
		crhold(ira->ira_cred);
	}
	irm->irm_cpid = ira->ira_cpid;

	if (ira->ira_flags & IRAF_L2SRC_SET)
		bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);

	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		/* Each saved IPsec pointer gets its own refhold */
		if (ira->ira_ipsec_ah_sa != NULL) {
			irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
			IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
		}
		if (ira->ira_ipsec_esp_sa != NULL) {
			irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
			IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
		}
		if (ira->ira_ipsec_action != NULL) {
			irm->irm_ipsec_action = ira->ira_ipsec_action;
			IPACT_REFHOLD(ira->ira_ipsec_action);
		}
	}
	return (iramp);
}

/*
 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
 * then irm_stackid is not -1, in which case we check that the
 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
 * not the case.
 * If irm_stackid is -1 then we are used by an ULP (e.g., squeue_enter)
 * and we just proceed with ira_ill and ira_rill as NULL.
 *
 * The caller needs to release any references on the pointers inside the ira
 * by calling ira_cleanup.
 */
boolean_t
ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
{
	iramblk_t	*irm;
	netstack_t	*ns;
	ip_stack_t	*ipst = NULL;
	ill_t		*ill = NULL, *rill = NULL;

	/* We assume the caller hasn't initialized ira */
	bzero(ira, sizeof (*ira));

	ASSERT(DB_TYPE(iramp) == M_BREAK);
	ASSERT(iramp->b_cont == NULL);

	irm = (iramblk_t *)iramp->b_rptr;
	ASSERT(irm->irm_inbound);

	if (irm->irm_stackid != -1) {
		/* Verify the netstack is still around */
		ns = netstack_find_by_stackid(irm->irm_stackid);
		if (ns == NULL) {
			/* Disappeared on us */
			(void) ip_recv_attr_free_mblk(iramp);
			return (B_FALSE);
		}
		ipst = ns->netstack_ip;

		/* Verify the ill is still around */
		ill = ill_lookup_on_ifindex(irm->irm_ifindex,
		    !(irm->irm_flags & IRAF_IS_IPV4), ipst);

		/* rill is the same ill unless the packet was forwarded */
		if (irm->irm_ifindex == irm->irm_rifindex) {
			rill = ill;
		} else {
			rill = ill_lookup_on_ifindex(irm->irm_rifindex,
			    !(irm->irm_flags & IRAF_IS_IPV4), ipst);
		}

		/* We have the ill, hence the netstack can't go away */
		netstack_rele(ns);
		if (ill == NULL || rill == NULL) {
			/* Disappeared on us; drop whichever we did find */
			if (ill != NULL)
				ill_refrele(ill);
			if (rill != NULL && rill != ill)
				ill_refrele(rill);
			(void) ip_recv_attr_free_mblk(iramp);
			return (B_FALSE);
		}
	}

	ira->ira_flags = irm->irm_flags;
	/* Caller must ill_refrele(ira_ill) by using ira_cleanup() */
	ira->ira_ill = ill;
	ira->ira_rill = rill;

	ira->ira_rifindex = irm->irm_rifindex;
	ira->ira_ruifindex = irm->irm_ruifindex;
	ira->ira_pktlen = irm->irm_pktlen;
	ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
	ira->ira_protocol = irm->irm_protocol;
	ira->ira_ttl = irm->irm_ttl;

	ira->ira_sqp = irm->irm_sqp;
	/* The rest of IP assumes that the rings never go away. */
	ira->ira_ring = irm->irm_ring;

	ira->ira_zoneid = irm->irm_zoneid;
	ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
	ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
	ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;

	/*
	 * Transfer ownership of the label and cred references from the
	 * mblk to the ira; clear the mblk fields so the final freeb
	 * doesn't also release them.
	 */
	if (irm->irm_tsl != NULL) {
		ira->ira_tsl = irm->irm_tsl;
		ira->ira_free_flags |= IRA_FREE_TSL;
		irm->irm_tsl = NULL;
	}
	if (irm->irm_cred != NULL) {
		ira->ira_cred = irm->irm_cred;
		ira->ira_free_flags |= IRA_FREE_CRED;
		irm->irm_cred = NULL;
	}
	ira->ira_cpid = irm->irm_cpid;

	if (ira->ira_flags & IRAF_L2SRC_SET)
		bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);

	/* The IPsec refholds move from the mblk to the ira */
	ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
	ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
	ira->ira_ipsec_action = irm->irm_ipsec_action;

	freeb(iramp);
	return (B_TRUE);
}

/*
 * Free the irm mblk and any references it holds
 * Returns b_cont.
 */
mblk_t *
ip_recv_attr_free_mblk(mblk_t *iramp)
{
	iramblk_t	*irm;
	mblk_t		*mp;

	/* Consume mp */
	ASSERT(DB_TYPE(iramp) == M_BREAK);
	mp = iramp->b_cont;

	irm = (iramblk_t *)iramp->b_rptr;
	ASSERT(irm->irm_inbound);

	if (irm->irm_ipsec_ah_sa != NULL) {
		IPSA_REFRELE(irm->irm_ipsec_ah_sa);
		irm->irm_ipsec_ah_sa = NULL;
	}
	if (irm->irm_ipsec_esp_sa != NULL) {
		IPSA_REFRELE(irm->irm_ipsec_esp_sa);
		irm->irm_ipsec_esp_sa = NULL;
	}
	if (irm->irm_ipsec_action != NULL) {
		IPACT_REFRELE(irm->irm_ipsec_action);
		irm->irm_ipsec_action = NULL;
	}
	if (irm->irm_tsl != NULL) {
		label_rele(irm->irm_tsl);
		irm->irm_tsl = NULL;
	}
	if (irm->irm_cred != NULL) {
		crfree(irm->irm_cred);
		irm->irm_cred = NULL;
	}

	freeb(iramp);
	return (mp);
}

/*
 * Returns true if the mblk contains an ip_recv_attr_t
 * For now we just check db_type.
 */
boolean_t
ip_recv_attr_is_mblk(mblk_t *mp)
{
	/*
	 * Need to handle the various forms of tcp_timermp which are tagged
	 * with b_wptr and might have a NULL b_datap.
	 */
	if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1)
		return (B_FALSE);

#ifdef	DEBUG
	iramblk_t	*irm;

	if (DB_TYPE(mp) != M_BREAK)
		return (B_FALSE);

	irm = (iramblk_t *)mp->b_rptr;
	ASSERT(irm->irm_inbound);
	return (B_TRUE);
#else
	return (DB_TYPE(mp) == M_BREAK);
#endif
}

/*
 * Common body for conn_get_ixa() and conn_get_ixa_tryhard(); kmflag
 * selects KM_NOSLEEP vs KM_SLEEP for the copy allocation.
 */
static ip_xmit_attr_t *
conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag)
{
	ip_xmit_attr_t *oldixa;	/* Already attached to conn_t */
	ip_xmit_attr_t *ixa;	/* New one, which we return. */

	/*
	 * NOTE: If the marked-below common case isn't, move the
	 * kmem_alloc() up here and put a free in what was marked as the
	 * (not really) common case instead.
	 */

	mutex_enter(&connp->conn_lock);
	oldixa = connp->conn_ixa;

	/* At least one reference for the conn_t */
	ASSERT3U(oldixa->ixa_refcnt, >=, 1);
	if (atomic_inc_32_nv(&oldixa->ixa_refcnt) == 2) {
		/* No other thread using conn_ixa (common case) */
		mutex_exit(&connp->conn_lock);
		return (oldixa);
	}
	/* Do allocation inside-the-conn_lock because it's less common. */
	ixa = kmem_alloc(sizeof (*ixa), kmflag);
	if (ixa == NULL) {
		mutex_exit(&connp->conn_lock);
		IXA_REFRELE(oldixa);
		return (NULL);
	}
	ixa_safe_copy(oldixa, ixa);

	/* Make sure we drop conn_lock before any refrele */
	if (replace) {
		ixa->ixa_refcnt++;	/* No atomic needed - not visible */
		connp->conn_ixa = ixa;
		mutex_exit(&connp->conn_lock);
		IXA_REFRELE(oldixa);	/* Undo refcnt from conn_t */
	} else {
		mutex_exit(&connp->conn_lock);
	}
	IXA_REFRELE(oldixa);	/* Undo above atomic_inc_32_nv */

	return (ixa);
}

/*
 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only
 * the caller can access the ip_xmit_attr_t.
 *
 * If nobody else is using conn_ixa we return it.
 * Otherwise we make a "safe" copy of conn_ixa
 * and return it. The "safe" copy has the pointers set to NULL
 * (since the pointers might be changed by another thread using
 * conn_ixa). The caller needs to check for NULL pointers to see
 * if ip_set_destination needs to be called to re-establish the pointers.
 *
 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t.
 * That is used when we connect() the ULP.
 */
ip_xmit_attr_t *
conn_get_ixa(conn_t *connp, boolean_t replace)
{
	return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP));
}

/*
 * Used only when the option is to have the kernel hang due to not
 * cleaning up ixa references on ills etc.
825 */ 826 ip_xmit_attr_t * 827 conn_get_ixa_tryhard(conn_t *connp, boolean_t replace) 828 { 829 return (conn_get_ixa_impl(connp, replace, KM_SLEEP)); 830 } 831 832 /* 833 * Replace conn_ixa with the ixa argument. 834 * 835 * The caller must hold conn_lock. 836 * 837 * We return the old ixa; the caller must ixa_refrele that after conn_lock 838 * has been dropped. 839 */ 840 ip_xmit_attr_t * 841 conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa) 842 { 843 ip_xmit_attr_t *oldixa; 844 845 ASSERT(MUTEX_HELD(&connp->conn_lock)); 846 847 oldixa = connp->conn_ixa; 848 IXA_REFHOLD(ixa); 849 ixa->ixa_conn_id = oldixa->ixa_conn_id; 850 connp->conn_ixa = ixa; 851 return (oldixa); 852 } 853 854 /* 855 * Return a ip_xmit_attr_t to use with a conn_t that is based on but 856 * separate from conn_ixa. 857 * 858 * This "safe" copy has the pointers set to NULL 859 * (since the pointers might be changed by another thread using 860 * conn_ixa). The caller needs to check for NULL pointers to see 861 * if ip_set_destination needs to be called to re-establish the pointers. 
 */
ip_xmit_attr_t *
conn_get_ixa_exclusive(conn_t *connp)
{
	ip_xmit_attr_t *oldixa;
	ip_xmit_attr_t *ixa;

	/* Allocate before taking conn_lock so we never sleep under it */
	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP_LAZY);
	if (ixa == NULL)
		return (NULL);

	mutex_enter(&connp->conn_lock);

	/* Hold conn_ixa so it can't go away while we copy from it */
	oldixa = connp->conn_ixa;
	IXA_REFHOLD(oldixa);

	ixa_safe_copy(oldixa, ixa);
	mutex_exit(&connp->conn_lock);
	IXA_REFRELE(oldixa);
	return (ixa);
}

/*
 * Copy src into ixa, producing a "safe" copy: pointer fields that other
 * threads might change are cleared, and reference-counted fields we keep
 * (label, cred) get an extra hold.  The copy starts with a refcnt of 1.
 */
void
ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
{
	bcopy(src, ixa, sizeof (*ixa));
	ixa->ixa_refcnt = 1;
	/*
	 * Clear any pointers that have references and might be changed
	 * by ip_set_destination or the ULP
	 */
	ixa->ixa_ire = NULL;
	ixa->ixa_nce = NULL;
	ixa->ixa_dce = NULL;
	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
#ifdef DEBUG
	ixa->ixa_curthread = NULL;
#endif
	/* Clear all the IPsec pointers and the flag as well. */
	ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;

	ixa->ixa_ipsec_latch = NULL;
	ixa->ixa_ipsec_ah_sa = NULL;
	ixa->ixa_ipsec_esp_sa = NULL;
	ixa->ixa_ipsec_policy = NULL;
	ixa->ixa_ipsec_action = NULL;

	/*
	 * We leave ixa_tsl unchanged, but if it has a refhold we need
	 * to get an extra refhold.
	 */
	if (ixa->ixa_free_flags & IXA_FREE_TSL)
		label_hold(ixa->ixa_tsl);

	/*
	 * We leave ixa_cred unchanged, but if it has a refhold we need
	 * to get an extra refhold.
	 */
	if (ixa->ixa_free_flags & IXA_FREE_CRED)
		crhold(ixa->ixa_cred);

	/*
	 * There is no cleanup in progress on this new copy.
	 */
	ixa->ixa_tcpcleanup = IXATC_IDLE;
}

/*
 * Duplicate an ip_xmit_attr_t.
 * Assumes that the caller controls the ixa, hence we do not need to use
 * a safe copy. We just have to increase the refcnt on any pointers.
 */
ip_xmit_attr_t *
ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa)
{
	ip_xmit_attr_t *ixa;

	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
	if (ixa == NULL)
		return (NULL);
	bcopy(src_ixa, ixa, sizeof (*ixa));
	ixa->ixa_refcnt = 1;

	/* The duplicate needs its own reference on each held object */
	if (ixa->ixa_ire != NULL)
		ire_refhold_notr(ixa->ixa_ire);
	if (ixa->ixa_nce != NULL)
		nce_refhold(ixa->ixa_nce);
	if (ixa->ixa_dce != NULL)
		dce_refhold_notr(ixa->ixa_dce);

#ifdef DEBUG
	ixa->ixa_curthread = NULL;
#endif

	if (ixa->ixa_ipsec_latch != NULL)
		IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
	if (ixa->ixa_ipsec_ah_sa != NULL)
		IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
	if (ixa->ixa_ipsec_esp_sa != NULL)
		IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
	if (ixa->ixa_ipsec_policy != NULL)
		IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
	if (ixa->ixa_ipsec_action != NULL)
		IPACT_REFHOLD(ixa->ixa_ipsec_action);

	if (ixa->ixa_tsl != NULL) {
		label_hold(ixa->ixa_tsl);
		ixa->ixa_free_flags |= IXA_FREE_TSL;
	}
	if (ixa->ixa_cred != NULL) {
		crhold(ixa->ixa_cred);
		ixa->ixa_free_flags |= IXA_FREE_CRED;
	}
	return (ixa);
}

/*
 * Used to replace the ixa_tsl field.
 * The caller should have a reference on the label, which we transfer to
 * the attributes so that when the attribute is freed/cleaned up
 * we will release that reference.
 */
void
ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl)
{
	ASSERT(tsl != NULL);

	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
	} else {
		/* Take ownership of the caller's reference */
		ixa->ixa_free_flags |= IXA_FREE_TSL;
	}
	ixa->ixa_tsl = tsl;
}

/*
 * Replace the ip_recv_attr_t's label.
 * Due to kernel RPC's use of db_credp we also need to replace ira_cred;
 * TCP/UDP uses ira_cred to set db_credp for non-socket users.
 * This can fail (and return B_FALSE) due to lack of memory.
 */
boolean_t
ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl)
{
	cred_t	*newcr;

	if (ira->ira_free_flags & IRA_FREE_TSL) {
		ASSERT(ira->ira_tsl != NULL);
		label_rele(ira->ira_tsl);
	}
	label_hold(tsl);
	ira->ira_tsl = tsl;
	ira->ira_free_flags |= IRA_FREE_TSL;

	/*
	 * Reset zoneid if we have a shared address. That allows
	 * ip_fanout_tx_v4/v6 to determine the zoneid again.
	 */
	if (ira->ira_flags & IRAF_TX_SHARED_ADDR)
		ira->ira_zoneid = ALL_ZONES;

	/*
	 * We update ira_cred for RPC.
	 * NOTE(review): on allocation failure ira_tsl has already been
	 * replaced above; only the ira_cred update is skipped.
	 */
	newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP);
	if (newcr == NULL)
		return (B_FALSE);
	if (ira->ira_free_flags & IRA_FREE_CRED)
		crfree(ira->ira_cred);
	ira->ira_cred = newcr;
	ira->ira_free_flags |= IRA_FREE_CRED;
	return (B_TRUE);
}

/*
 * This needs to be called after ip_set_destination/tsol_check_dest might
 * have changed ixa_tsl to be specific for a destination, and we now want to
 * send to a different destination.
 * We have to restart with crgetlabel() since ip_set_destination/
 * tsol_check_dest will start with ixa_tsl.
 */
void
ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr)
{
	if (!is_system_labeled())
		return;

	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
	}
	/* The cred's label is not separately refheld here */
	ixa->ixa_tsl = crgetlabel(cr);
}

/* Function-call form of IXA_REFRELE for callers outside this file */
void
ixa_refrele(ip_xmit_attr_t *ixa)
{
	IXA_REFRELE(ixa);
}

/* Called when the last reference is dropped; releases and frees the ixa */
void
ixa_inactive(ip_xmit_attr_t *ixa)
{
	ASSERT(ixa->ixa_refcnt == 0);

	ixa_cleanup(ixa);
	kmem_free(ixa, sizeof (*ixa));
}

/*
 * Release any references contained in the ixa.
 * Also clear any fields that are not controlled by ixa_flags.
 */
void
ixa_cleanup(ip_xmit_attr_t *ixa)
{
	/* Drop the cached forwarding-path entries (ire, dce, nce). */
	if (ixa->ixa_ire != NULL) {
		ire_refrele_notr(ixa->ixa_ire);
		ixa->ixa_ire = NULL;
	}
	if (ixa->ixa_dce != NULL) {
		dce_refrele_notr(ixa->ixa_dce);
		ixa->ixa_dce = NULL;
	}
	if (ixa->ixa_nce != NULL) {
		nce_refrele(ixa->ixa_nce);
		ixa->ixa_nce = NULL;
	}
	/* Force revalidation if this ixa is ever used for transmit again. */
	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
		ipsec_out_release_refs(ixa);
	}
	/* Release the label only if the ixa owns a reference on it. */
	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
	}
	ixa->ixa_tsl = NULL;
	/* Likewise for the cred. */
	if (ixa->ixa_free_flags & IXA_FREE_CRED) {
		ASSERT(ixa->ixa_cred != NULL);
		crfree(ixa->ixa_cred);
		ixa->ixa_free_flags &= ~IXA_FREE_CRED;
	}
	ixa->ixa_cred = NULL;
	/* Reset the fields not governed by ixa_flags. */
	ixa->ixa_src_preferences = 0;
	ixa->ixa_ifindex = 0;
	ixa->ixa_multicast_ifindex = 0;
	ixa->ixa_multicast_ifaddr = INADDR_ANY;
}

/*
 * Release any references contained in the ira.
 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second
 * argument.
 */
void
ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill)
{
	if (ira->ira_ill != NULL) {
		if (ira->ira_rill != ira->ira_ill) {
			/* Caused by async processing */
			ill_refrele(ira->ira_rill);
		}
		/*
		 * ira_ill is only released when the caller owns a reference
		 * on it (i.e. the ira came from ip_recv_attr_from_mblk()).
		 */
		if (refrele_ill)
			ill_refrele(ira->ira_ill);
	}
	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		ipsec_in_release_refs(ira);
	}
	/* Release the label only if the ira owns a reference on it. */
	if (ira->ira_free_flags & IRA_FREE_TSL) {
		ASSERT(ira->ira_tsl != NULL);
		label_rele(ira->ira_tsl);
		ira->ira_free_flags &= ~IRA_FREE_TSL;
	}
	ira->ira_tsl = NULL;
	/* Likewise for the cred. */
	if (ira->ira_free_flags & IRA_FREE_CRED) {
		ASSERT(ira->ira_cred != NULL);
		crfree(ira->ira_cred);
		ira->ira_free_flags &= ~IRA_FREE_CRED;
	}
	ira->ira_cred = NULL;
}

/*
 * Function to help release any IRE, NCE, or DCEs that
 * have been deleted and are marked as condemned.
 * The caller is responsible for any serialization which is different
 * for TCP, SCTP, and others.
 */
static void
ixa_cleanup_stale(ip_xmit_attr_t *ixa)
{
	ire_t	*ire;
	nce_t	*nce;
	dce_t	*dce;

	ire = ixa->ixa_ire;
	nce = ixa->ixa_nce;
	dce = ixa->ixa_dce;

	if (ire != NULL && IRE_IS_CONDEMNED(ire)) {
		/*
		 * Swap the condemned ire for the stack's blackhole ire so
		 * ixa_ire never becomes NULL; IRE_GENERATION_VERIFY forces
		 * the next transmit to look up a real route.
		 */
		ire_refrele_notr(ire);
		ire = ire_blackhole(ixa->ixa_ipst,
		    !(ixa->ixa_flags & IXAF_IS_IPV4));
		ASSERT(ire != NULL);
#ifdef DEBUG
		/*
		 * Convert the tracked hold ire_blackhole() returned into an
		 * untracked one, to match the _notr refrele used elsewhere
		 * on ixa_ire.
		 */
		ire_refhold_notr(ire);
		ire_refrele(ire);
#endif
		ixa->ixa_ire = ire;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}
	if (nce != NULL && nce->nce_is_condemned) {
		/* Can make it NULL as long as we set IRE_GENERATION_VERIFY */
		nce_refrele(nce);
		ixa->ixa_nce = NULL;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}
	if (dce != NULL && DCE_IS_CONDEMNED(dce)) {
		/* As for the ire: substitute the default dce, never NULL. */
		dce_refrele_notr(dce);
		dce = dce_get_default(ixa->ixa_ipst);
		ASSERT(dce != NULL);
#ifdef DEBUG
		/* Convert tracked hold to untracked, as above. */
		dce_refhold_notr(dce);
		dce_refrele(dce);
#endif
		ixa->ixa_dce = dce;
		ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
	}
}

/*
 * Obtain exclusive use of the per-stack cleanup mblk for cleaning up
 * connp's ixa, marking the connection IXATC_INPROGRESS. Blocks until both
 * the mblk is available and no other cleanup of this connection is pending.
 */
static mblk_t *
tcp_ixa_cleanup_getmblk(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
	int need_retry;
	mblk_t *mp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	/*
	 * It's possible that someone else came in and started cleaning up
	 * another connection between the time we verified this one is not being
	 * cleaned up and the time we actually get the shared mblk. If that's
	 * the case, we've dropped the lock, and some other thread may have
	 * cleaned up this connection again, and is still waiting for
	 * notification of that cleanup's completion. Therefore we need to
	 * recheck.
	 */
	do {
		need_retry = 0;
		/* Wait until no cleanup of this connection is in flight. */
		while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
			cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}

		while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
			/*
			 * Multiple concurrent cleanups; need to have the last
			 * one run since it could be an unplumb.
			 */
			need_retry = 1;
			cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}
	} while (need_retry);

	/*
	 * We now have the lock and the mblk; now make sure that no one else can
	 * try to clean up this connection or enqueue it for cleanup, clear the
	 * mblk pointer for this stack, drop the lock, and return the mblk.
	 */
	ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
	ASSERT(tcps->tcps_ixa_cleanup_mp == mp);
	ASSERT(mp != NULL);

	connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
	tcps->tcps_ixa_cleanup_mp = NULL;
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);

	return (mp);
}

/*
 * Used to run ixa_cleanup_stale inside the tcp squeue.
 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
 * and waking up the caller.
 */
/* ARGSUSED2 */
static void
tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
    ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	tcp_stack_t *tcps;

	tcps = connp->conn_netstack->netstack_tcp;

	/* Safe here: the squeue serializes access to conn_ixa. */
	ixa_cleanup_stale(connp->conn_ixa);

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);
	ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
	connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
	/* Return the shared mblk and wake anyone waiting to borrow it. */
	tcps->tcps_ixa_cleanup_mp = mp;
	cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
	/*
	 * It is possible for any number of threads to be waiting for cleanup of
	 * different connections. Absent a per-connection (or per-IXA) CV, we
	 * need to wake them all up even though only one can be waiting on this
	 * particular cleanup.
	 */
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}

/*
 * Wait for the squeue-side tcp_ixa_cleanup() of connp to complete, then
 * mark the connection IXATC_IDLE again so another cleanup may proceed.
 */
static void
tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);

	while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) {
		cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
		    &tcps->tcps_ixa_cleanup_lock);
	}

	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);
	connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
	/* Wake threads in tcp_ixa_cleanup_getmblk() rechecking this conn. */
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);

	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}

/*
 * ipcl_walk() function to help release any IRE, NCE, or DCEs that
 * have been deleted and are marked as condemned.
 * Note that we can't cleanup the pointers since there can be threads
 * in conn_ip_output() sending while we are called.
 */
void
conn_ixa_cleanup(conn_t *connp, void *arg)
{
	/* arg is a boolean_t smuggled through ipcl_walk()'s void * arg. */
	boolean_t tryhard = (boolean_t)arg;

	if (IPCL_IS_TCP(connp)) {
		mblk_t *mp;

		/*
		 * TCP serializes via its squeue: get the per-stack cleanup
		 * mblk and run tcp_ixa_cleanup() on the squeue (directly if
		 * we are already running it), then wait for completion.
		 */
		mp = tcp_ixa_cleanup_getmblk(connp);

		if (connp->conn_sqp->sq_run == curthread) {
			/* Already on squeue */
			tcp_ixa_cleanup(connp, mp, NULL, NULL);
		} else {
			CONN_INC_REF(connp);
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
			    connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
		}
		tcp_ixa_cleanup_wait_and_finish(connp);
	} else if (IPCL_IS_SCTP(connp)) {
		sctp_t *sctp;
		sctp_faddr_t *fp;

		/*
		 * SCTP serializes with RUN_SCTP/WAKE_SCTP and has one ixa
		 * per peer address in addition to conn_ixa.
		 */
		sctp = CONN2SCTP(connp);
		RUN_SCTP(sctp);
		ixa_cleanup_stale(connp->conn_ixa);
		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next)
			ixa_cleanup_stale(fp->sf_ixa);
		WAKE_SCTP(sctp);
	} else {
		ip_xmit_attr_t *ixa;

		/*
		 * If there is a different thread using conn_ixa then we get a
		 * new copy and cut the old one loose from conn_ixa. Otherwise
		 * we use conn_ixa and prevent any other thread from
		 * using/changing it. Anybody using conn_ixa (e.g., a thread in
		 * conn_ip_output) will do an ixa_refrele which will remove any
		 * references on the ire etc.
		 *
		 * Once we are done other threads can use conn_ixa since the
		 * refcnt will be back at one.
		 *
		 * We are called either because an ill is going away, or
		 * due to memory reclaim. In the former case we wait for
		 * memory since we must remove the refcnts on the ill.
		 */
		if (tryhard) {
			ixa = conn_get_ixa_tryhard(connp, B_TRUE);
			ASSERT(ixa != NULL);
		} else {
			ixa = conn_get_ixa(connp, B_TRUE);
			if (ixa == NULL) {
				/*
				 * Somebody else was using it and kmem_alloc
				 * failed! Next memory reclaim will try to
				 * clean up.
				 */
				DTRACE_PROBE1(conn__ixa__cleanup__bail,
				    conn_t *, connp);
				return;
			}
		}
		ixa_cleanup_stale(ixa);
		IXA_REFRELE(ixa);
	}
}

/*
 * ixa needs to be an exclusive copy so that no one changes the cookie
 * or the ixa_nce.
 */
boolean_t
ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa)
{
	uintptr_t cookie = ixa->ixa_cookie;
	ill_dld_direct_t *idd;
	idl_tx_list_t *idl_txl;
	ill_t *ill = ixa->ixa_nce->nce_ill;
	boolean_t inserted = B_FALSE;

	idd = &(ill)->ill_dld_capab->idc_direct;
	/* The cookie hashes to the drain (tx) list we may insert into. */
	idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
	mutex_enter(&idl_txl->txl_lock);

	/*
	 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow
	 * control is asserted on an ill that does not support direct calls.
	 * Jump to insert.
	 */
	if (cookie == 0)
		goto tryinsert;

	ASSERT(ILL_DIRECT_CAPABLE(ill));

	if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) {
		/* Flow control already lifted; nothing to enqueue. */
		DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie);
	} else if (idl_txl->txl_cookie != (uintptr_t)NULL &&
	    idl_txl->txl_cookie != ixa->ixa_cookie) {
		/* This drain list is already tracking a different cookie. */
		DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie,
		    uintptr_t, idl_txl->txl_cookie);
		/* TODO: bump kstat for cookie collision */
	} else {
		/*
		 * Check/set conn_blocked under conn_lock. Note that txl_lock
		 * will not suffice since two separate UDP threads may be
		 * racing to send to different destinations that are
		 * associated with different cookies and thus may not be
		 * holding the same txl_lock. Further, since a given conn_t
		 * can only be on a single drain list, the conn_t will be
		 * enqueued on whichever thread wins this race.
		 */
tryinsert:	mutex_enter(&connp->conn_lock);
		if (connp->conn_blocked) {
			DTRACE_PROBE1(ill__tx__conn__already__blocked,
			    conn_t *, connp);
			mutex_exit(&connp->conn_lock);
		} else {
			connp->conn_blocked = B_TRUE;
			mutex_exit(&connp->conn_lock);
			idl_txl->txl_cookie = cookie;
			conn_drain_insert(connp, idl_txl);
			/*
			 * For STREAMS conns, stop the write queue from
			 * being enabled until the drain completes.
			 */
			if (!IPCL_IS_NONSTR(connp))
				noenable(connp->conn_wq);
			inserted = B_TRUE;
		}
	}
	mutex_exit(&idl_txl->txl_lock);
	return (inserted);
}