1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 /* 28 * Copyright 2019 Joyent, Inc. 
29 */ 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/strsun.h> 34 #include <sys/zone.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/cmn_err.h> 38 #include <sys/debug.h> 39 #include <sys/atomic.h> 40 41 #include <sys/systm.h> 42 #include <sys/param.h> 43 #include <sys/kmem.h> 44 #include <sys/sdt.h> 45 #include <sys/socket.h> 46 #include <sys/mac.h> 47 #include <net/if.h> 48 #include <net/if_arp.h> 49 #include <net/route.h> 50 #include <sys/sockio.h> 51 #include <netinet/in.h> 52 #include <net/if_dl.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/mib2.h> 57 #include <inet/nd.h> 58 #include <inet/arp.h> 59 #include <inet/snmpcom.h> 60 #include <inet/kstatcom.h> 61 62 #include <netinet/igmp_var.h> 63 #include <netinet/ip6.h> 64 #include <netinet/icmp6.h> 65 #include <netinet/sctp.h> 66 67 #include <inet/ip.h> 68 #include <inet/ip_impl.h> 69 #include <inet/ip6.h> 70 #include <inet/ip6_asp.h> 71 #include <inet/tcp.h> 72 #include <inet/ip_multi.h> 73 #include <inet/ip_if.h> 74 #include <inet/ip_ire.h> 75 #include <inet/ip_ftable.h> 76 #include <inet/ip_rts.h> 77 #include <inet/optcom.h> 78 #include <inet/ip_ndp.h> 79 #include <inet/ip_listutils.h> 80 #include <netinet/igmp.h> 81 #include <netinet/ip_mroute.h> 82 #include <inet/ipp_common.h> 83 84 #include <net/pfkeyv2.h> 85 #include <inet/sadb.h> 86 #include <inet/ipsec_impl.h> 87 #include <inet/ipdrop.h> 88 #include <inet/ip_netinfo.h> 89 #include <sys/squeue_impl.h> 90 #include <sys/squeue.h> 91 92 #include <inet/ipclassifier.h> 93 #include <inet/sctp_ip.h> 94 #include <inet/sctp/sctp_impl.h> 95 #include <inet/udp_impl.h> 96 #include <sys/sunddi.h> 97 98 #include <sys/tsol/label.h> 99 #include <sys/tsol/tnet.h> 100 101 /* 102 * Release a reference on ip_xmit_attr. 103 * The reference is acquired by conn_get_ixa() 104 * 105 * This macro has a lowercase function-call version for callers outside 106 * this file. 
107 */ 108 #define IXA_REFRELE(ixa) \ 109 { \ 110 if (atomic_dec_32_nv(&(ixa)->ixa_refcnt) == 0) \ 111 ixa_inactive(ixa); \ 112 } 113 114 #define IXA_REFHOLD(ixa) \ 115 { \ 116 ASSERT3U((ixa)->ixa_refcnt, !=, 0); \ 117 atomic_inc_32(&(ixa)->ixa_refcnt); \ 118 } 119 120 /* 121 * When we need to handle a transmit side asynchronous operation, then we need 122 * to save sufficient information so that we can call the fragment and postfrag 123 * functions. That information is captured in an mblk containing this structure. 124 * 125 * Since this is currently only used for IPsec, we include information for 126 * the kernel crypto framework. 127 */ 128 typedef struct ixamblk_s { 129 boolean_t ixm_inbound; /* B_FALSE */ 130 iaflags_t ixm_flags; /* ixa_flags */ 131 netstackid_t ixm_stackid; /* Verify it didn't go away */ 132 uint_t ixm_ifindex; /* Used to find the nce */ 133 in6_addr_t ixm_nceaddr_v6; /* Used to find nce */ 134 #define ixm_nceaddr_v4 V4_PART_OF_V6(ixm_nceaddr_v6) 135 uint32_t ixm_fragsize; 136 uint_t ixm_pktlen; 137 uint16_t ixm_ip_hdr_length; /* Points to ULP header */ 138 uint8_t ixm_protocol; /* Protocol number for ULP cksum */ 139 pfirepostfrag_t ixm_postfragfn; 140 141 zoneid_t ixm_zoneid; /* Needed for ipobs */ 142 zoneid_t ixm_no_loop_zoneid; /* IXAF_NO_LOOP_ZONEID_SET */ 143 144 uint_t ixm_scopeid; /* For IPv6 link-locals */ 145 146 uint32_t ixm_ident; /* For IPv6 fragment header */ 147 uint32_t ixm_xmit_hint; 148 149 uint64_t ixm_conn_id; /* Used by DTrace */ 150 cred_t *ixm_cred; /* For getpeerucred - refhold if set */ 151 pid_t ixm_cpid; /* For getpeerucred */ 152 153 ts_label_t *ixm_tsl; /* Refhold if set. */ 154 155 /* 156 * When the pointers below are set they have a refhold on the struct. 157 */ 158 ipsec_latch_t *ixm_ipsec_latch; 159 struct ipsa_s *ixm_ipsec_ah_sa; /* SA for AH */ 160 struct ipsa_s *ixm_ipsec_esp_sa; /* SA for ESP */ 161 struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? 
*/ 162 struct ipsec_action_s *ixm_ipsec_action; /* For reflected packets */ 163 164 ipsa_ref_t ixm_ipsec_ref[2]; /* Soft reference to SA */ 165 166 /* Need these while waiting for SA */ 167 uint16_t ixm_ipsec_src_port; /* Source port number of d-gram. */ 168 uint16_t ixm_ipsec_dst_port; /* Destination port number of d-gram. */ 169 uint8_t ixm_ipsec_icmp_type; /* ICMP type of d-gram */ 170 uint8_t ixm_ipsec_icmp_code; /* ICMP code of d-gram */ 171 172 sa_family_t ixm_ipsec_inaf; /* Inner address family */ 173 uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN]; /* Inner src address */ 174 uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN]; /* Inner dest address */ 175 uint8_t ixm_ipsec_insrcpfx; /* Inner source prefix */ 176 uint8_t ixm_ipsec_indstpfx; /* Inner destination prefix */ 177 178 uint8_t ixm_ipsec_proto; /* IP protocol number for d-gram. */ 179 } ixamblk_t; 180 181 182 /* 183 * When we need to handle a receive side asynchronous operation, then we need 184 * to save sufficient information so that we can call ip_fanout. 185 * That information is captured in an mblk containing this structure. 186 * 187 * Since this is currently only used for IPsec, we include information for 188 * the kernel crypto framework. 
189 */ 190 typedef struct iramblk_s { 191 boolean_t irm_inbound; /* B_TRUE */ 192 iaflags_t irm_flags; /* ira_flags */ 193 netstackid_t irm_stackid; /* Verify it didn't go away */ 194 uint_t irm_ifindex; /* To find ira_ill */ 195 196 uint_t irm_rifindex; /* ira_rifindex */ 197 uint_t irm_ruifindex; /* ira_ruifindex */ 198 uint_t irm_pktlen; 199 uint16_t irm_ip_hdr_length; /* Points to ULP header */ 200 uint8_t irm_protocol; /* Protocol number for ULP cksum */ 201 zoneid_t irm_zoneid; /* ALL_ZONES unless local delivery */ 202 203 squeue_t *irm_sqp; 204 ill_rx_ring_t *irm_ring; 205 206 ipaddr_t irm_mroute_tunnel; /* IRAF_MROUTE_TUNNEL_SET */ 207 zoneid_t irm_no_loop_zoneid; /* IRAF_NO_LOOP_ZONEID_SET */ 208 uint32_t irm_esp_udp_ports; /* IRAF_ESP_UDP_PORTS */ 209 210 char irm_l2src[IRA_L2SRC_SIZE]; /* If IRAF_L2SRC_SET */ 211 212 cred_t *irm_cred; /* For getpeerucred - refhold if set */ 213 pid_t irm_cpid; /* For getpeerucred */ 214 215 ts_label_t *irm_tsl; /* Refhold if set. */ 216 217 /* 218 * When set these correspond to a refhold on the object. 219 */ 220 struct ipsa_s *irm_ipsec_ah_sa; /* SA for AH */ 221 struct ipsa_s *irm_ipsec_esp_sa; /* SA for ESP */ 222 struct ipsec_action_s *irm_ipsec_action; /* For reflected packets */ 223 } iramblk_t; 224 225 226 /* 227 * Take the information in ip_xmit_attr_t and stick it in an mblk 228 * that can later be passed to ip_xmit_attr_from_mblk to recreate the 229 * ip_xmit_attr_t. 230 * 231 * Returns NULL on memory allocation failure. 
 */
mblk_t *
ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa)
{
	mblk_t		*ixamp;
	ixamblk_t	*ixm;
	nce_t		*nce = ixa->ixa_nce;

	/* The caller must have a resolved nce; its address lets us refind it */
	ASSERT(nce != NULL);
	ixamp = allocb(sizeof (*ixm), BPRI_MED);
	if (ixamp == NULL)
		return (NULL);

	/* M_BREAK marks this mblk as carrying attributes, not packet data */
	ixamp->b_datap->db_type = M_BREAK;
	ixamp->b_wptr += sizeof (*ixm);
	ixm = (ixamblk_t *)ixamp->b_rptr;

	bzero(ixm, sizeof (*ixm));
	ixm->ixm_inbound = B_FALSE;
	ixm->ixm_flags = ixa->ixa_flags;
	ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid;
	ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex;
	ixm->ixm_nceaddr_v6 = nce->nce_addr;
	ixm->ixm_fragsize = ixa->ixa_fragsize;
	ixm->ixm_pktlen = ixa->ixa_pktlen;
	ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length;
	ixm->ixm_protocol = ixa->ixa_protocol;
	ixm->ixm_postfragfn = ixa->ixa_postfragfn;
	ixm->ixm_zoneid = ixa->ixa_zoneid;
	ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid;
	ixm->ixm_scopeid = ixa->ixa_scopeid;
	ixm->ixm_ident = ixa->ixa_ident;
	ixm->ixm_xmit_hint = ixa->ixa_xmit_hint;

	/* The mblk takes its own refhold on the label and cred, if present */
	if (ixa->ixa_tsl != NULL) {
		ixm->ixm_tsl = ixa->ixa_tsl;
		label_hold(ixm->ixm_tsl);
	}
	if (ixa->ixa_cred != NULL) {
		ixm->ixm_cred = ixa->ixa_cred;
		crhold(ixa->ixa_cred);
	}
	ixm->ixm_cpid = ixa->ixa_cpid;
	ixm->ixm_conn_id = ixa->ixa_conn_id;

	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
		/* Each IPsec pointer copied gets its own reference */
		if (ixa->ixa_ipsec_ah_sa != NULL) {
			ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa;
			IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
		}
		if (ixa->ixa_ipsec_esp_sa != NULL) {
			ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa;
			IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
		}
		if (ixa->ixa_ipsec_policy != NULL) {
			ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy;
			IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
		}
		if (ixa->ixa_ipsec_action != NULL) {
			ixm->ixm_ipsec_action = ixa->ixa_ipsec_action;
			IPACT_REFHOLD(ixa->ixa_ipsec_action);
		}
		if (ixa->ixa_ipsec_latch != NULL) {
			ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch;
			IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
		}
		/* Soft references and selector information - plain copies */
		ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0];
		ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1];
		ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port;
		ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port;
		ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type;
		ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code;
		ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf;
		ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0];
		ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1];
		ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2];
		ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3];
		ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0];
		ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1];
		ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2];
		ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3];
		ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx;
		ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx;
		ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto;
	}
	return (ixamp);
}

/*
 * Extract the ip_xmit_attr_t from the mblk, checking that the
 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is
 * not the case.
 *
 * Otherwise ixa is updated.
 * Caller needs to release references on the ixa by calling ixa_refrele()
 * which will immediately call ixa_inactive to release the references.
328 */ 329 boolean_t 330 ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa) 331 { 332 ixamblk_t *ixm; 333 netstack_t *ns; 334 ip_stack_t *ipst; 335 ill_t *ill; 336 nce_t *nce; 337 338 /* We assume the caller hasn't initialized ixa */ 339 bzero(ixa, sizeof (*ixa)); 340 341 ASSERT(DB_TYPE(ixamp) == M_BREAK); 342 ASSERT(ixamp->b_cont == NULL); 343 344 ixm = (ixamblk_t *)ixamp->b_rptr; 345 ASSERT(!ixm->ixm_inbound); 346 347 /* Verify the netstack is still around */ 348 ns = netstack_find_by_stackid(ixm->ixm_stackid); 349 if (ns == NULL) { 350 /* Disappeared on us */ 351 (void) ip_xmit_attr_free_mblk(ixamp); 352 return (B_FALSE); 353 } 354 ipst = ns->netstack_ip; 355 356 /* Verify the ill is still around */ 357 ill = ill_lookup_on_ifindex(ixm->ixm_ifindex, 358 !(ixm->ixm_flags & IXAF_IS_IPV4), ipst); 359 360 /* We have the ill, hence the netstack can't go away */ 361 netstack_rele(ns); 362 if (ill == NULL) { 363 /* Disappeared on us */ 364 (void) ip_xmit_attr_free_mblk(ixamp); 365 return (B_FALSE); 366 } 367 /* 368 * Find the nce. We don't load-spread (only lookup nce's on the ill) 369 * because we want to find the same nce as the one we had when 370 * ip_xmit_attr_to_mblk was called. 371 */ 372 if (ixm->ixm_flags & IXAF_IS_IPV4) { 373 nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4); 374 } else { 375 nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6); 376 } 377 378 /* We have the nce, hence the ill can't go away */ 379 ill_refrele(ill); 380 if (nce == NULL) { 381 /* 382 * Since this is unusual and we don't know what type of 383 * nce it was, we drop the packet. 
384 */ 385 (void) ip_xmit_attr_free_mblk(ixamp); 386 return (B_FALSE); 387 } 388 389 ixa->ixa_flags = ixm->ixm_flags; 390 ixa->ixa_refcnt = 1; 391 ixa->ixa_ipst = ipst; 392 ixa->ixa_fragsize = ixm->ixm_fragsize; 393 ixa->ixa_pktlen = ixm->ixm_pktlen; 394 ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length; 395 ixa->ixa_protocol = ixm->ixm_protocol; 396 ixa->ixa_nce = nce; 397 ixa->ixa_postfragfn = ixm->ixm_postfragfn; 398 ixa->ixa_zoneid = ixm->ixm_zoneid; 399 ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid; 400 ixa->ixa_scopeid = ixm->ixm_scopeid; 401 ixa->ixa_ident = ixm->ixm_ident; 402 ixa->ixa_xmit_hint = ixm->ixm_xmit_hint; 403 404 if (ixm->ixm_tsl != NULL) { 405 ixa->ixa_tsl = ixm->ixm_tsl; 406 ixa->ixa_free_flags |= IXA_FREE_TSL; 407 ixm->ixm_tsl = NULL; 408 } 409 if (ixm->ixm_cred != NULL) { 410 ixa->ixa_cred = ixm->ixm_cred; 411 ixa->ixa_free_flags |= IXA_FREE_CRED; 412 ixm->ixm_cred = NULL; 413 } 414 ixa->ixa_cpid = ixm->ixm_cpid; 415 ixa->ixa_conn_id = ixm->ixm_conn_id; 416 417 ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa; 418 ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa; 419 ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy; 420 ixa->ixa_ipsec_action = ixm->ixm_ipsec_action; 421 ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch; 422 423 ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0]; 424 ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1]; 425 ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port; 426 ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port; 427 ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type; 428 ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code; 429 ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf; 430 ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0]; 431 ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1]; 432 ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2]; 433 ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3]; 434 ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0]; 435 ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1]; 436 ixa->ixa_ipsec_indst[2] = 
ixm->ixm_ipsec_indst[2]; 437 ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3]; 438 ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx; 439 ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx; 440 ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto; 441 442 freeb(ixamp); 443 return (B_TRUE); 444 } 445 446 /* 447 * Free the ixm mblk and any references it holds 448 * Returns b_cont. 449 */ 450 mblk_t * 451 ip_xmit_attr_free_mblk(mblk_t *ixamp) 452 { 453 ixamblk_t *ixm; 454 mblk_t *mp; 455 456 /* Consume mp */ 457 ASSERT(DB_TYPE(ixamp) == M_BREAK); 458 mp = ixamp->b_cont; 459 460 ixm = (ixamblk_t *)ixamp->b_rptr; 461 ASSERT(!ixm->ixm_inbound); 462 463 if (ixm->ixm_ipsec_ah_sa != NULL) { 464 IPSA_REFRELE(ixm->ixm_ipsec_ah_sa); 465 ixm->ixm_ipsec_ah_sa = NULL; 466 } 467 if (ixm->ixm_ipsec_esp_sa != NULL) { 468 IPSA_REFRELE(ixm->ixm_ipsec_esp_sa); 469 ixm->ixm_ipsec_esp_sa = NULL; 470 } 471 if (ixm->ixm_ipsec_policy != NULL) { 472 IPPOL_REFRELE(ixm->ixm_ipsec_policy); 473 ixm->ixm_ipsec_policy = NULL; 474 } 475 if (ixm->ixm_ipsec_action != NULL) { 476 IPACT_REFRELE(ixm->ixm_ipsec_action); 477 ixm->ixm_ipsec_action = NULL; 478 } 479 if (ixm->ixm_ipsec_latch) { 480 IPLATCH_REFRELE(ixm->ixm_ipsec_latch); 481 ixm->ixm_ipsec_latch = NULL; 482 } 483 484 if (ixm->ixm_tsl != NULL) { 485 label_rele(ixm->ixm_tsl); 486 ixm->ixm_tsl = NULL; 487 } 488 if (ixm->ixm_cred != NULL) { 489 crfree(ixm->ixm_cred); 490 ixm->ixm_cred = NULL; 491 } 492 freeb(ixamp); 493 return (mp); 494 } 495 496 /* 497 * Take the information in ip_recv_attr_t and stick it in an mblk 498 * that can later be passed to ip_recv_attr_from_mblk to recreate the 499 * ip_recv_attr_t. 500 * 501 * Returns NULL on memory allocation failure. 
 */
mblk_t *
ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
{
	mblk_t		*iramp;
	iramblk_t	*irm;
	ill_t		*ill = ira->ira_ill;

	/* Either we are inside IP (have an ill) or the ULP recorded ifindex */
	ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);

	iramp = allocb(sizeof (*irm), BPRI_MED);
	if (iramp == NULL)
		return (NULL);

	/* M_BREAK marks this mblk as carrying attributes, not packet data */
	iramp->b_datap->db_type = M_BREAK;
	iramp->b_wptr += sizeof (*irm);
	irm = (iramblk_t *)iramp->b_rptr;

	bzero(irm, sizeof (*irm));
	irm->irm_inbound = B_TRUE;
	irm->irm_flags = ira->ira_flags;
	if (ill != NULL) {
		/* Internal to IP - preserve ip_stack_t, ill and rill */
		irm->irm_stackid =
		    ill->ill_ipst->ips_netstack->netstack_stackid;
		irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
		ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
		    ira->ira_rifindex);
	} else {
		/* Let ip_recv_attr_from_mblk know there isn't one */
		irm->irm_stackid = -1;
	}
	irm->irm_rifindex = ira->ira_rifindex;
	irm->irm_ruifindex = ira->ira_ruifindex;
	irm->irm_pktlen = ira->ira_pktlen;
	irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
	irm->irm_protocol = ira->ira_protocol;

	irm->irm_sqp = ira->ira_sqp;
	irm->irm_ring = ira->ira_ring;

	irm->irm_zoneid = ira->ira_zoneid;
	irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
	irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
	irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;

	/* The mblk takes its own refhold on the label and cred, if present */
	if (ira->ira_tsl != NULL) {
		irm->irm_tsl = ira->ira_tsl;
		label_hold(irm->irm_tsl);
	}
	if (ira->ira_cred != NULL) {
		irm->irm_cred = ira->ira_cred;
		crhold(ira->ira_cred);
	}
	irm->irm_cpid = ira->ira_cpid;

	if (ira->ira_flags & IRAF_L2SRC_SET)
		bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);

	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		/* Each IPsec pointer copied gets its own reference */
		if (ira->ira_ipsec_ah_sa != NULL) {
			irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
			IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
		}
		if (ira->ira_ipsec_esp_sa != NULL) {
			irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
			IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
		}
		if (ira->ira_ipsec_action != NULL) {
			irm->irm_ipsec_action = ira->ira_ipsec_action;
			IPACT_REFHOLD(ira->ira_ipsec_action);
		}
	}
	return (iramp);
}

/*
 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
 * then irm_stackid is not -1, in which case we check that the
 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
 * not the case.
 * If irm_stackid is -1 then we are used by an ULP (e.g., squeue_enter)
 * and we just proceed with ira_ill and ira_rill as NULL.
 *
 * The caller needs to release any references on the pointers inside the ira
 * by calling ira_cleanup.
 */
boolean_t
ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
{
	iramblk_t	*irm;
	netstack_t	*ns;
	ip_stack_t	*ipst = NULL;
	ill_t		*ill = NULL, *rill = NULL;

	/* We assume the caller hasn't initialized ira */
	bzero(ira, sizeof (*ira));

	ASSERT(DB_TYPE(iramp) == M_BREAK);
	ASSERT(iramp->b_cont == NULL);

	irm = (iramblk_t *)iramp->b_rptr;
	ASSERT(irm->irm_inbound);

	if (irm->irm_stackid != -1) {
		/* Verify the netstack is still around */
		ns = netstack_find_by_stackid(irm->irm_stackid);
		if (ns == NULL) {
			/* Disappeared on us */
			(void) ip_recv_attr_free_mblk(iramp);
			return (B_FALSE);
		}
		ipst = ns->netstack_ip;

		/* Verify the ill is still around */
		ill = ill_lookup_on_ifindex(irm->irm_ifindex,
		    !(irm->irm_flags & IRAF_IS_IPV4), ipst);

		if (irm->irm_ifindex == irm->irm_rifindex) {
			rill = ill;
		} else {
			rill = ill_lookup_on_ifindex(irm->irm_rifindex,
			    !(irm->irm_flags & IRAF_IS_IPV4), ipst);
		}

		/* We have the ill, hence the netstack can't go away */
		netstack_rele(ns);
		if (ill == NULL || rill == NULL) {
			/* Disappeared on us; release whichever we did find */
			if (ill != NULL)
				ill_refrele(ill);
			if (rill != NULL && rill != ill)
				ill_refrele(rill);
			(void) ip_recv_attr_free_mblk(iramp);
			return (B_FALSE);
		}
	}

	ira->ira_flags = irm->irm_flags;
	/* Caller must ill_refrele(ira_ill) by using ira_cleanup() */
	ira->ira_ill = ill;
	ira->ira_rill = rill;

	ira->ira_rifindex = irm->irm_rifindex;
	ira->ira_ruifindex = irm->irm_ruifindex;
	ira->ira_pktlen = irm->irm_pktlen;
	ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
	ira->ira_protocol = irm->irm_protocol;

	ira->ira_sqp = irm->irm_sqp;
	/* The rest of IP assumes that the rings never go away. */
	ira->ira_ring = irm->irm_ring;

	ira->ira_zoneid = irm->irm_zoneid;
	ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
	ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
	ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;

	/*
	 * Ownership of the label/cred refholds transfers from the mblk to
	 * the ira; clear the mblk's pointers so they can't be released twice.
	 */
	if (irm->irm_tsl != NULL) {
		ira->ira_tsl = irm->irm_tsl;
		ira->ira_free_flags |= IRA_FREE_TSL;
		irm->irm_tsl = NULL;
	}
	if (irm->irm_cred != NULL) {
		ira->ira_cred = irm->irm_cred;
		ira->ira_free_flags |= IRA_FREE_CRED;
		irm->irm_cred = NULL;
	}
	ira->ira_cpid = irm->irm_cpid;

	if (ira->ira_flags & IRAF_L2SRC_SET)
		bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);

	/* The IPsec refholds move wholesale from the mblk to the ira */
	ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
	ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
	ira->ira_ipsec_action = irm->irm_ipsec_action;

	freeb(iramp);
	return (B_TRUE);
}

/*
 * Free the irm mblk and any references it holds
 * Returns b_cont.
 */
mblk_t *
ip_recv_attr_free_mblk(mblk_t *iramp)
{
	iramblk_t	*irm;
	mblk_t		*mp;

	/* Consume mp */
	ASSERT(DB_TYPE(iramp) == M_BREAK);
	mp = iramp->b_cont;

	irm = (iramblk_t *)iramp->b_rptr;
	ASSERT(irm->irm_inbound);

	if (irm->irm_ipsec_ah_sa != NULL) {
		IPSA_REFRELE(irm->irm_ipsec_ah_sa);
		irm->irm_ipsec_ah_sa = NULL;
	}
	if (irm->irm_ipsec_esp_sa != NULL) {
		IPSA_REFRELE(irm->irm_ipsec_esp_sa);
		irm->irm_ipsec_esp_sa = NULL;
	}
	if (irm->irm_ipsec_action != NULL) {
		IPACT_REFRELE(irm->irm_ipsec_action);
		irm->irm_ipsec_action = NULL;
	}
	if (irm->irm_tsl != NULL) {
		label_rele(irm->irm_tsl);
		irm->irm_tsl = NULL;
	}
	if (irm->irm_cred != NULL) {
		crfree(irm->irm_cred);
		irm->irm_cred = NULL;
	}

	freeb(iramp);
	return (mp);
}

/*
 * Returns true if the mblk contains an ip_recv_attr_t
 * For now we just check db_type.
 */
boolean_t
ip_recv_attr_is_mblk(mblk_t *mp)
{
	/*
	 * Need to handle the various forms of tcp_timermp which are tagged
	 * with b_wptr and might have a NULL b_datap.
	 */
	if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1)
		return (B_FALSE);

#ifdef	DEBUG
	iramblk_t	*irm;

	if (DB_TYPE(mp) != M_BREAK)
		return (B_FALSE);

	irm = (iramblk_t *)mp->b_rptr;
	ASSERT(irm->irm_inbound);
	return (B_TRUE);
#else
	return (DB_TYPE(mp) == M_BREAK);
#endif
}

/*
 * Common implementation for conn_get_ixa() and conn_get_ixa_tryhard():
 * return an ixa for exclusive use by the caller, either conn_ixa itself
 * (when no other thread holds it) or a "safe" copy.  kmflag controls how
 * hard the allocation of the copy tries (KM_NOSLEEP vs. KM_SLEEP).
 */
static ip_xmit_attr_t *
conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag)
{
	ip_xmit_attr_t *oldixa;	/* Already attached to conn_t */
	ip_xmit_attr_t *ixa;	/* New one, which we return. */

	/*
	 * NOTE: If the marked-below common case isn't, move the
	 * kmem_alloc() up here and put a free in what was marked as the
	 * (not really) common case instead.
	 */

	mutex_enter(&connp->conn_lock);
	oldixa = connp->conn_ixa;

	/* At least one reference for the conn_t */
	ASSERT3U(oldixa->ixa_refcnt, >=, 1);
	if (atomic_inc_32_nv(&oldixa->ixa_refcnt) == 2) {
		/* No other thread using conn_ixa (common case) */
		mutex_exit(&connp->conn_lock);
		return (oldixa);
	}
	/* Do allocation inside-the-conn_lock because it's less common. */
	ixa = kmem_alloc(sizeof (*ixa), kmflag);
	if (ixa == NULL) {
		mutex_exit(&connp->conn_lock);
		IXA_REFRELE(oldixa);	/* Undo above atomic_inc_32_nv */
		return (NULL);
	}
	ixa_safe_copy(oldixa, ixa);

	/* Make sure we drop conn_lock before any refrele */
	if (replace) {
		ixa->ixa_refcnt++; /* No atomic needed - not visible */
		connp->conn_ixa = ixa;
		mutex_exit(&connp->conn_lock);
		IXA_REFRELE(oldixa);	/* Undo refcnt from conn_t */
	} else {
		mutex_exit(&connp->conn_lock);
	}
	IXA_REFRELE(oldixa);	/* Undo above atomic_inc_32_nv */

	return (ixa);
}

/*
 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only
 * the caller can access the ip_xmit_attr_t.
 *
 * If nobody else is using conn_ixa we return it.
 * Otherwise we make a "safe" copy of conn_ixa
 * and return it. The "safe" copy has the pointers set to NULL
 * (since the pointers might be changed by another thread using
 * conn_ixa). The caller needs to check for NULL pointers to see
 * if ip_set_destination needs to be called to re-establish the pointers.
 *
 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t.
 * That is used when we connect() the ULP.
 */
ip_xmit_attr_t *
conn_get_ixa(conn_t *connp, boolean_t replace)
{
	return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP));
}

/*
 * Used only when the option is to have the kernel hang due to not
 * cleaning up ixa references on ills etc.
 */
ip_xmit_attr_t *
conn_get_ixa_tryhard(conn_t *connp, boolean_t replace)
{
	/* KM_SLEEP: block (potentially forever) rather than return NULL */
	return (conn_get_ixa_impl(connp, replace, KM_SLEEP));
}

/*
 * Replace conn_ixa with the ixa argument.
 *
 * The caller must hold conn_lock.
 *
 * We return the old ixa; the caller must ixa_refrele that after conn_lock
 * has been dropped.
 */
ip_xmit_attr_t *
conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa)
{
	ip_xmit_attr_t *oldixa;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	oldixa = connp->conn_ixa;
	/* The conn_t's reference moves from oldixa to the new ixa */
	IXA_REFHOLD(ixa);
	ixa->ixa_conn_id = oldixa->ixa_conn_id;
	connp->conn_ixa = ixa;
	return (oldixa);
}

/*
 * Return a ip_xmit_attr_t to use with a conn_t that is based on but
 * separate from conn_ixa.
 *
 * This "safe" copy has the pointers set to NULL
 * (since the pointers might be changed by another thread using
 * conn_ixa). The caller needs to check for NULL pointers to see
 * if ip_set_destination needs to be called to re-establish the pointers.
 */
ip_xmit_attr_t *
conn_get_ixa_exclusive(conn_t *connp)
{
	ip_xmit_attr_t *oldixa;
	ip_xmit_attr_t *ixa;

	/* Allocate outside conn_lock; NORMALPRI avoids depleting reserves */
	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP | KM_NORMALPRI);
	if (ixa == NULL)
		return (NULL);

	mutex_enter(&connp->conn_lock);

	/* Hold conn_ixa so it can't be freed while we copy from it */
	oldixa = connp->conn_ixa;
	IXA_REFHOLD(oldixa);

	ixa_safe_copy(oldixa, ixa);
	mutex_exit(&connp->conn_lock);
	IXA_REFRELE(oldixa);
	return (ixa);
}

/*
 * Copy src into ixa, clearing everything that could be shared with (or
 * mutated by) another thread: cached ire/nce/dce pointers and all IPsec
 * state.  Label and cred are kept, taking an extra hold where the copy
 * now owns a reference.
 */
void
ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
{
	bcopy(src, ixa, sizeof (*ixa));
	ixa->ixa_refcnt = 1;
	/*
	 * Clear any pointers that have references and might be changed
	 * by ip_set_destination or the ULP
	 */
	ixa->ixa_ire = NULL;
	ixa->ixa_nce = NULL;
	ixa->ixa_dce = NULL;
	/* Force revalidation so the cleared pointers get re-established */
	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
#ifdef DEBUG
	ixa->ixa_curthread = NULL;
#endif
	/* Clear all the IPsec pointers and the flag as well. */
	ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;

	ixa->ixa_ipsec_latch = NULL;
	ixa->ixa_ipsec_ah_sa = NULL;
	ixa->ixa_ipsec_esp_sa = NULL;
	ixa->ixa_ipsec_policy = NULL;
	ixa->ixa_ipsec_action = NULL;

	/*
	 * We leave ixa_tsl unchanged, but if it has a refhold we need
	 * to get an extra refhold.
	 */
	if (ixa->ixa_free_flags & IXA_FREE_TSL)
		label_hold(ixa->ixa_tsl);

	/*
	 * We leave ixa_cred unchanged, but if it has a refhold we need
	 * to get an extra refhold.
	 */
	if (ixa->ixa_free_flags & IXA_FREE_CRED)
		crhold(ixa->ixa_cred);

	/*
	 * There is no cleanup in progress on this new copy.
	 */
	ixa->ixa_tcpcleanup = IXATC_IDLE;
}

/*
 * Duplicate an ip_xmit_attr_t.
 * Assumes that the caller controls the ixa, hence we do not need to use
 * a safe copy. We just have to increase the refcnt on any pointers.
 */
ip_xmit_attr_t *
ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa)
{
	ip_xmit_attr_t *ixa;

	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
	if (ixa == NULL)
		return (NULL);
	bcopy(src_ixa, ixa, sizeof (*ixa));
	ixa->ixa_refcnt = 1;

	/* The duplicate takes its own reference on every cached pointer */
	if (ixa->ixa_ire != NULL)
		ire_refhold_notr(ixa->ixa_ire);
	if (ixa->ixa_nce != NULL)
		nce_refhold(ixa->ixa_nce);
	if (ixa->ixa_dce != NULL)
		dce_refhold_notr(ixa->ixa_dce);

#ifdef DEBUG
	ixa->ixa_curthread = NULL;
#endif

	if (ixa->ixa_ipsec_latch != NULL)
		IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
	if (ixa->ixa_ipsec_ah_sa != NULL)
		IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
	if (ixa->ixa_ipsec_esp_sa != NULL)
		IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
	if (ixa->ixa_ipsec_policy != NULL)
		IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
	if (ixa->ixa_ipsec_action != NULL)
		IPACT_REFHOLD(ixa->ixa_ipsec_action);

	/*
	 * Unlike ixa_safe_copy() the duplicate always owns its label/cred
	 * references, so the free flags are set unconditionally.
	 */
	if (ixa->ixa_tsl != NULL) {
		label_hold(ixa->ixa_tsl);
		ixa->ixa_free_flags |= IXA_FREE_TSL;
	}
	if (ixa->ixa_cred != NULL) {
		crhold(ixa->ixa_cred);
		ixa->ixa_free_flags |= IXA_FREE_CRED;
	}
	return (ixa);
}

/*
 * Used to replace the ixa_label field.
 * The caller should have a reference on the label, which we transfer to
 * the attributes so that when the attribute is freed/cleaned up
 * we will release that reference.
 */
void
ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl)
{
	ASSERT(tsl != NULL);

	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		/* Drop the reference on the label being replaced */
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
	} else {
		ixa->ixa_free_flags |= IXA_FREE_TSL;
	}
	ixa->ixa_tsl = tsl;
}

/*
 * Replace the ip_recv_attr_t's label.
 * Due to kernel RPC's use of db_credp we also need to replace ira_cred;
 * TCP/UDP uses ira_cred to set db_credp for non-socket users.
 * This can fail (and return B_FALSE) due to lack of memory.
 */
boolean_t
ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl)
{
	cred_t	*newcr;

	if (ira->ira_free_flags & IRA_FREE_TSL) {
		/* Drop the reference on the label being replaced */
		ASSERT(ira->ira_tsl != NULL);
		label_rele(ira->ira_tsl);
	}
	label_hold(tsl);
	ira->ira_tsl = tsl;
	ira->ira_free_flags |= IRA_FREE_TSL;

	/*
	 * Reset zoneid if we have a shared address. That allows
	 * ip_fanout_tx_v4/v6 to determine the zoneid again.
	 */
	if (ira->ira_flags & IRAF_TX_SHARED_ADDR)
		ira->ira_zoneid = ALL_ZONES;

	/*
	 * We update ira_cred for RPC.  Note that on allocation failure the
	 * label has already been replaced above; only the cred is left
	 * unchanged when we return B_FALSE.
	 */
	newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP);
	if (newcr == NULL)
		return (B_FALSE);
	if (ira->ira_free_flags & IRA_FREE_CRED)
		crfree(ira->ira_cred);
	ira->ira_cred = newcr;
	ira->ira_free_flags |= IRA_FREE_CRED;
	return (B_TRUE);
}

/*
 * This needs to be called after ip_set_destination/tsol_check_dest might
 * have changed ixa_tsl to be specific for a destination, and we now want to
 * send to a different destination.
 * We have to restart with crgetlabel() since ip_set_destination/
 * tsol_check_dest will start with ixa_tsl.
 */
void
ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr)
{
	if (!is_system_labeled())
		return;

	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
	}
	/* No hold taken: the cred owns this label, hence no free flag */
	ixa->ixa_tsl = crgetlabel(cr);
}

/*
 * Function-call form of IXA_REFRELE for callers outside this file.
 */
void
ixa_refrele(ip_xmit_attr_t *ixa)
{
	IXA_REFRELE(ixa);
}

/*
 * Tear down an ixa whose last reference has been dropped: release
 * everything it points at and free the structure itself.
 */
void
ixa_inactive(ip_xmit_attr_t *ixa)
{
	ASSERT(ixa->ixa_refcnt == 0);

	ixa_cleanup(ixa);
	kmem_free(ixa, sizeof (*ixa));
}

/*
 * Release any references contained in the ixa.
 * Also clear any fields that are not controlled by ixa_flags.
 */
void
ixa_cleanup(ip_xmit_attr_t *ixa)
{
	/* Drop the cached forwarding state */
	if (ixa->ixa_ire != NULL) {
		ire_refrele_notr(ixa->ixa_ire);
		ixa->ixa_ire = NULL;
	}
	if (ixa->ixa_dce != NULL) {
		dce_refrele_notr(ixa->ixa_dce);
		ixa->ixa_dce = NULL;
	}
	if (ixa->ixa_nce != NULL) {
		nce_refrele(ixa->ixa_nce);
		ixa->ixa_nce = NULL;
	}
	/* Force a fresh lookup the next time this ixa is used to send */
	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
		ipsec_out_release_refs(ixa);
	}
	/* Label and cred are only released if this ixa owns a reference */
	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
	}
	ixa->ixa_tsl = NULL;
	if (ixa->ixa_free_flags & IRA_FREE_CRED)
		;
	if (ixa->ixa_free_flags & IXA_FREE_CRED) {
		ASSERT(ixa->ixa_cred != NULL);
		crfree(ixa->ixa_cred);
		ixa->ixa_free_flags &= ~IXA_FREE_CRED;
	}
	ixa->ixa_cred = NULL;
	/* Clear the fields that are not controlled by ixa_flags */
	ixa->ixa_src_preferences = 0;
	ixa->ixa_ifindex = 0;
	ixa->ixa_multicast_ifindex = 0;
	ixa->ixa_multicast_ifaddr = INADDR_ANY;
}

/*
 * Release any references contained in the ira.
 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second
 * argument.
 */
void
ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill)
{
	if (ira->ira_ill != NULL) {
		if (ira->ira_rill != ira->ira_ill) {
			/* Caused by async processing */
			ill_refrele(ira->ira_rill);
		}
		if (refrele_ill)
			ill_refrele(ira->ira_ill);
	}
	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		ipsec_in_release_refs(ira);
	}
	/* Label and cred are only released if this ira owns a reference */
	if (ira->ira_free_flags & IRA_FREE_TSL) {
		ASSERT(ira->ira_tsl != NULL);
		label_rele(ira->ira_tsl);
		ira->ira_free_flags &= ~IRA_FREE_TSL;
	}
	ira->ira_tsl = NULL;
	if (ira->ira_free_flags & IRA_FREE_CRED) {
		ASSERT(ira->ira_cred != NULL);
		crfree(ira->ira_cred);
		ira->ira_free_flags &= ~IRA_FREE_CRED;
	}
	ira->ira_cred = NULL;
}

/*
 * Function to help release any IRE, NCE, or DCEs that
 * have been deleted and are marked as condemned.
 * The caller is responsible for any serialization which is different
 * for TCP, SCTP, and others.
 */
static void
ixa_cleanup_stale(ip_xmit_attr_t *ixa)
{
	ire_t	*ire;
	nce_t	*nce;
	dce_t	*dce;

	ire = ixa->ixa_ire;
	nce = ixa->ixa_nce;
	dce = ixa->ixa_dce;

	if (ire != NULL && IRE_IS_CONDEMNED(ire)) {
		/*
		 * Replace a condemned IRE with the blackhole IRE so the ixa
		 * always has a usable (if non-forwarding) route, and force
		 * re-verification on the next send.
		 */
		ire_refrele_notr(ire);
		ire = ire_blackhole(ixa->ixa_ipst,
		    !(ixa->ixa_flags & IXAF_IS_IPV4));
		ASSERT(ire != NULL);
#ifdef DEBUG
		/* Convert the untracked hold into a tracked one */
		ire_refhold_notr(ire);
		ire_refrele(ire);
#endif
		ixa->ixa_ire = ire;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}
	if (nce != NULL && nce->nce_is_condemned) {
		/* Can make it NULL as long as we set IRE_GENERATION_VERIFY */
		nce_refrele(nce);
		ixa->ixa_nce = NULL;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}
	if (dce != NULL && DCE_IS_CONDEMNED(dce)) {
		/* Fall back to the stack's default DCE */
		dce_refrele_notr(dce);
		dce = dce_get_default(ixa->ixa_ipst);
		ASSERT(dce != NULL);
#ifdef DEBUG
		/* Convert the untracked hold into a tracked one */
		dce_refhold_notr(dce);
		dce_refrele(dce);
#endif
		ixa->ixa_dce = dce;
		ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
	}
}

/*
 * Acquire exclusive use of the per-stack cleanup mblk for this connection,
 * marking the conn's ixa as IXATC_INPROGRESS so no other thread can start
 * or enqueue a cleanup for it.  Blocks until both conditions can be met.
 */
static mblk_t *
tcp_ixa_cleanup_getmblk(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
	int need_retry;
	mblk_t *mp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	/*
	 * It's possible that someone else came in and started cleaning up
	 * another connection between the time we verified this one is not being
	 * cleaned up and the time we actually get the shared mblk. If that's
	 * the case, we've dropped the lock, and some other thread may have
	 * cleaned up this connection again, and is still waiting for
	 * notification of that cleanup's completion. Therefore we need to
	 * recheck.
	 */
	do {
		need_retry = 0;
		/* Wait for any in-flight cleanup of this conn to finish */
		while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
			cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}

		while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
			/*
			 * Multiple concurrent cleanups; need to have the last
			 * one run since it could be an unplumb.
			 */
			need_retry = 1;
			cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}
	} while (need_retry);

	/*
	 * We now have the lock and the mblk; now make sure that no one else can
	 * try to clean up this connection or enqueue it for cleanup, clear the
	 * mblk pointer for this stack, drop the lock, and return the mblk.
	 */
	ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
	ASSERT(tcps->tcps_ixa_cleanup_mp == mp);
	ASSERT(mp != NULL);

	connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
	tcps->tcps_ixa_cleanup_mp = NULL;
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);

	return (mp);
}

/*
 * Used to run ixa_cleanup_stale inside the tcp squeue.
 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
 * and waking up the caller.
 */
/* ARGSUSED2 */
static void
tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
    ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	tcp_stack_t *tcps;

	tcps = connp->conn_netstack->netstack_tcp;

	ixa_cleanup_stale(connp->conn_ixa);

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);
	ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
	connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
	/* Return the shared mblk for the next cleanup to use */
	tcps->tcps_ixa_cleanup_mp = mp;
	cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
	/*
	 * It is possible for any number of threads to be waiting for cleanup of
	 * different connections. Absent a per-connection (or per-IXA) CV, we
	 * need to wake them all up even though only one can be waiting on this
	 * particular cleanup.
	 */
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}

/*
 * Wait for the squeue-dispatched tcp_ixa_cleanup() on this conn to
 * complete, then return the conn's ixa to IXATC_IDLE and wake any
 * other waiters.
 */
static void
tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);

	while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) {
		cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
		    &tcps->tcps_ixa_cleanup_lock);
	}

	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);
	connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
	/* Shared CV; wake every waiter so they can recheck their conn */
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);

	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}

/*
 * ipcl_walk() function to help release any IRE, NCE, or DCEs that
 * have been deleted and are marked as condemned.
 * Note that we can't cleanup the pointers since there can be threads
 * in conn_ip_output() sending while we are called.
 */
void
conn_ixa_cleanup(conn_t *connp, void *arg)
{
	/* arg encodes a boolean_t: wait for memory when set */
	boolean_t tryhard = (boolean_t)arg;

	if (IPCL_IS_TCP(connp)) {
		mblk_t *mp;

		mp = tcp_ixa_cleanup_getmblk(connp);

		if (connp->conn_sqp->sq_run == curthread) {
			/* Already on squeue */
			tcp_ixa_cleanup(connp, mp, NULL, NULL);
		} else {
			CONN_INC_REF(connp);
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
			    connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
		}
		tcp_ixa_cleanup_wait_and_finish(connp);
	} else if (IPCL_IS_SCTP(connp)) {
		sctp_t *sctp;
		sctp_faddr_t *fp;

		/* SCTP serializes with its own per-instance lock */
		sctp = CONN2SCTP(connp);
		RUN_SCTP(sctp);
		ixa_cleanup_stale(connp->conn_ixa);
		/* Each peer address carries its own ixa; clean them all */
		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next)
			ixa_cleanup_stale(fp->sf_ixa);
		WAKE_SCTP(sctp);
	} else {
		ip_xmit_attr_t *ixa;

		/*
		 * If there is a different thread using conn_ixa then we get a
		 * new copy and cut the old one loose from conn_ixa. Otherwise
		 * we use conn_ixa and prevent any other thread from
		 * using/changing it. Anybody using conn_ixa (e.g., a thread in
		 * conn_ip_output) will do an ixa_refrele which will remove any
		 * references on the ire etc.
		 *
		 * Once we are done other threads can use conn_ixa since the
		 * refcnt will be back at one.
		 *
		 * We are called either because an ill is going away, or
		 * due to memory reclaim. In the former case we wait for
		 * memory since we must remove the refcnts on the ill.
		 */
		if (tryhard) {
			ixa = conn_get_ixa_tryhard(connp, B_TRUE);
			ASSERT(ixa != NULL);
		} else {
			ixa = conn_get_ixa(connp, B_TRUE);
			if (ixa == NULL) {
				/*
				 * Somebody else was using it and kmem_alloc
				 * failed! Next memory reclaim will try to
				 * clean up.
				 */
				DTRACE_PROBE1(conn__ixa__cleanup__bail,
				    conn_t *, connp);
				return;
			}
		}
		ixa_cleanup_stale(ixa);
		IXA_REFRELE(ixa);
	}
}

/*
 * ixa needs to be an exclusive copy so that no one changes the cookie
 * or the ixa_nce.
 *
 * If the ill is flow controlled for this cookie, insert the conn on the
 * drain list for that cookie's tx list and return B_TRUE; otherwise
 * return B_FALSE.
 */
boolean_t
ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa)
{
	uintptr_t cookie = ixa->ixa_cookie;
	ill_dld_direct_t *idd;
	idl_tx_list_t *idl_txl;
	ill_t *ill = ixa->ixa_nce->nce_ill;
	boolean_t inserted = B_FALSE;

	idd = &(ill)->ill_dld_capab->idc_direct;
	/* The cookie hashes to the tx list whose lock serializes insertion */
	idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
	mutex_enter(&idl_txl->txl_lock);

	/*
	 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow
	 * control is asserted on an ill that does not support direct calls.
	 * Jump to insert.
	 */
	if (cookie == 0)
		goto tryinsert;

	ASSERT(ILL_DIRECT_CAPABLE(ill));

	if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) {
		/* Flow control has already lifted; nothing to do */
		DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie);
	} else if (idl_txl->txl_cookie != (uintptr_t)NULL &&
	    idl_txl->txl_cookie != ixa->ixa_cookie) {
		DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie,
		    uintptr_t, idl_txl->txl_cookie);
		/* TODO: bump kstat for cookie collision */
	} else {
		/*
		 * Check/set conn_blocked under conn_lock. Note that txl_lock
		 * will not suffice since two separate UDP threads may be
		 * racing to send to different destinations that are
		 * associated with different cookies and thus may not be
		 * holding the same txl_lock. Further, since a given conn_t
		 * can only be on a single drain list, the conn_t will be
		 * enqueued on whichever thread wins this race.
		 */
	tryinsert:	mutex_enter(&connp->conn_lock);
		if (connp->conn_blocked) {
			DTRACE_PROBE1(ill__tx__conn__already__blocked,
			    conn_t *, connp);
			mutex_exit(&connp->conn_lock);
		} else {
			connp->conn_blocked = B_TRUE;
			mutex_exit(&connp->conn_lock);
			idl_txl->txl_cookie = cookie;
			conn_drain_insert(connp, idl_txl);
			/* For STREAMS conns, stop backenable on the write q */
			if (!IPCL_IS_NONSTR(connp))
				noenable(connp->conn_wq);
			inserted = B_TRUE;
		}
	}
	mutex_exit(&idl_txl->txl_lock);
	return (inserted);
}