1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. 
*/ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/strsun.h> 31 #include <sys/zone.h> 32 #include <sys/ddi.h> 33 #include <sys/sunddi.h> 34 #include <sys/cmn_err.h> 35 #include <sys/debug.h> 36 #include <sys/atomic.h> 37 38 #include <sys/systm.h> 39 #include <sys/param.h> 40 #include <sys/kmem.h> 41 #include <sys/sdt.h> 42 #include <sys/socket.h> 43 #include <sys/mac.h> 44 #include <net/if.h> 45 #include <net/if_arp.h> 46 #include <net/route.h> 47 #include <sys/sockio.h> 48 #include <netinet/in.h> 49 #include <net/if_dl.h> 50 51 #include <inet/common.h> 52 #include <inet/mi.h> 53 #include <inet/mib2.h> 54 #include <inet/nd.h> 55 #include <inet/arp.h> 56 #include <inet/snmpcom.h> 57 #include <inet/kstatcom.h> 58 59 #include <netinet/igmp_var.h> 60 #include <netinet/ip6.h> 61 #include <netinet/icmp6.h> 62 #include <netinet/sctp.h> 63 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip6_asp.h> 68 #include <inet/tcp.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_if.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_ftable.h> 73 #include <inet/ip_rts.h> 74 #include <inet/optcom.h> 75 #include <inet/ip_ndp.h> 76 #include <inet/ip_listutils.h> 77 #include <netinet/igmp.h> 78 #include <netinet/ip_mroute.h> 79 #include <inet/ipp_common.h> 80 81 #include <net/pfkeyv2.h> 82 #include <inet/sadb.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipdrop.h> 85 #include <inet/ip_netinfo.h> 86 #include <sys/squeue_impl.h> 87 #include <sys/squeue.h> 88 89 #include <inet/ipclassifier.h> 90 #include <inet/sctp_ip.h> 91 #include <inet/sctp/sctp_impl.h> 92 #include <inet/udp_impl.h> 93 #include <sys/sunddi.h> 94 95 #include <sys/tsol/label.h> 96 #include <sys/tsol/tnet.h> 97 98 /* 99 * Release a reference on ip_xmit_attr. 
* The reference is acquired by conn_get_ixa().
 * When the count drops to zero the ixa is handed to ixa_inactive().
 *
 * Note that the expansion is a bare { } compound statement, so it must not
 * be used as the unbraced body of an if that has an else.
 */
#define	IXA_REFRELE(ixa)					\
{								\
	if (atomic_add_32_nv(&(ixa)->ixa_refcnt, -1) == 0)	\
		ixa_inactive(ixa);				\
}

/*
 * Take an additional reference on an ip_xmit_attr_t.
 * Asserts that the count is already non-zero, i.e. somebody must already
 * be holding the ixa when this is used.
 */
#define	IXA_REFHOLD(ixa)					\
{								\
	ASSERT((ixa)->ixa_refcnt != 0);				\
	atomic_add_32(&(ixa)->ixa_refcnt, 1);			\
}

/*
 * When we need to handle a transmit side asynchronous operation, then we need
 * to save sufficient information so that we can call the fragment and postfrag
 * functions. That information is captured in an mblk containing this structure.
 *
 * Since this is currently only used for IPsec, we include information for
 * the kernel crypto framework.
 *
 * The structure is filled in by ip_xmit_attr_to_mblk(), consumed by
 * ip_xmit_attr_from_mblk() and discarded by ip_xmit_attr_free_mblk().
 */
typedef struct ixamblk_s {
	boolean_t	ixm_inbound;	/* B_FALSE */
	iaflags_t	ixm_flags;	/* ixa_flags */
	netstackid_t	ixm_stackid;	/* Verify it didn't go away */
	uint_t		ixm_ifindex;	/* Used to find the nce */
	in6_addr_t	ixm_nceaddr_v6;	/* Used to find nce */
#define	ixm_nceaddr_v4	V4_PART_OF_V6(ixm_nceaddr_v6)
	uint32_t	ixm_fragsize;	/* ixa_fragsize */
	uint_t		ixm_pktlen;	/* ixa_pktlen */
	uint16_t	ixm_ip_hdr_length; /* Points to ULP header */
	uint8_t		ixm_protocol;	/* Protocol number for ULP cksum */
	pfirepostfrag_t	ixm_postfragfn;	/* ixa_postfragfn */

	zoneid_t	ixm_zoneid;		/* Needed for ipobs */
	zoneid_t	ixm_no_loop_zoneid;	/* IXAF_NO_LOOP_ZONEID_SET */

	uint_t		ixm_scopeid;		/* For IPv6 link-locals */

	uint32_t	ixm_ident;		/* For IPv6 fragment header */
	uint32_t	ixm_xmit_hint;		/* ixa_xmit_hint */

	cred_t		*ixm_cred;	/* For getpeerucred - refhold if set */
	pid_t		ixm_cpid;	/* For getpeerucred */

	ts_label_t	*ixm_tsl;	/* Refhold if set. */

	/*
	 * When the pointers below are set they have a refhold on the struct.
	 */
	ipsec_latch_t		*ixm_ipsec_latch;
	struct ipsa_s		*ixm_ipsec_ah_sa;	/* SA for AH */
	struct ipsa_s		*ixm_ipsec_esp_sa;	/* SA for ESP */
	struct ipsec_policy_s	*ixm_ipsec_policy;	/* why are we here? */
	struct ipsec_action_s	*ixm_ipsec_action; /* For reflected packets */

	ipsa_ref_t		ixm_ipsec_ref[2]; /* Soft reference to SA */

	/* Need these while waiting for SA */
	uint16_t ixm_ipsec_src_port;	/* Source port number of d-gram. */
	uint16_t ixm_ipsec_dst_port;	/* Destination port number of d-gram. */
	uint8_t  ixm_ipsec_icmp_type;	/* ICMP type of d-gram */
	uint8_t  ixm_ipsec_icmp_code;	/* ICMP code of d-gram */

	sa_family_t ixm_ipsec_inaf;	/* Inner address family */
	uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN];	/* Inner src address */
	uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN];	/* Inner dest address */
	uint8_t  ixm_ipsec_insrcpfx;	/* Inner source prefix */
	uint8_t  ixm_ipsec_indstpfx;	/* Inner destination prefix */

	uint8_t ixm_ipsec_proto;	/* IP protocol number for d-gram. */
} ixamblk_t;


/*
 * When we need to handle a receive side asynchronous operation, then we need
 * to save sufficient information so that we can call ip_fanout.
 * That information is captured in an mblk containing this structure.
 *
 * Since this is currently only used for IPsec, we include information for
 * the kernel crypto framework.
*/
/*
 * Filled in by ip_recv_attr_to_mblk(), consumed by ip_recv_attr_from_mblk()
 * and discarded by ip_recv_attr_free_mblk().
 * The ill and rill are recorded as interface indices (irm_ifindex and
 * irm_rifindex) rather than as pointers; irm_stackid is set to -1 when the
 * ip_recv_attr_t had no ira_ill.
 */
typedef struct iramblk_s {
	boolean_t	irm_inbound;	/* B_TRUE */
	iaflags_t	irm_flags;	/* ira_flags */
	netstackid_t	irm_stackid;	/* Verify it didn't go away */
	uint_t		irm_ifindex;	/* To find ira_ill */

	uint_t		irm_rifindex;	/* ira_rifindex */
	uint_t		irm_ruifindex;	/* ira_ruifindex */
	uint_t		irm_pktlen;	/* ira_pktlen */
	uint16_t	irm_ip_hdr_length; /* Points to ULP header */
	uint8_t		irm_protocol;	/* Protocol number for ULP cksum */
	zoneid_t	irm_zoneid;	/* ALL_ZONES unless local delivery */

	squeue_t	*irm_sqp;	/* ira_sqp; no reference is taken */
	ill_rx_ring_t	*irm_ring;	/* ira_ring; no reference is taken */

	ipaddr_t	irm_mroute_tunnel;	/* IRAF_MROUTE_TUNNEL_SET */
	zoneid_t	irm_no_loop_zoneid;	/* IRAF_NO_LOOP_ZONEID_SET */
	uint32_t	irm_esp_udp_ports;	/* IRAF_ESP_UDP_PORTS */

	char		irm_l2src[IRA_L2SRC_SIZE];	/* If IRAF_L2SRC_SET */

	cred_t		*irm_cred;	/* For getpeerucred - refhold if set */
	pid_t		irm_cpid;	/* For getpeerucred */

	ts_label_t	*irm_tsl;	/* Refhold if set. */

	/*
	 * When set these correspond to a refhold on the object.
	 */
	struct ipsa_s		*irm_ipsec_ah_sa;	/* SA for AH */
	struct ipsa_s		*irm_ipsec_esp_sa;	/* SA for ESP */
	struct ipsec_action_s	*irm_ipsec_action; /* For reflected packets */
} iramblk_t;


/*
 * Take the information in ip_xmit_attr_t and stick it in an mblk
 * that can later be passed to ip_xmit_attr_from_mblk to recreate the
 * ip_xmit_attr_t.
 *
 * Returns NULL on memory allocation failure.
225 */ 226 mblk_t * 227 ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa) 228 { 229 mblk_t *ixamp; 230 ixamblk_t *ixm; 231 nce_t *nce = ixa->ixa_nce; 232 233 ASSERT(nce != NULL); 234 ixamp = allocb(sizeof (*ixm), BPRI_MED); 235 if (ixamp == NULL) 236 return (NULL); 237 238 ixamp->b_datap->db_type = M_BREAK; 239 ixamp->b_wptr += sizeof (*ixm); 240 ixm = (ixamblk_t *)ixamp->b_rptr; 241 242 bzero(ixm, sizeof (*ixm)); 243 ixm->ixm_inbound = B_FALSE; 244 ixm->ixm_flags = ixa->ixa_flags; 245 ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid; 246 ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex; 247 ixm->ixm_nceaddr_v6 = nce->nce_addr; 248 ixm->ixm_fragsize = ixa->ixa_fragsize; 249 ixm->ixm_pktlen = ixa->ixa_pktlen; 250 ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length; 251 ixm->ixm_protocol = ixa->ixa_protocol; 252 ixm->ixm_postfragfn = ixa->ixa_postfragfn; 253 ixm->ixm_zoneid = ixa->ixa_zoneid; 254 ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid; 255 ixm->ixm_scopeid = ixa->ixa_scopeid; 256 ixm->ixm_ident = ixa->ixa_ident; 257 ixm->ixm_xmit_hint = ixa->ixa_xmit_hint; 258 259 if (ixa->ixa_tsl != NULL) { 260 ixm->ixm_tsl = ixa->ixa_tsl; 261 label_hold(ixm->ixm_tsl); 262 } 263 if (ixa->ixa_cred != NULL) { 264 ixm->ixm_cred = ixa->ixa_cred; 265 crhold(ixa->ixa_cred); 266 } 267 ixm->ixm_cpid = ixa->ixa_cpid; 268 269 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) { 270 if (ixa->ixa_ipsec_ah_sa != NULL) { 271 ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa; 272 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa); 273 } 274 if (ixa->ixa_ipsec_esp_sa != NULL) { 275 ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa; 276 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa); 277 } 278 if (ixa->ixa_ipsec_policy != NULL) { 279 ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy; 280 IPPOL_REFHOLD(ixa->ixa_ipsec_policy); 281 } 282 if (ixa->ixa_ipsec_action != NULL) { 283 ixm->ixm_ipsec_action = ixa->ixa_ipsec_action; 284 IPACT_REFHOLD(ixa->ixa_ipsec_action); 285 } 286 if (ixa->ixa_ipsec_latch != NULL) { 287 
ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch; 288 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch); 289 } 290 ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0]; 291 ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1]; 292 ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port; 293 ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port; 294 ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type; 295 ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code; 296 ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf; 297 ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0]; 298 ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1]; 299 ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2]; 300 ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3]; 301 ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0]; 302 ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1]; 303 ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2]; 304 ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3]; 305 ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx; 306 ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx; 307 ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto; 308 } 309 return (ixamp); 310 } 311 312 /* 313 * Extract the ip_xmit_attr_t from the mblk, checking that the 314 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is 315 * not the case. 316 * 317 * Otherwise ixa is updated. 318 * Caller needs to release references on the ixa by calling ixa_refrele() 319 * which will imediately call ixa_inactive to release the references. 
*/
boolean_t
ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa)
{
	ixamblk_t	*ixm;
	netstack_t	*ns;
	ip_stack_t	*ipst;
	ill_t		*ill;
	nce_t		*nce;

	/* We assume the caller hasn't initialized ixa */
	bzero(ixa, sizeof (*ixa));

	ASSERT(DB_TYPE(ixamp) == M_BREAK);
	ASSERT(ixamp->b_cont == NULL);

	ixm = (ixamblk_t *)ixamp->b_rptr;
	ASSERT(!ixm->ixm_inbound);

	/* Verify the netstack is still around */
	ns = netstack_find_by_stackid(ixm->ixm_stackid);
	if (ns == NULL) {
		/* Disappeared on us */
		(void) ip_xmit_attr_free_mblk(ixamp);
		return (B_FALSE);
	}
	ipst = ns->netstack_ip;

	/* Verify the ill is still around */
	ill = ill_lookup_on_ifindex(ixm->ixm_ifindex,
	    !(ixm->ixm_flags & IXAF_IS_IPV4), ipst);

	/* We have the ill, hence the netstack can't go away */
	netstack_rele(ns);
	if (ill == NULL) {
		/* Disappeared on us */
		(void) ip_xmit_attr_free_mblk(ixamp);
		return (B_FALSE);
	}
	/*
	 * Find the nce. We don't load-spread (only lookup nce's on the ill)
	 * because we want to find the same nce as the one we had when
	 * ip_xmit_attr_to_mblk was called.
	 */
	if (ixm->ixm_flags & IXAF_IS_IPV4) {
		nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4);
	} else {
		nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6);
	}

	/* We have the nce, hence the ill can't go away */
	ill_refrele(ill);
	if (nce == NULL) {
		/*
		 * Since this is unusual and we don't know what type of
		 * nce it was, we drop the packet.
		 */
		(void) ip_xmit_attr_free_mblk(ixamp);
		return (B_FALSE);
	}

	/*
	 * Nothing can fail from here on. The nce found above is stored in
	 * ixa_nce, and the ixa starts out with a single reference.
	 */
	ixa->ixa_flags = ixm->ixm_flags;
	ixa->ixa_refcnt = 1;
	ixa->ixa_ipst = ipst;
	ixa->ixa_fragsize = ixm->ixm_fragsize;
	ixa->ixa_pktlen = ixm->ixm_pktlen;
	ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length;
	ixa->ixa_protocol = ixm->ixm_protocol;
	ixa->ixa_nce = nce;
	ixa->ixa_postfragfn = ixm->ixm_postfragfn;
	ixa->ixa_zoneid = ixm->ixm_zoneid;
	ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid;
	ixa->ixa_scopeid = ixm->ixm_scopeid;
	ixa->ixa_ident = ixm->ixm_ident;
	ixa->ixa_xmit_hint = ixm->ixm_xmit_hint;

	/*
	 * The holds taken by ip_xmit_attr_to_mblk on the label and the cred
	 * move over to the ixa; the free flags record that the ixa now has
	 * to release them.
	 */
	if (ixm->ixm_tsl != NULL) {
		ixa->ixa_tsl = ixm->ixm_tsl;
		ixa->ixa_free_flags |= IXA_FREE_TSL;
		ixm->ixm_tsl = NULL;
	}
	if (ixm->ixm_cred != NULL) {
		ixa->ixa_cred = ixm->ixm_cred;
		ixa->ixa_free_flags |= IXA_FREE_CRED;
		ixm->ixm_cred = NULL;
	}
	ixa->ixa_cpid = ixm->ixm_cpid;

	/*
	 * The IPsec pointers are copied without taking new holds; the mblk
	 * is freed below with a plain freeb() and hence does not release
	 * the holds it had.
	 */
	ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa;
	ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa;
	ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy;
	ixa->ixa_ipsec_action = ixm->ixm_ipsec_action;
	ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch;

	ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0];
	ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1];
	ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port;
	ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port;
	ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type;
	ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code;
	ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf;
	ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0];
	ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1];
	ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2];
	ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3];
	ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0];
	ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1];
	ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2];
	ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3];
	ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx;
	ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx;
	ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto;

	freeb(ixamp);
	return (B_TRUE);
}

/*
 * Free the ixm mblk and any references it holds
 * Returns b_cont.
 *
 * Only the ixm mblk itself is freed; whatever was linked behind it is
 * handed back to the caller.
 */
mblk_t *
ip_xmit_attr_free_mblk(mblk_t *ixamp)
{
	ixamblk_t	*ixm;
	mblk_t		*mp;

	/* Consume mp */
	ASSERT(DB_TYPE(ixamp) == M_BREAK);
	mp = ixamp->b_cont;

	ixm = (ixamblk_t *)ixamp->b_rptr;
	ASSERT(!ixm->ixm_inbound);

	/* Drop every hold recorded in the mblk, clearing each pointer */
	if (ixm->ixm_ipsec_ah_sa != NULL) {
		IPSA_REFRELE(ixm->ixm_ipsec_ah_sa);
		ixm->ixm_ipsec_ah_sa = NULL;
	}
	if (ixm->ixm_ipsec_esp_sa != NULL) {
		IPSA_REFRELE(ixm->ixm_ipsec_esp_sa);
		ixm->ixm_ipsec_esp_sa = NULL;
	}
	if (ixm->ixm_ipsec_policy != NULL) {
		IPPOL_REFRELE(ixm->ixm_ipsec_policy);
		ixm->ixm_ipsec_policy = NULL;
	}
	if (ixm->ixm_ipsec_action != NULL) {
		IPACT_REFRELE(ixm->ixm_ipsec_action);
		ixm->ixm_ipsec_action = NULL;
	}
	if (ixm->ixm_ipsec_latch) {
		IPLATCH_REFRELE(ixm->ixm_ipsec_latch);
		ixm->ixm_ipsec_latch = NULL;
	}

	if (ixm->ixm_tsl != NULL) {
		label_rele(ixm->ixm_tsl);
		ixm->ixm_tsl = NULL;
	}
	if (ixm->ixm_cred != NULL) {
		crfree(ixm->ixm_cred);
		ixm->ixm_cred = NULL;
	}
	freeb(ixamp);
	return (mp);
}

/*
 * Take the information in ip_recv_attr_t and stick it in an mblk
 * that can later be passed to ip_recv_attr_from_mblk to recreate the
 * ip_recv_attr_t.
 *
 * Returns NULL on memory allocation failure.
*/
mblk_t *
ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
{
	mblk_t		*iramp;
	iramblk_t	*irm;
	ill_t		*ill = ira->ira_ill;

	ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);

	iramp = allocb(sizeof (*irm), BPRI_MED);
	if (iramp == NULL)
		return (NULL);

	/* The M_BREAK mblk carries exactly one, initially zeroed, iramblk_t */
	iramp->b_datap->db_type = M_BREAK;
	iramp->b_wptr += sizeof (*irm);
	irm = (iramblk_t *)iramp->b_rptr;

	bzero(irm, sizeof (*irm));
	irm->irm_inbound = B_TRUE;
	irm->irm_flags = ira->ira_flags;
	if (ill != NULL) {
		/* Internal to IP - preserve ip_stack_t, ill and rill */
		irm->irm_stackid =
		    ill->ill_ipst->ips_netstack->netstack_stackid;
		irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
		ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
		    ira->ira_rifindex);
	} else {
		/* Let ip_recv_attr_from_mblk know there isn't one */
		irm->irm_stackid = -1;
	}
	irm->irm_rifindex = ira->ira_rifindex;
	irm->irm_ruifindex = ira->ira_ruifindex;
	irm->irm_pktlen = ira->ira_pktlen;
	irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
	irm->irm_protocol = ira->ira_protocol;

	/* The squeue and ring pointers are stored without taking a hold */
	irm->irm_sqp = ira->ira_sqp;
	irm->irm_ring = ira->ira_ring;

	irm->irm_zoneid = ira->ira_zoneid;
	irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
	irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
	irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;

	/* The label and the cred each get a hold of their own */
	if (ira->ira_tsl != NULL) {
		irm->irm_tsl = ira->ira_tsl;
		label_hold(irm->irm_tsl);
	}
	if (ira->ira_cred != NULL) {
		irm->irm_cred = ira->ira_cred;
		crhold(ira->ira_cred);
	}
	irm->irm_cpid = ira->ira_cpid;

	if (ira->ira_flags & IRAF_L2SRC_SET)
		bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);

	/* Every IPsec pointer that is set gets a hold of its own */
	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		if (ira->ira_ipsec_ah_sa != NULL) {
			irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
			IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
		}
		if (ira->ira_ipsec_esp_sa != NULL) {
			irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
			IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
		}
		if (ira->ira_ipsec_action != NULL) {
			irm->irm_ipsec_action = ira->ira_ipsec_action;
			IPACT_REFHOLD(ira->ira_ipsec_action);
		}
	}
	return (iramp);
}

/*
 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
 * then irm_stackid is not -1, in which case we check that the
 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
 * not the case.
 * If irm_stackid is -1 then we are used by an ULP (e.g., squeue_enter)
 * and we just proceed with ira_ill and ira_rill as NULL.
 *
 * The mblk is consumed in both cases.
 *
 * The caller needs to release any references on the pointers inside the ira
 * by calling ira_cleanup.
 */
boolean_t
ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
{
	iramblk_t	*irm;
	netstack_t	*ns;
	ip_stack_t	*ipst = NULL;
	ill_t		*ill = NULL, *rill = NULL;

	/* We assume the caller hasn't initialized ira */
	bzero(ira, sizeof (*ira));

	ASSERT(DB_TYPE(iramp) == M_BREAK);
	ASSERT(iramp->b_cont == NULL);

	irm = (iramblk_t *)iramp->b_rptr;
	ASSERT(irm->irm_inbound);

	if (irm->irm_stackid != -1) {
		/* Verify the netstack is still around */
		ns = netstack_find_by_stackid(irm->irm_stackid);
		if (ns == NULL) {
			/* Disappeared on us */
			(void) ip_recv_attr_free_mblk(iramp);
			return (B_FALSE);
		}
		ipst = ns->netstack_ip;

		/* Verify the ill is still around */
		ill = ill_lookup_on_ifindex(irm->irm_ifindex,
		    !(irm->irm_flags & IRAF_IS_IPV4), ipst);

		/* Only do a second lookup when rill differs from ill */
		if (irm->irm_ifindex == irm->irm_rifindex) {
			rill = ill;
		} else {
			rill = ill_lookup_on_ifindex(irm->irm_rifindex,
			    !(irm->irm_flags & IRAF_IS_IPV4), ipst);
		}

		/* We have the ill, hence the netstack can't go away */
		netstack_rele(ns);
		if (ill == NULL || rill == NULL) {
			/* Disappeared on us */
			if (ill != NULL)
				ill_refrele(ill);
			/* rill == ill came from the one lookup above */
			if (rill != NULL && rill != ill)
				ill_refrele(rill);
			(void) ip_recv_attr_free_mblk(iramp);
			return (B_FALSE);
		}
	}

	ira->ira_flags = irm->irm_flags;
	/* Caller must ill_refrele(ira_ill) by using ira_cleanup() */
	ira->ira_ill = ill;
	ira->ira_rill = rill;

	ira->ira_rifindex = irm->irm_rifindex;
	ira->ira_ruifindex = irm->irm_ruifindex;
	ira->ira_pktlen = irm->irm_pktlen;
	ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
	ira->ira_protocol = irm->irm_protocol;

	ira->ira_sqp = irm->irm_sqp;
	/* The rest of IP assumes that the rings never go away. */
	ira->ira_ring = irm->irm_ring;

	ira->ira_zoneid = irm->irm_zoneid;
	ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
	ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
	ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;

	/*
	 * The holds taken by ip_recv_attr_to_mblk on the label and the cred
	 * move over to the ira; the free flags record that the ira now has
	 * to release them.
	 */
	if (irm->irm_tsl != NULL) {
		ira->ira_tsl = irm->irm_tsl;
		ira->ira_free_flags |= IRA_FREE_TSL;
		irm->irm_tsl = NULL;
	}
	if (irm->irm_cred != NULL) {
		ira->ira_cred = irm->irm_cred;
		ira->ira_free_flags |= IRA_FREE_CRED;
		irm->irm_cred = NULL;
	}
	ira->ira_cpid = irm->irm_cpid;

	if (ira->ira_flags & IRAF_L2SRC_SET)
		bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);

	/*
	 * The IPsec pointers are copied without taking new holds; the mblk
	 * is freed below with a plain freeb() and hence does not release
	 * the holds it had.
	 */
	ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
	ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
	ira->ira_ipsec_action = irm->irm_ipsec_action;

	freeb(iramp);
	return (B_TRUE);
}

/*
 * Free the irm mblk and any references it holds
 * Returns b_cont.
677 */ 678 mblk_t * 679 ip_recv_attr_free_mblk(mblk_t *iramp) 680 { 681 iramblk_t *irm; 682 mblk_t *mp; 683 684 /* Consume mp */ 685 ASSERT(DB_TYPE(iramp) == M_BREAK); 686 mp = iramp->b_cont; 687 688 irm = (iramblk_t *)iramp->b_rptr; 689 ASSERT(irm->irm_inbound); 690 691 if (irm->irm_ipsec_ah_sa != NULL) { 692 IPSA_REFRELE(irm->irm_ipsec_ah_sa); 693 irm->irm_ipsec_ah_sa = NULL; 694 } 695 if (irm->irm_ipsec_esp_sa != NULL) { 696 IPSA_REFRELE(irm->irm_ipsec_esp_sa); 697 irm->irm_ipsec_esp_sa = NULL; 698 } 699 if (irm->irm_ipsec_action != NULL) { 700 IPACT_REFRELE(irm->irm_ipsec_action); 701 irm->irm_ipsec_action = NULL; 702 } 703 if (irm->irm_tsl != NULL) { 704 label_rele(irm->irm_tsl); 705 irm->irm_tsl = NULL; 706 } 707 if (irm->irm_cred != NULL) { 708 crfree(irm->irm_cred); 709 irm->irm_cred = NULL; 710 } 711 712 freeb(iramp); 713 return (mp); 714 } 715 716 /* 717 * Returns true if the mblk contains an ip_recv_attr_t 718 * For now we just check db_type. 719 */ 720 boolean_t 721 ip_recv_attr_is_mblk(mblk_t *mp) 722 { 723 /* 724 * Need to handle the various forms of tcp_timermp which are tagged 725 * with b_wptr and might have a NULL b_datap. 
726 */ 727 if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1) 728 return (B_FALSE); 729 730 #ifdef DEBUG 731 iramblk_t *irm; 732 733 if (DB_TYPE(mp) != M_BREAK) 734 return (B_FALSE); 735 736 irm = (iramblk_t *)mp->b_rptr; 737 ASSERT(irm->irm_inbound); 738 return (B_TRUE); 739 #else 740 return (DB_TYPE(mp) == M_BREAK); 741 #endif 742 } 743 744 static ip_xmit_attr_t * 745 conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag) 746 { 747 ip_xmit_attr_t *ixa; 748 ip_xmit_attr_t *oldixa; 749 750 mutex_enter(&connp->conn_lock); 751 ixa = connp->conn_ixa; 752 753 /* At least one references for the conn_t */ 754 ASSERT(ixa->ixa_refcnt >= 1); 755 if (atomic_add_32_nv(&ixa->ixa_refcnt, 1) == 2) { 756 /* No other thread using conn_ixa */ 757 mutex_exit(&connp->conn_lock); 758 return (ixa); 759 } 760 ixa = kmem_alloc(sizeof (*ixa), kmflag); 761 if (ixa == NULL) { 762 mutex_exit(&connp->conn_lock); 763 ixa_refrele(connp->conn_ixa); 764 return (NULL); 765 } 766 ixa_safe_copy(connp->conn_ixa, ixa); 767 768 /* Make sure we drop conn_lock before any refrele */ 769 if (replace) { 770 ixa->ixa_refcnt++; /* No atomic needed - not visible */ 771 oldixa = connp->conn_ixa; 772 connp->conn_ixa = ixa; 773 mutex_exit(&connp->conn_lock); 774 IXA_REFRELE(oldixa); /* Undo refcnt from conn_t */ 775 } else { 776 oldixa = connp->conn_ixa; 777 mutex_exit(&connp->conn_lock); 778 } 779 IXA_REFRELE(oldixa); /* Undo above atomic_add_32_nv */ 780 781 return (ixa); 782 } 783 784 /* 785 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only 786 * the caller can access the ip_xmit_attr_t. 787 * 788 * If nobody else is using conn_ixa we return it. 789 * Otherwise we make a "safe" copy of conn_ixa 790 * and return it. The "safe" copy has the pointers set to NULL 791 * (since the pointers might be changed by another thread using 792 * conn_ixa). The caller needs to check for NULL pointers to see 793 * if ip_set_destination needs to be called to re-establish the pointers. 
794 * 795 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t. 796 * That is used when we connect() the ULP. 797 */ 798 ip_xmit_attr_t * 799 conn_get_ixa(conn_t *connp, boolean_t replace) 800 { 801 return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP)); 802 } 803 804 /* 805 * Used only when the option is to have the kernel hang due to not 806 * cleaning up ixa references on ills etc. 807 */ 808 ip_xmit_attr_t * 809 conn_get_ixa_tryhard(conn_t *connp, boolean_t replace) 810 { 811 return (conn_get_ixa_impl(connp, replace, KM_SLEEP)); 812 } 813 814 /* 815 * Replace conn_ixa with the ixa argument. 816 * 817 * The caller must hold conn_lock. 818 * 819 * We return the old ixa; the caller must ixa_refrele that after conn_lock 820 * has been dropped. 821 */ 822 ip_xmit_attr_t * 823 conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa) 824 { 825 ip_xmit_attr_t *oldixa; 826 827 ASSERT(MUTEX_HELD(&connp->conn_lock)); 828 829 oldixa = connp->conn_ixa; 830 IXA_REFHOLD(ixa); 831 connp->conn_ixa = ixa; 832 return (oldixa); 833 } 834 835 /* 836 * Return a ip_xmit_attr_t to use with a conn_t that is based on but 837 * separate from conn_ixa. 838 * 839 * This "safe" copy has the pointers set to NULL 840 * (since the pointers might be changed by another thread using 841 * conn_ixa). The caller needs to check for NULL pointers to see 842 * if ip_set_destination needs to be called to re-establish the pointers. 
843 */ 844 ip_xmit_attr_t * 845 conn_get_ixa_exclusive(conn_t *connp) 846 { 847 ip_xmit_attr_t *ixa; 848 849 mutex_enter(&connp->conn_lock); 850 ixa = connp->conn_ixa; 851 852 /* At least one references for the conn_t */ 853 ASSERT(ixa->ixa_refcnt >= 1); 854 855 /* Make sure conn_ixa doesn't disappear while we copy it */ 856 atomic_add_32(&ixa->ixa_refcnt, 1); 857 858 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP); 859 if (ixa == NULL) { 860 mutex_exit(&connp->conn_lock); 861 ixa_refrele(connp->conn_ixa); 862 return (NULL); 863 } 864 ixa_safe_copy(connp->conn_ixa, ixa); 865 mutex_exit(&connp->conn_lock); 866 IXA_REFRELE(connp->conn_ixa); 867 return (ixa); 868 } 869 870 void 871 ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa) 872 { 873 bcopy(src, ixa, sizeof (*ixa)); 874 ixa->ixa_refcnt = 1; 875 /* 876 * Clear any pointers that have references and might be changed 877 * by ip_set_destination or the ULP 878 */ 879 ixa->ixa_ire = NULL; 880 ixa->ixa_nce = NULL; 881 ixa->ixa_dce = NULL; 882 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 883 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 884 #ifdef DEBUG 885 ixa->ixa_curthread = NULL; 886 #endif 887 /* Clear all the IPsec pointers and the flag as well. */ 888 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 889 890 ixa->ixa_ipsec_latch = NULL; 891 ixa->ixa_ipsec_ah_sa = NULL; 892 ixa->ixa_ipsec_esp_sa = NULL; 893 ixa->ixa_ipsec_policy = NULL; 894 ixa->ixa_ipsec_action = NULL; 895 896 /* 897 * We leave ixa_tsl unchanged, but if it has a refhold we need 898 * to get an extra refhold. 899 */ 900 if (ixa->ixa_free_flags & IXA_FREE_TSL) 901 label_hold(ixa->ixa_tsl); 902 903 /* 904 * We leave ixa_cred unchanged, but if it has a refhold we need 905 * to get an extra refhold. 906 */ 907 if (ixa->ixa_free_flags & IXA_FREE_CRED) 908 crhold(ixa->ixa_cred); 909 } 910 911 /* 912 * Duplicate an ip_xmit_attr_t. 913 * Assumes that the caller controls the ixa, hence we do not need to use 914 * a safe copy. 
We just have to increase the refcnt on any pointers. 915 */ 916 ip_xmit_attr_t * 917 ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa) 918 { 919 ip_xmit_attr_t *ixa; 920 921 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP); 922 if (ixa == NULL) 923 return (NULL); 924 bcopy(src_ixa, ixa, sizeof (*ixa)); 925 ixa->ixa_refcnt = 1; 926 927 if (ixa->ixa_ire != NULL) 928 ire_refhold_notr(ixa->ixa_ire); 929 if (ixa->ixa_nce != NULL) 930 nce_refhold(ixa->ixa_nce); 931 if (ixa->ixa_dce != NULL) 932 dce_refhold_notr(ixa->ixa_dce); 933 934 #ifdef DEBUG 935 ixa->ixa_curthread = NULL; 936 #endif 937 938 if (ixa->ixa_ipsec_latch != NULL) 939 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch); 940 if (ixa->ixa_ipsec_ah_sa != NULL) 941 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa); 942 if (ixa->ixa_ipsec_esp_sa != NULL) 943 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa); 944 if (ixa->ixa_ipsec_policy != NULL) 945 IPPOL_REFHOLD(ixa->ixa_ipsec_policy); 946 if (ixa->ixa_ipsec_action != NULL) 947 IPACT_REFHOLD(ixa->ixa_ipsec_action); 948 949 if (ixa->ixa_tsl != NULL) { 950 label_hold(ixa->ixa_tsl); 951 ixa->ixa_free_flags |= IXA_FREE_TSL; 952 } 953 if (ixa->ixa_cred != NULL) { 954 crhold(ixa->ixa_cred); 955 ixa->ixa_free_flags |= IXA_FREE_CRED; 956 } 957 return (ixa); 958 } 959 960 /* 961 * Used to replace the ixa_label field. 962 * The caller should have a reference on the label, which we transfer to 963 * the attributes so that when the attribute is freed/cleaned up 964 * we will release that reference. 965 */ 966 void 967 ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl) 968 { 969 ASSERT(tsl != NULL); 970 971 if (ixa->ixa_free_flags & IXA_FREE_TSL) { 972 ASSERT(ixa->ixa_tsl != NULL); 973 label_rele(ixa->ixa_tsl); 974 } else { 975 ixa->ixa_free_flags |= IXA_FREE_TSL; 976 } 977 ixa->ixa_tsl = tsl; 978 } 979 980 /* 981 * Replace the ip_recv_attr_t's label. 982 * Due to kernel RPC's use of db_credp we also need to replace ira_cred; 983 * TCP/UDP uses ira_cred to set db_credp for non-socket users. 
984 * This can fail (and return B_FALSE) due to lack of memory. 985 */ 986 boolean_t 987 ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl) 988 { 989 cred_t *newcr; 990 991 if (ira->ira_free_flags & IRA_FREE_TSL) { 992 ASSERT(ira->ira_tsl != NULL); 993 label_rele(ira->ira_tsl); 994 } 995 label_hold(tsl); 996 ira->ira_tsl = tsl; 997 ira->ira_free_flags |= IRA_FREE_TSL; 998 999 /* 1000 * Reset zoneid if we have a shared address. That allows 1001 * ip_fanout_tx_v4/v6 to determine the zoneid again. 1002 */ 1003 if (ira->ira_flags & IRAF_TX_SHARED_ADDR) 1004 ira->ira_zoneid = ALL_ZONES; 1005 1006 /* We update ira_cred for RPC */ 1007 newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP); 1008 if (newcr == NULL) 1009 return (B_FALSE); 1010 if (ira->ira_free_flags & IRA_FREE_CRED) 1011 crfree(ira->ira_cred); 1012 ira->ira_cred = newcr; 1013 ira->ira_free_flags |= IRA_FREE_CRED; 1014 return (B_TRUE); 1015 } 1016 1017 /* 1018 * This needs to be called after ip_set_destination/tsol_check_dest might 1019 * have changed ixa_tsl to be specific for a destination, and we now want to 1020 * send to a different destination. 1021 * We have to restart with crgetlabel() since ip_set_destination/ 1022 * tsol_check_dest will start with ixa_tsl. 1023 */ 1024 void 1025 ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr) 1026 { 1027 if (!is_system_labeled()) 1028 return; 1029 1030 if (ixa->ixa_free_flags & IXA_FREE_TSL) { 1031 ASSERT(ixa->ixa_tsl != NULL); 1032 label_rele(ixa->ixa_tsl); 1033 ixa->ixa_free_flags &= ~IXA_FREE_TSL; 1034 } 1035 ixa->ixa_tsl = crgetlabel(cr); 1036 } 1037 1038 void 1039 ixa_refrele(ip_xmit_attr_t *ixa) 1040 { 1041 IXA_REFRELE(ixa); 1042 } 1043 1044 void 1045 ixa_inactive(ip_xmit_attr_t *ixa) 1046 { 1047 ASSERT(ixa->ixa_refcnt == 0); 1048 1049 ixa_cleanup(ixa); 1050 kmem_free(ixa, sizeof (*ixa)); 1051 } 1052 1053 /* 1054 * Release any references contained in the ixa. 
1055 * Also clear any fields that are not controlled by ixa_flags. 1056 */ 1057 void 1058 ixa_cleanup(ip_xmit_attr_t *ixa) 1059 { 1060 if (ixa->ixa_ire != NULL) { 1061 ire_refrele_notr(ixa->ixa_ire); 1062 ixa->ixa_ire = NULL; 1063 } 1064 if (ixa->ixa_dce != NULL) { 1065 dce_refrele_notr(ixa->ixa_dce); 1066 ixa->ixa_dce = NULL; 1067 } 1068 if (ixa->ixa_nce != NULL) { 1069 nce_refrele(ixa->ixa_nce); 1070 ixa->ixa_nce = NULL; 1071 } 1072 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 1073 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 1074 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) { 1075 ipsec_out_release_refs(ixa); 1076 } 1077 if (ixa->ixa_free_flags & IXA_FREE_TSL) { 1078 ASSERT(ixa->ixa_tsl != NULL); 1079 label_rele(ixa->ixa_tsl); 1080 ixa->ixa_free_flags &= ~IXA_FREE_TSL; 1081 } 1082 ixa->ixa_tsl = NULL; 1083 if (ixa->ixa_free_flags & IXA_FREE_CRED) { 1084 ASSERT(ixa->ixa_cred != NULL); 1085 crfree(ixa->ixa_cred); 1086 ixa->ixa_free_flags &= ~IXA_FREE_CRED; 1087 } 1088 ixa->ixa_cred = NULL; 1089 ixa->ixa_src_preferences = 0; 1090 ixa->ixa_ifindex = 0; 1091 ixa->ixa_multicast_ifindex = 0; 1092 ixa->ixa_multicast_ifaddr = INADDR_ANY; 1093 } 1094 1095 /* 1096 * Release any references contained in the ira. 1097 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second 1098 * argument. 
1099 */ 1100 void 1101 ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill) 1102 { 1103 if (ira->ira_ill != NULL) { 1104 if (ira->ira_rill != ira->ira_ill) { 1105 /* Caused by async processing */ 1106 ill_refrele(ira->ira_rill); 1107 } 1108 if (refrele_ill) 1109 ill_refrele(ira->ira_ill); 1110 } 1111 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1112 ipsec_in_release_refs(ira); 1113 } 1114 if (ira->ira_free_flags & IRA_FREE_TSL) { 1115 ASSERT(ira->ira_tsl != NULL); 1116 label_rele(ira->ira_tsl); 1117 ira->ira_free_flags &= ~IRA_FREE_TSL; 1118 } 1119 ira->ira_tsl = NULL; 1120 if (ira->ira_free_flags & IRA_FREE_CRED) { 1121 ASSERT(ira->ira_cred != NULL); 1122 crfree(ira->ira_cred); 1123 ira->ira_free_flags &= ~IRA_FREE_CRED; 1124 } 1125 ira->ira_cred = NULL; 1126 } 1127 1128 /* 1129 * Function to help release any IRE, NCE, or DCEs that 1130 * have been deleted and are marked as condemned. 1131 * The caller is responsible for any serialization which is different 1132 * for TCP, SCTP, and others. 
1133 */ 1134 static void 1135 ixa_cleanup_stale(ip_xmit_attr_t *ixa) 1136 { 1137 ire_t *ire; 1138 nce_t *nce; 1139 dce_t *dce; 1140 1141 ire = ixa->ixa_ire; 1142 nce = ixa->ixa_nce; 1143 dce = ixa->ixa_dce; 1144 1145 if (ire != NULL && IRE_IS_CONDEMNED(ire)) { 1146 ire_refrele_notr(ire); 1147 ire = ire_blackhole(ixa->ixa_ipst, 1148 !(ixa->ixa_flags & IXAF_IS_IPV4)); 1149 ASSERT(ire != NULL); 1150 #ifdef DEBUG 1151 ire_refhold_notr(ire); 1152 ire_refrele(ire); 1153 #endif 1154 ixa->ixa_ire = ire; 1155 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 1156 } 1157 if (nce != NULL && nce->nce_is_condemned) { 1158 /* Can make it NULL as long as we set IRE_GENERATION_VERIFY */ 1159 nce_refrele(nce); 1160 ixa->ixa_nce = NULL; 1161 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 1162 } 1163 if (dce != NULL && DCE_IS_CONDEMNED(dce)) { 1164 dce_refrele_notr(dce); 1165 dce = dce_get_default(ixa->ixa_ipst); 1166 ASSERT(dce != NULL); 1167 #ifdef DEBUG 1168 dce_refhold_notr(dce); 1169 dce_refrele(dce); 1170 #endif 1171 ixa->ixa_dce = dce; 1172 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 1173 } 1174 } 1175 1176 /* 1177 * Used to run ixa_cleanup_stale inside the tcp squeue. 1178 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp 1179 * and waking up the caller. 1180 */ 1181 /* ARGSUSED2 */ 1182 static void 1183 tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2, 1184 ip_recv_attr_t *dummy) 1185 { 1186 conn_t *connp = (conn_t *)arg; 1187 tcp_stack_t *tcps; 1188 1189 tcps = connp->conn_netstack->netstack_tcp; 1190 1191 ixa_cleanup_stale(connp->conn_ixa); 1192 1193 mutex_enter(&tcps->tcps_ixa_cleanup_lock); 1194 ASSERT(tcps->tcps_ixa_cleanup_mp == NULL); 1195 tcps->tcps_ixa_cleanup_mp = mp; 1196 cv_signal(&tcps->tcps_ixa_cleanup_cv); 1197 mutex_exit(&tcps->tcps_ixa_cleanup_lock); 1198 } 1199 1200 1201 /* 1202 * ipcl_walk() function to help release any IRE, NCE, or DCEs that 1203 * have been deleted and are marked as condemned. 
1204 * Note that we can't cleanup the pointers since there can be threads 1205 * in conn_ip_output() sending while we are called. 1206 */ 1207 void 1208 conn_ixa_cleanup(conn_t *connp, void *arg) 1209 { 1210 boolean_t tryhard = (boolean_t)arg; 1211 1212 if (IPCL_IS_TCP(connp)) { 1213 mblk_t *mp; 1214 tcp_stack_t *tcps; 1215 1216 tcps = connp->conn_netstack->netstack_tcp; 1217 1218 mutex_enter(&tcps->tcps_ixa_cleanup_lock); 1219 while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) { 1220 /* 1221 * Multiple concurrent cleanups; need to have the last 1222 * one run since it could be an unplumb. 1223 */ 1224 cv_wait(&tcps->tcps_ixa_cleanup_cv, 1225 &tcps->tcps_ixa_cleanup_lock); 1226 } 1227 tcps->tcps_ixa_cleanup_mp = NULL; 1228 mutex_exit(&tcps->tcps_ixa_cleanup_lock); 1229 1230 if (connp->conn_sqp->sq_run == curthread) { 1231 /* Already on squeue */ 1232 tcp_ixa_cleanup(connp, mp, NULL, NULL); 1233 } else { 1234 CONN_INC_REF(connp); 1235 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup, 1236 connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP); 1237 1238 /* Wait until tcp_ixa_cleanup has run */ 1239 mutex_enter(&tcps->tcps_ixa_cleanup_lock); 1240 while (tcps->tcps_ixa_cleanup_mp == NULL) { 1241 cv_wait(&tcps->tcps_ixa_cleanup_cv, 1242 &tcps->tcps_ixa_cleanup_lock); 1243 } 1244 mutex_exit(&tcps->tcps_ixa_cleanup_lock); 1245 } 1246 } else if (IPCL_IS_SCTP(connp)) { 1247 sctp_t *sctp; 1248 sctp_faddr_t *fp; 1249 1250 sctp = CONN2SCTP(connp); 1251 RUN_SCTP(sctp); 1252 ixa_cleanup_stale(connp->conn_ixa); 1253 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) 1254 ixa_cleanup_stale(fp->ixa); 1255 WAKE_SCTP(sctp); 1256 } else { 1257 ip_xmit_attr_t *ixa; 1258 1259 /* 1260 * If there is a different thread using conn_ixa then we get a 1261 * new copy and cut the old one loose from conn_ixa. Otherwise 1262 * we use conn_ixa and prevent any other thread from 1263 * using/changing it. 
Anybody using conn_ixa (e.g., a thread in 1264 * conn_ip_output) will do an ixa_refrele which will remove any 1265 * references on the ire etc. 1266 * 1267 * Once we are done other threads can use conn_ixa since the 1268 * refcnt will be back at one. 1269 * 1270 * We are called either because an ill is going away, or 1271 * due to memory reclaim. In the former case we wait for 1272 * memory since we must remove the refcnts on the ill. 1273 */ 1274 if (tryhard) { 1275 ixa = conn_get_ixa_tryhard(connp, B_TRUE); 1276 ASSERT(ixa != NULL); 1277 } else { 1278 ixa = conn_get_ixa(connp, B_TRUE); 1279 if (ixa == NULL) { 1280 /* 1281 * Somebody else was using it and kmem_alloc 1282 * failed! Next memory reclaim will try to 1283 * clean up. 1284 */ 1285 DTRACE_PROBE1(conn__ixa__cleanup__bail, 1286 conn_t *, connp); 1287 return; 1288 } 1289 } 1290 ixa_cleanup_stale(ixa); 1291 ixa_refrele(ixa); 1292 } 1293 } 1294 1295 /* 1296 * ixa needs to be an exclusive copy so that no one changes the cookie 1297 * or the ixa_nce. 1298 */ 1299 boolean_t 1300 ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa) 1301 { 1302 uintptr_t cookie = ixa->ixa_cookie; 1303 ill_dld_direct_t *idd; 1304 idl_tx_list_t *idl_txl; 1305 ill_t *ill = ixa->ixa_nce->nce_ill; 1306 boolean_t inserted = B_FALSE; 1307 1308 idd = &(ill)->ill_dld_capab->idc_direct; 1309 idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; 1310 mutex_enter(&idl_txl->txl_lock); 1311 1312 /* 1313 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow 1314 * control is asserted on an ill that does not support direct calls. 1315 * Jump to insert. 
1316 */ 1317 if (cookie == 0) 1318 goto tryinsert; 1319 1320 ASSERT(ILL_DIRECT_CAPABLE(ill)); 1321 1322 if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) { 1323 DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie); 1324 } else if (idl_txl->txl_cookie != NULL && 1325 idl_txl->txl_cookie != ixa->ixa_cookie) { 1326 DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie, 1327 uintptr_t, idl_txl->txl_cookie); 1328 /* TODO: bump kstat for cookie collision */ 1329 } else { 1330 /* 1331 * Check/set conn_blocked under conn_lock. Note that txl_lock 1332 * will not suffice since two separate UDP threads may be 1333 * racing to send to different destinations that are 1334 * associated with different cookies and thus may not be 1335 * holding the same txl_lock. Further, since a given conn_t 1336 * can only be on a single drain list, the conn_t will be 1337 * enqueued on whichever thread wins this race. 1338 */ 1339 tryinsert: mutex_enter(&connp->conn_lock); 1340 if (connp->conn_blocked) { 1341 DTRACE_PROBE1(ill__tx__conn__already__blocked, 1342 conn_t *, connp); 1343 mutex_exit(&connp->conn_lock); 1344 } else { 1345 connp->conn_blocked = B_TRUE; 1346 mutex_exit(&connp->conn_lock); 1347 idl_txl->txl_cookie = cookie; 1348 conn_drain_insert(connp, idl_txl); 1349 if (!IPCL_IS_NONSTR(connp)) 1350 noenable(connp->conn_wq); 1351 inserted = B_TRUE; 1352 } 1353 } 1354 mutex_exit(&idl_txl->txl_lock); 1355 return (inserted); 1356 } 1357