1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/strsun.h> 31 #include <sys/zone.h> 32 #include <sys/ddi.h> 33 #include <sys/sunddi.h> 34 #include <sys/cmn_err.h> 35 #include <sys/debug.h> 36 #include <sys/atomic.h> 37 38 #include <sys/systm.h> 39 #include <sys/param.h> 40 #include <sys/kmem.h> 41 #include <sys/sdt.h> 42 #include <sys/socket.h> 43 #include <sys/mac.h> 44 #include <net/if.h> 45 #include <net/if_arp.h> 46 #include <net/route.h> 47 #include <sys/sockio.h> 48 #include <netinet/in.h> 49 #include <net/if_dl.h> 50 51 #include <inet/common.h> 52 #include <inet/mi.h> 53 #include <inet/mib2.h> 54 #include <inet/nd.h> 55 #include <inet/arp.h> 56 #include <inet/snmpcom.h> 57 #include <inet/kstatcom.h> 58 59 #include <netinet/igmp_var.h> 60 #include <netinet/ip6.h> 61 #include <netinet/icmp6.h> 62 #include <netinet/sctp.h> 63 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip6_asp.h> 68 #include <inet/tcp.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_if.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_ftable.h> 73 #include <inet/ip_rts.h> 74 #include <inet/optcom.h> 75 #include <inet/ip_ndp.h> 76 #include <inet/ip_listutils.h> 77 #include <netinet/igmp.h> 78 #include <netinet/ip_mroute.h> 79 #include <inet/ipp_common.h> 80 81 #include <net/pfkeyv2.h> 82 #include <inet/sadb.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipdrop.h> 85 #include <inet/ip_netinfo.h> 86 #include <sys/squeue_impl.h> 87 #include <sys/squeue.h> 88 89 #include <inet/ipclassifier.h> 90 #include <inet/sctp_ip.h> 91 #include <inet/sctp/sctp_impl.h> 92 #include <inet/udp_impl.h> 93 #include <sys/sunddi.h> 94 95 #include <sys/tsol/label.h> 96 #include <sys/tsol/tnet.h> 97 98 /* 99 * Release a reference on ip_xmit_attr. 100 * The reference is acquired by conn_get_ixa() 101 */ 102 #define IXA_REFRELE(ixa) \ 103 { \ 104 if (atomic_add_32_nv(&(ixa)->ixa_refcnt, -1) == 0) \ 105 ixa_inactive(ixa); \ 106 } 107 108 #define IXA_REFHOLD(ixa) \ 109 { \ 110 ASSERT((ixa)->ixa_refcnt != 0); \ 111 atomic_add_32(&(ixa)->ixa_refcnt, 1); \ 112 } 113 114 /* 115 * When we need to handle a transmit side asynchronous operation, then we need 116 * to save sufficient information so that we can call the fragment and postfrag 117 * functions. That information is captured in an mblk containing this structure. 118 * 119 * Since this is currently only used for IPsec, we include information for 120 * the kernel crypto framework. 121 */ 122 typedef struct ixamblk_s { 123 boolean_t ixm_inbound; /* B_FALSE */ 124 iaflags_t ixm_flags; /* ixa_flags */ 125 netstackid_t ixm_stackid; /* Verify it didn't go away */ 126 uint_t ixm_ifindex; /* Used to find the nce */ 127 in6_addr_t ixm_nceaddr_v6; /* Used to find nce */ 128 #define ixm_nceaddr_v4 V4_PART_OF_V6(ixm_nceaddr_v6) 129 uint32_t ixm_fragsize; 130 uint_t ixm_pktlen; 131 uint16_t ixm_ip_hdr_length; /* Points to ULP header */ 132 uint8_t ixm_protocol; /* Protocol number for ULP cksum */ 133 pfirepostfrag_t ixm_postfragfn; 134 135 zoneid_t ixm_zoneid; /* Needed for ipobs */ 136 zoneid_t ixm_no_loop_zoneid; /* IXAF_NO_LOOP_ZONEID_SET */ 137 138 uint_t ixm_scopeid; /* For IPv6 link-locals */ 139 140 uint32_t ixm_ident; /* For IPv6 fragment header */ 141 uint32_t ixm_xmit_hint; 142 143 cred_t *ixm_cred; /* For getpeerucred - refhold if set */ 144 pid_t ixm_cpid; /* For getpeerucred */ 145 146 ts_label_t *ixm_tsl; /* Refhold if set. */ 147 148 /* 149 * When the pointers below are set they have a refhold on the struct. 150 */ 151 ipsec_latch_t *ixm_ipsec_latch; 152 struct ipsa_s *ixm_ipsec_ah_sa; /* SA for AH */ 153 struct ipsa_s *ixm_ipsec_esp_sa; /* SA for ESP */ 154 struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? */ 155 struct ipsec_action_s *ixm_ipsec_action; /* For reflected packets */ 156 157 ipsa_ref_t ixm_ipsec_ref[2]; /* Soft reference to SA */ 158 159 /* Need these while waiting for SA */ 160 uint16_t ixm_ipsec_src_port; /* Source port number of d-gram. */ 161 uint16_t ixm_ipsec_dst_port; /* Destination port number of d-gram. */ 162 uint8_t ixm_ipsec_icmp_type; /* ICMP type of d-gram */ 163 uint8_t ixm_ipsec_icmp_code; /* ICMP code of d-gram */ 164 165 sa_family_t ixm_ipsec_inaf; /* Inner address family */ 166 uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN]; /* Inner src address */ 167 uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN]; /* Inner dest address */ 168 uint8_t ixm_ipsec_insrcpfx; /* Inner source prefix */ 169 uint8_t ixm_ipsec_indstpfx; /* Inner destination prefix */ 170 171 uint8_t ixm_ipsec_proto; /* IP protocol number for d-gram. */ 172 } ixamblk_t; 173 174 175 /* 176 * When we need to handle a receive side asynchronous operation, then we need 177 * to save sufficient information so that we can call ip_fanout. 178 * That information is captured in an mblk containing this structure. 179 * 180 * Since this is currently only used for IPsec, we include information for 181 * the kernel crypto framework. 182 */ 183 typedef struct iramblk_s { 184 boolean_t irm_inbound; /* B_TRUE */ 185 iaflags_t irm_flags; /* ira_flags */ 186 netstackid_t irm_stackid; /* Verify it didn't go away */ 187 uint_t irm_ifindex; /* To find ira_ill */ 188 189 uint_t irm_rifindex; /* ira_rifindex */ 190 uint_t irm_ruifindex; /* ira_ruifindex */ 191 uint_t irm_pktlen; 192 uint16_t irm_ip_hdr_length; /* Points to ULP header */ 193 uint8_t irm_protocol; /* Protocol number for ULP cksum */ 194 zoneid_t irm_zoneid; /* ALL_ZONES unless local delivery */ 195 196 squeue_t *irm_sqp; 197 ill_rx_ring_t *irm_ring; 198 199 ipaddr_t irm_mroute_tunnel; /* IRAF_MROUTE_TUNNEL_SET */ 200 zoneid_t irm_no_loop_zoneid; /* IRAF_NO_LOOP_ZONEID_SET */ 201 uint32_t irm_esp_udp_ports; /* IRAF_ESP_UDP_PORTS */ 202 203 char irm_l2src[IRA_L2SRC_SIZE]; /* If IRAF_L2SRC_SET */ 204 205 cred_t *irm_cred; /* For getpeerucred - refhold if set */ 206 pid_t irm_cpid; /* For getpeerucred */ 207 208 ts_label_t *irm_tsl; /* Refhold if set. */ 209 210 /* 211 * When set these correspond to a refhold on the object. 212 */ 213 struct ipsa_s *irm_ipsec_ah_sa; /* SA for AH */ 214 struct ipsa_s *irm_ipsec_esp_sa; /* SA for ESP */ 215 struct ipsec_action_s *irm_ipsec_action; /* For reflected packets */ 216 } iramblk_t; 217 218 219 /* 220 * Take the information in ip_xmit_attr_t and stick it in an mblk 221 * that can later be passed to ip_xmit_attr_from_mblk to recreate the 222 * ip_xmit_attr_t. 223 * 224 * Returns NULL on memory allocation failure. 225 */ 226 mblk_t * 227 ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa) 228 { 229 mblk_t *ixamp; 230 ixamblk_t *ixm; 231 nce_t *nce = ixa->ixa_nce; 232 233 ASSERT(nce != NULL); 234 ixamp = allocb(sizeof (*ixm), BPRI_MED); 235 if (ixamp == NULL) 236 return (NULL); 237 238 ixamp->b_datap->db_type = M_BREAK; 239 ixamp->b_wptr += sizeof (*ixm); 240 ixm = (ixamblk_t *)ixamp->b_rptr; 241 242 bzero(ixm, sizeof (*ixm)); 243 ixm->ixm_inbound = B_FALSE; 244 ixm->ixm_flags = ixa->ixa_flags; 245 ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid; 246 ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex; 247 ixm->ixm_nceaddr_v6 = nce->nce_addr; 248 ixm->ixm_fragsize = ixa->ixa_fragsize; 249 ixm->ixm_pktlen = ixa->ixa_pktlen; 250 ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length; 251 ixm->ixm_protocol = ixa->ixa_protocol; 252 ixm->ixm_postfragfn = ixa->ixa_postfragfn; 253 ixm->ixm_zoneid = ixa->ixa_zoneid; 254 ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid; 255 ixm->ixm_scopeid = ixa->ixa_scopeid; 256 ixm->ixm_ident = ixa->ixa_ident; 257 ixm->ixm_xmit_hint = ixa->ixa_xmit_hint; 258 259 if (ixa->ixa_tsl != NULL) { 260 ixm->ixm_tsl = ixa->ixa_tsl; 261 label_hold(ixm->ixm_tsl); 262 } 263 if (ixa->ixa_cred != NULL) { 264 ixm->ixm_cred = ixa->ixa_cred; 265 crhold(ixa->ixa_cred); 266 } 267 ixm->ixm_cpid = ixa->ixa_cpid; 268 269 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) { 270 if (ixa->ixa_ipsec_ah_sa != NULL) { 271 ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa; 272 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa); 273 } 274 if (ixa->ixa_ipsec_esp_sa != NULL) { 275 ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa; 276 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa); 277 } 278 if (ixa->ixa_ipsec_policy != NULL) { 279 ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy; 280 IPPOL_REFHOLD(ixa->ixa_ipsec_policy); 281 } 282 if (ixa->ixa_ipsec_action != NULL) { 283 ixm->ixm_ipsec_action = ixa->ixa_ipsec_action; 284 IPACT_REFHOLD(ixa->ixa_ipsec_action); 285 } 286 if (ixa->ixa_ipsec_latch != NULL) { 287 ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch; 288 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch); 289 } 290 ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0]; 291 ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1]; 292 ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port; 293 ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port; 294 ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type; 295 ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code; 296 ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf; 297 ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0]; 298 ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1]; 299 ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2]; 300 ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3]; 301 ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0]; 302 ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1]; 303 ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2]; 304 ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3]; 305 ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx; 306 ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx; 307 ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto; 308 } 309 return (ixamp); 310 } 311 312 /* 313 * Extract the ip_xmit_attr_t from the mblk, checking that the 314 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is 315 * not the case. 316 * 317 * Otherwise ixa is updated. 318 * Caller needs to release references on the ixa by calling ixa_refrele() 319 * which will imediately call ixa_inactive to release the references. 320 */ 321 boolean_t 322 ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa) 323 { 324 ixamblk_t *ixm; 325 netstack_t *ns; 326 ip_stack_t *ipst; 327 ill_t *ill; 328 nce_t *nce; 329 330 /* We assume the caller hasn't initialized ixa */ 331 bzero(ixa, sizeof (*ixa)); 332 333 ASSERT(DB_TYPE(ixamp) == M_BREAK); 334 ASSERT(ixamp->b_cont == NULL); 335 336 ixm = (ixamblk_t *)ixamp->b_rptr; 337 ASSERT(!ixm->ixm_inbound); 338 339 /* Verify the netstack is still around */ 340 ns = netstack_find_by_stackid(ixm->ixm_stackid); 341 if (ns == NULL) { 342 /* Disappeared on us */ 343 (void) ip_xmit_attr_free_mblk(ixamp); 344 return (B_FALSE); 345 } 346 ipst = ns->netstack_ip; 347 348 /* Verify the ill is still around */ 349 ill = ill_lookup_on_ifindex(ixm->ixm_ifindex, 350 !(ixm->ixm_flags & IXAF_IS_IPV4), ipst); 351 352 /* We have the ill, hence the netstack can't go away */ 353 netstack_rele(ns); 354 if (ill == NULL) { 355 /* Disappeared on us */ 356 (void) ip_xmit_attr_free_mblk(ixamp); 357 return (B_FALSE); 358 } 359 /* 360 * Find the nce. We don't load-spread (only lookup nce's on the ill) 361 * because we want to find the same nce as the one we had when 362 * ip_xmit_attr_to_mblk was called. 363 */ 364 if (ixm->ixm_flags & IXAF_IS_IPV4) { 365 nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4); 366 } else { 367 nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6); 368 } 369 370 /* We have the nce, hence the ill can't go away */ 371 ill_refrele(ill); 372 if (nce == NULL) { 373 /* 374 * Since this is unusual and we don't know what type of 375 * nce it was, we drop the packet. 376 */ 377 (void) ip_xmit_attr_free_mblk(ixamp); 378 return (B_FALSE); 379 } 380 381 ixa->ixa_flags = ixm->ixm_flags; 382 ixa->ixa_refcnt = 1; 383 ixa->ixa_ipst = ipst; 384 ixa->ixa_fragsize = ixm->ixm_fragsize; 385 ixa->ixa_pktlen = ixm->ixm_pktlen; 386 ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length; 387 ixa->ixa_protocol = ixm->ixm_protocol; 388 ixa->ixa_nce = nce; 389 ixa->ixa_postfragfn = ixm->ixm_postfragfn; 390 ixa->ixa_zoneid = ixm->ixm_zoneid; 391 ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid; 392 ixa->ixa_scopeid = ixm->ixm_scopeid; 393 ixa->ixa_ident = ixm->ixm_ident; 394 ixa->ixa_xmit_hint = ixm->ixm_xmit_hint; 395 396 if (ixm->ixm_tsl != NULL) { 397 ixa->ixa_tsl = ixm->ixm_tsl; 398 ixa->ixa_free_flags |= IXA_FREE_TSL; 399 } 400 if (ixm->ixm_cred != NULL) { 401 ixa->ixa_cred = ixm->ixm_cred; 402 ixa->ixa_free_flags |= IXA_FREE_CRED; 403 } 404 ixa->ixa_cpid = ixm->ixm_cpid; 405 406 ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa; 407 ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa; 408 ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy; 409 ixa->ixa_ipsec_action = ixm->ixm_ipsec_action; 410 ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch; 411 412 ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0]; 413 ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1]; 414 ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port; 415 ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port; 416 ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type; 417 ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code; 418 ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf; 419 ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0]; 420 ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1]; 421 ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2]; 422 ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3]; 423 ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0]; 424 ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1]; 425 ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2]; 426 ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3]; 427 ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx; 428 ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx; 429 ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto; 430 431 freeb(ixamp); 432 return (B_TRUE); 433 } 434 435 /* 436 * Free the ixm mblk and any references it holds 437 * Returns b_cont. 438 */ 439 mblk_t * 440 ip_xmit_attr_free_mblk(mblk_t *ixamp) 441 { 442 ixamblk_t *ixm; 443 mblk_t *mp; 444 445 /* Consume mp */ 446 ASSERT(DB_TYPE(ixamp) == M_BREAK); 447 mp = ixamp->b_cont; 448 449 ixm = (ixamblk_t *)ixamp->b_rptr; 450 ASSERT(!ixm->ixm_inbound); 451 452 if (ixm->ixm_ipsec_ah_sa != NULL) { 453 IPSA_REFRELE(ixm->ixm_ipsec_ah_sa); 454 ixm->ixm_ipsec_ah_sa = NULL; 455 } 456 if (ixm->ixm_ipsec_esp_sa != NULL) { 457 IPSA_REFRELE(ixm->ixm_ipsec_esp_sa); 458 ixm->ixm_ipsec_esp_sa = NULL; 459 } 460 if (ixm->ixm_ipsec_policy != NULL) { 461 IPPOL_REFRELE(ixm->ixm_ipsec_policy); 462 ixm->ixm_ipsec_policy = NULL; 463 } 464 if (ixm->ixm_ipsec_action != NULL) { 465 IPACT_REFRELE(ixm->ixm_ipsec_action); 466 ixm->ixm_ipsec_action = NULL; 467 } 468 if (ixm->ixm_ipsec_latch) { 469 IPLATCH_REFRELE(ixm->ixm_ipsec_latch); 470 ixm->ixm_ipsec_latch = NULL; 471 } 472 473 if (ixm->ixm_tsl != NULL) { 474 label_rele(ixm->ixm_tsl); 475 ixm->ixm_tsl = NULL; 476 } 477 if (ixm->ixm_cred != NULL) { 478 crfree(ixm->ixm_cred); 479 ixm->ixm_cred = NULL; 480 } 481 freeb(ixamp); 482 return (mp); 483 } 484 485 /* 486 * Take the information in ip_recv_attr_t and stick it in an mblk 487 * that can later be passed to ip_recv_attr_from_mblk to recreate the 488 * ip_recv_attr_t. 489 * 490 * Returns NULL on memory allocation failure. 491 */ 492 mblk_t * 493 ip_recv_attr_to_mblk(ip_recv_attr_t *ira) 494 { 495 mblk_t *iramp; 496 iramblk_t *irm; 497 ill_t *ill = ira->ira_ill; 498 499 ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0); 500 501 iramp = allocb(sizeof (*irm), BPRI_MED); 502 if (iramp == NULL) 503 return (NULL); 504 505 iramp->b_datap->db_type = M_BREAK; 506 iramp->b_wptr += sizeof (*irm); 507 irm = (iramblk_t *)iramp->b_rptr; 508 509 bzero(irm, sizeof (*irm)); 510 irm->irm_inbound = B_TRUE; 511 irm->irm_flags = ira->ira_flags; 512 if (ill != NULL) { 513 /* Internal to IP - preserve ip_stack_t, ill and rill */ 514 irm->irm_stackid = 515 ill->ill_ipst->ips_netstack->netstack_stackid; 516 irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 517 ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex == 518 ira->ira_rifindex); 519 } else { 520 /* Let ip_recv_attr_from_stackid know there isn't one */ 521 irm->irm_stackid = -1; 522 } 523 irm->irm_rifindex = ira->ira_rifindex; 524 irm->irm_ruifindex = ira->ira_ruifindex; 525 irm->irm_pktlen = ira->ira_pktlen; 526 irm->irm_ip_hdr_length = ira->ira_ip_hdr_length; 527 irm->irm_protocol = ira->ira_protocol; 528 529 irm->irm_sqp = ira->ira_sqp; 530 irm->irm_ring = ira->ira_ring; 531 532 irm->irm_zoneid = ira->ira_zoneid; 533 irm->irm_mroute_tunnel = ira->ira_mroute_tunnel; 534 irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid; 535 irm->irm_esp_udp_ports = ira->ira_esp_udp_ports; 536 537 if (ira->ira_tsl != NULL) { 538 irm->irm_tsl = ira->ira_tsl; 539 label_hold(irm->irm_tsl); 540 } 541 if (ira->ira_cred != NULL) { 542 irm->irm_cred = ira->ira_cred; 543 crhold(ira->ira_cred); 544 } 545 irm->irm_cpid = ira->ira_cpid; 546 547 if (ira->ira_flags & IRAF_L2SRC_SET) 548 bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE); 549 550 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 551 if (ira->ira_ipsec_ah_sa != NULL) { 552 irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa; 553 IPSA_REFHOLD(ira->ira_ipsec_ah_sa); 554 } 555 if (ira->ira_ipsec_esp_sa != NULL) { 556 irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa; 557 IPSA_REFHOLD(ira->ira_ipsec_esp_sa); 558 } 559 if (ira->ira_ipsec_action != NULL) { 560 irm->irm_ipsec_action = ira->ira_ipsec_action; 561 IPACT_REFHOLD(ira->ira_ipsec_action); 562 } 563 } 564 return (iramp); 565 } 566 567 /* 568 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP 569 * then irm_stackid is not -1, in which case we check that the 570 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is 571 * not the case. 572 * If irm_stackid is zero then we are used by an ULP (e.g., squeue_enter) 573 * and we just proceed with ira_ill and ira_rill as NULL. 574 * 575 * The caller needs to release any references on the pointers inside the ire 576 * by calling ira_cleanup. 577 */ 578 boolean_t 579 ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira) 580 { 581 iramblk_t *irm; 582 netstack_t *ns; 583 ip_stack_t *ipst = NULL; 584 ill_t *ill = NULL, *rill = NULL; 585 586 /* We assume the caller hasn't initialized ira */ 587 bzero(ira, sizeof (*ira)); 588 589 ASSERT(DB_TYPE(iramp) == M_BREAK); 590 ASSERT(iramp->b_cont == NULL); 591 592 irm = (iramblk_t *)iramp->b_rptr; 593 ASSERT(irm->irm_inbound); 594 595 if (irm->irm_stackid != -1) { 596 /* Verify the netstack is still around */ 597 ns = netstack_find_by_stackid(irm->irm_stackid); 598 if (ns == NULL) { 599 /* Disappeared on us */ 600 (void) ip_recv_attr_free_mblk(iramp); 601 return (B_FALSE); 602 } 603 ipst = ns->netstack_ip; 604 605 /* Verify the ill is still around */ 606 ill = ill_lookup_on_ifindex(irm->irm_ifindex, 607 !(irm->irm_flags & IRAF_IS_IPV4), ipst); 608 609 if (irm->irm_ifindex == irm->irm_rifindex) { 610 rill = ill; 611 } else { 612 rill = ill_lookup_on_ifindex(irm->irm_rifindex, 613 !(irm->irm_flags & IRAF_IS_IPV4), ipst); 614 } 615 616 /* We have the ill, hence the netstack can't go away */ 617 netstack_rele(ns); 618 if (ill == NULL || rill == NULL) { 619 /* Disappeared on us */ 620 if (ill != NULL) 621 ill_refrele(ill); 622 if (rill != NULL && rill != ill) 623 ill_refrele(rill); 624 (void) ip_recv_attr_free_mblk(iramp); 625 return (B_FALSE); 626 } 627 } 628 629 ira->ira_flags = irm->irm_flags; 630 /* Caller must ill_refele(ira_ill) by using ira_cleanup() */ 631 ira->ira_ill = ill; 632 ira->ira_rill = rill; 633 634 ira->ira_rifindex = irm->irm_rifindex; 635 ira->ira_ruifindex = irm->irm_ruifindex; 636 ira->ira_pktlen = irm->irm_pktlen; 637 ira->ira_ip_hdr_length = irm->irm_ip_hdr_length; 638 ira->ira_protocol = irm->irm_protocol; 639 640 ira->ira_sqp = irm->irm_sqp; 641 /* The rest of IP assumes that the rings never go away. */ 642 ira->ira_ring = irm->irm_ring; 643 644 ira->ira_zoneid = irm->irm_zoneid; 645 ira->ira_mroute_tunnel = irm->irm_mroute_tunnel; 646 ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid; 647 ira->ira_esp_udp_ports = irm->irm_esp_udp_ports; 648 649 if (irm->irm_tsl != NULL) { 650 ira->ira_tsl = irm->irm_tsl; 651 ira->ira_free_flags |= IRA_FREE_TSL; 652 } 653 if (irm->irm_cred != NULL) { 654 ira->ira_cred = irm->irm_cred; 655 ira->ira_free_flags |= IRA_FREE_CRED; 656 } 657 ira->ira_cpid = irm->irm_cpid; 658 659 if (ira->ira_flags & IRAF_L2SRC_SET) 660 bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE); 661 662 ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa; 663 ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa; 664 ira->ira_ipsec_action = irm->irm_ipsec_action; 665 666 freeb(iramp); 667 return (B_TRUE); 668 } 669 670 /* 671 * Free the irm mblk and any references it holds 672 * Returns b_cont. 673 */ 674 mblk_t * 675 ip_recv_attr_free_mblk(mblk_t *iramp) 676 { 677 iramblk_t *irm; 678 mblk_t *mp; 679 680 /* Consume mp */ 681 ASSERT(DB_TYPE(iramp) == M_BREAK); 682 mp = iramp->b_cont; 683 684 irm = (iramblk_t *)iramp->b_rptr; 685 ASSERT(irm->irm_inbound); 686 687 if (irm->irm_ipsec_ah_sa != NULL) { 688 IPSA_REFRELE(irm->irm_ipsec_ah_sa); 689 irm->irm_ipsec_ah_sa = NULL; 690 } 691 if (irm->irm_ipsec_esp_sa != NULL) { 692 IPSA_REFRELE(irm->irm_ipsec_esp_sa); 693 irm->irm_ipsec_esp_sa = NULL; 694 } 695 if (irm->irm_ipsec_action != NULL) { 696 IPACT_REFRELE(irm->irm_ipsec_action); 697 irm->irm_ipsec_action = NULL; 698 } 699 if (irm->irm_tsl != NULL) { 700 label_rele(irm->irm_tsl); 701 irm->irm_tsl = NULL; 702 } 703 if (irm->irm_cred != NULL) { 704 crfree(irm->irm_cred); 705 irm->irm_cred = NULL; 706 } 707 708 freeb(iramp); 709 return (mp); 710 } 711 712 /* 713 * Returns true if the mblk contains an ip_recv_attr_t 714 * For now we just check db_type. 715 */ 716 boolean_t 717 ip_recv_attr_is_mblk(mblk_t *mp) 718 { 719 /* 720 * Need to handle the various forms of tcp_timermp which are tagged 721 * with b_wptr and might have a NULL b_datap. 722 */ 723 if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1) 724 return (B_FALSE); 725 726 #ifdef DEBUG 727 iramblk_t *irm; 728 729 if (DB_TYPE(mp) != M_BREAK) 730 return (B_FALSE); 731 732 irm = (iramblk_t *)mp->b_rptr; 733 ASSERT(irm->irm_inbound); 734 return (B_TRUE); 735 #else 736 return (DB_TYPE(mp) == M_BREAK); 737 #endif 738 } 739 740 static ip_xmit_attr_t * 741 conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag) 742 { 743 ip_xmit_attr_t *ixa; 744 ip_xmit_attr_t *oldixa; 745 746 mutex_enter(&connp->conn_lock); 747 ixa = connp->conn_ixa; 748 749 /* At least one references for the conn_t */ 750 ASSERT(ixa->ixa_refcnt >= 1); 751 if (atomic_add_32_nv(&ixa->ixa_refcnt, 1) == 2) { 752 /* No other thread using conn_ixa */ 753 mutex_exit(&connp->conn_lock); 754 return (ixa); 755 } 756 ixa = kmem_alloc(sizeof (*ixa), kmflag); 757 if (ixa == NULL) { 758 mutex_exit(&connp->conn_lock); 759 ixa_refrele(connp->conn_ixa); 760 return (NULL); 761 } 762 ixa_safe_copy(connp->conn_ixa, ixa); 763 764 /* Make sure we drop conn_lock before any refrele */ 765 if (replace) { 766 ixa->ixa_refcnt++; /* No atomic needed - not visible */ 767 oldixa = connp->conn_ixa; 768 connp->conn_ixa = ixa; 769 mutex_exit(&connp->conn_lock); 770 IXA_REFRELE(oldixa); /* Undo refcnt from conn_t */ 771 } else { 772 oldixa = connp->conn_ixa; 773 mutex_exit(&connp->conn_lock); 774 } 775 IXA_REFRELE(oldixa); /* Undo above atomic_add_32_nv */ 776 777 return (ixa); 778 } 779 780 /* 781 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only 782 * the caller can access the ip_xmit_attr_t. 783 * 784 * If nobody else is using conn_ixa we return it. 785 * Otherwise we make a "safe" copy of conn_ixa 786 * and return it. The "safe" copy has the pointers set to NULL 787 * (since the pointers might be changed by another thread using 788 * conn_ixa). The caller needs to check for NULL pointers to see 789 * if ip_set_destination needs to be called to re-establish the pointers. 790 * 791 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t. 792 * That is used when we connect() the ULP. 793 */ 794 ip_xmit_attr_t * 795 conn_get_ixa(conn_t *connp, boolean_t replace) 796 { 797 return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP)); 798 } 799 800 /* 801 * Used only when the option is to have the kernel hang due to not 802 * cleaning up ixa references on ills etc. 803 */ 804 ip_xmit_attr_t * 805 conn_get_ixa_tryhard(conn_t *connp, boolean_t replace) 806 { 807 return (conn_get_ixa_impl(connp, replace, KM_SLEEP)); 808 } 809 810 /* 811 * Replace conn_ixa with the ixa argument. 812 * 813 * The caller must hold conn_lock. 814 * 815 * We return the old ixa; the caller must ixa_refrele that after conn_lock 816 * has been dropped. 817 */ 818 ip_xmit_attr_t * 819 conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa) 820 { 821 ip_xmit_attr_t *oldixa; 822 823 ASSERT(MUTEX_HELD(&connp->conn_lock)); 824 825 oldixa = connp->conn_ixa; 826 IXA_REFHOLD(ixa); 827 connp->conn_ixa = ixa; 828 return (oldixa); 829 } 830 831 /* 832 * Return a ip_xmit_attr_t to use with a conn_t that is based on but 833 * separate from conn_ixa. 834 * 835 * This "safe" copy has the pointers set to NULL 836 * (since the pointers might be changed by another thread using 837 * conn_ixa). The caller needs to check for NULL pointers to see 838 * if ip_set_destination needs to be called to re-establish the pointers. 839 */ 840 ip_xmit_attr_t * 841 conn_get_ixa_exclusive(conn_t *connp) 842 { 843 ip_xmit_attr_t *ixa; 844 845 mutex_enter(&connp->conn_lock); 846 ixa = connp->conn_ixa; 847 848 /* At least one references for the conn_t */ 849 ASSERT(ixa->ixa_refcnt >= 1); 850 851 /* Make sure conn_ixa doesn't disappear while we copy it */ 852 atomic_add_32(&ixa->ixa_refcnt, 1); 853 854 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP); 855 if (ixa == NULL) { 856 mutex_exit(&connp->conn_lock); 857 ixa_refrele(connp->conn_ixa); 858 return (NULL); 859 } 860 ixa_safe_copy(connp->conn_ixa, ixa); 861 mutex_exit(&connp->conn_lock); 862 IXA_REFRELE(connp->conn_ixa); 863 return (ixa); 864 } 865 866 void 867 ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa) 868 { 869 bcopy(src, ixa, sizeof (*ixa)); 870 ixa->ixa_refcnt = 1; 871 /* 872 * Clear any pointers that have references and might be changed 873 * by ip_set_destination or the ULP 874 */ 875 ixa->ixa_ire = NULL; 876 ixa->ixa_nce = NULL; 877 ixa->ixa_dce = NULL; 878 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 879 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 880 #ifdef DEBUG 881 ixa->ixa_curthread = NULL; 882 #endif 883 /* Clear all the IPsec pointers and the flag as well. */ 884 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE; 885 886 ixa->ixa_ipsec_latch = NULL; 887 ixa->ixa_ipsec_ah_sa = NULL; 888 ixa->ixa_ipsec_esp_sa = NULL; 889 ixa->ixa_ipsec_policy = NULL; 890 ixa->ixa_ipsec_action = NULL; 891 892 /* 893 * We leave ixa_tsl unchanged, but if it has a refhold we need 894 * to get an extra refhold. 895 */ 896 if (ixa->ixa_free_flags & IXA_FREE_TSL) 897 label_hold(ixa->ixa_tsl); 898 899 /* 900 * We leave ixa_cred unchanged, but if it has a refhold we need 901 * to get an extra refhold. 902 */ 903 if (ixa->ixa_free_flags & IXA_FREE_CRED) 904 crhold(ixa->ixa_cred); 905 } 906 907 /* 908 * Duplicate an ip_xmit_attr_t. 909 * Assumes that the caller controls the ixa, hence we do not need to use 910 * a safe copy. We just have to increase the refcnt on any pointers. 911 */ 912 ip_xmit_attr_t * 913 ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa) 914 { 915 ip_xmit_attr_t *ixa; 916 917 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP); 918 if (ixa == NULL) 919 return (NULL); 920 bcopy(src_ixa, ixa, sizeof (*ixa)); 921 ixa->ixa_refcnt = 1; 922 923 if (ixa->ixa_ire != NULL) 924 ire_refhold_notr(ixa->ixa_ire); 925 if (ixa->ixa_nce != NULL) 926 nce_refhold(ixa->ixa_nce); 927 if (ixa->ixa_dce != NULL) 928 dce_refhold_notr(ixa->ixa_dce); 929 930 #ifdef DEBUG 931 ixa->ixa_curthread = NULL; 932 #endif 933 934 if (ixa->ixa_ipsec_latch != NULL) 935 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch); 936 if (ixa->ixa_ipsec_ah_sa != NULL) 937 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa); 938 if (ixa->ixa_ipsec_esp_sa != NULL) 939 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa); 940 if (ixa->ixa_ipsec_policy != NULL) 941 IPPOL_REFHOLD(ixa->ixa_ipsec_policy); 942 if (ixa->ixa_ipsec_action != NULL) 943 IPACT_REFHOLD(ixa->ixa_ipsec_action); 944 945 if (ixa->ixa_tsl != NULL) { 946 label_hold(ixa->ixa_tsl); 947 ixa->ixa_free_flags |= IXA_FREE_TSL; 948 } 949 if (ixa->ixa_cred != NULL) { 950 crhold(ixa->ixa_cred); 951 ixa->ixa_free_flags |= IXA_FREE_CRED; 952 } 953 return (ixa); 954 } 955 956 /* 957 * Used to replace the ixa_label field. 958 * The caller should have a reference on the label, which we transfer to 959 * the attributes so that when the attribute is freed/cleaned up 960 * we will release that reference. 961 */ 962 void 963 ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl) 964 { 965 ASSERT(tsl != NULL); 966 967 if (ixa->ixa_free_flags & IXA_FREE_TSL) { 968 ASSERT(ixa->ixa_tsl != NULL); 969 label_rele(ixa->ixa_tsl); 970 } else { 971 ixa->ixa_free_flags |= IXA_FREE_TSL; 972 } 973 ixa->ixa_tsl = tsl; 974 } 975 976 /* 977 * Replace the ip_recv_attr_t's label. 978 * Due to kernel RPC's use of db_credp we also need to replace ira_cred; 979 * TCP/UDP uses ira_cred to set db_credp for non-socket users. 980 * This can fail (and return B_FALSE) due to lack of memory. 981 */ 982 boolean_t 983 ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl) 984 { 985 cred_t *newcr; 986 987 if (ira->ira_free_flags & IRA_FREE_TSL) { 988 ASSERT(ira->ira_tsl != NULL); 989 label_rele(ira->ira_tsl); 990 } 991 label_hold(tsl); 992 ira->ira_tsl = tsl; 993 ira->ira_free_flags |= IRA_FREE_TSL; 994 995 /* 996 * Reset zoneid if we have a shared address. That allows 997 * ip_fanout_tx_v4/v6 to determine the zoneid again. 998 */ 999 if (ira->ira_flags & IRAF_TX_SHARED_ADDR) 1000 ira->ira_zoneid = ALL_ZONES; 1001 1002 /* We update ira_cred for RPC */ 1003 newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP); 1004 if (newcr == NULL) 1005 return (B_FALSE); 1006 if (ira->ira_free_flags & IRA_FREE_CRED) 1007 crfree(ira->ira_cred); 1008 ira->ira_cred = newcr; 1009 ira->ira_free_flags |= IRA_FREE_CRED; 1010 return (B_TRUE); 1011 } 1012 1013 /* 1014 * This needs to be called after ip_set_destination/tsol_check_dest might 1015 * have changed ixa_tsl to be specific for a destination, and we now want to 1016 * send to a different destination. 1017 * We have to restart with crgetlabel() since ip_set_destination/ 1018 * tsol_check_dest will start with ixa_tsl. 1019 */ 1020 void 1021 ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr) 1022 { 1023 if (!is_system_labeled()) 1024 return; 1025 1026 if (ixa->ixa_free_flags & IXA_FREE_TSL) { 1027 ASSERT(ixa->ixa_tsl != NULL); 1028 label_rele(ixa->ixa_tsl); 1029 ixa->ixa_free_flags &= ~IXA_FREE_TSL; 1030 } 1031 ixa->ixa_tsl = crgetlabel(cr); 1032 } 1033 1034 void 1035 ixa_refrele(ip_xmit_attr_t *ixa) 1036 { 1037 IXA_REFRELE(ixa); 1038 } 1039 1040 void 1041 ixa_inactive(ip_xmit_attr_t *ixa) 1042 { 1043 ASSERT(ixa->ixa_refcnt == 0); 1044 1045 ixa_cleanup(ixa); 1046 kmem_free(ixa, sizeof (*ixa)); 1047 } 1048 1049 /* 1050 * Release any references contained in the ixa. 1051 * Also clear any fields that are not controlled by ixa_flags. 1052 */ 1053 void 1054 ixa_cleanup(ip_xmit_attr_t *ixa) 1055 { 1056 if (ixa->ixa_ire != NULL) { 1057 ire_refrele_notr(ixa->ixa_ire); 1058 ixa->ixa_ire = NULL; 1059 } 1060 if (ixa->ixa_dce != NULL) { 1061 dce_refrele_notr(ixa->ixa_dce); 1062 ixa->ixa_dce = NULL; 1063 } 1064 if (ixa->ixa_nce != NULL) { 1065 nce_refrele(ixa->ixa_nce); 1066 ixa->ixa_nce = NULL; 1067 } 1068 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 1069 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 1070 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) { 1071 ipsec_out_release_refs(ixa); 1072 } 1073 if (ixa->ixa_free_flags & IXA_FREE_TSL) { 1074 ASSERT(ixa->ixa_tsl != NULL); 1075 label_rele(ixa->ixa_tsl); 1076 ixa->ixa_tsl = NULL; 1077 ixa->ixa_free_flags &= ~IXA_FREE_TSL; 1078 } 1079 if (ixa->ixa_free_flags & IXA_FREE_CRED) { 1080 ASSERT(ixa->ixa_cred != NULL); 1081 crfree(ixa->ixa_cred); 1082 ixa->ixa_cred = NULL; 1083 ixa->ixa_free_flags &= ~IXA_FREE_CRED; 1084 } 1085 ixa->ixa_src_preferences = 0; 1086 ixa->ixa_ifindex = 0; 1087 ixa->ixa_multicast_ifindex = 0; 1088 ixa->ixa_multicast_ifaddr = INADDR_ANY; 1089 } 1090 1091 /* 1092 * Release any references contained in the ira. 1093 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second 1094 * argument. 1095 */ 1096 void 1097 ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill) 1098 { 1099 if (ira->ira_ill != NULL) { 1100 if (ira->ira_rill != ira->ira_ill) { 1101 /* Caused by async processing */ 1102 ill_refrele(ira->ira_rill); 1103 } 1104 if (refrele_ill) 1105 ill_refrele(ira->ira_ill); 1106 } 1107 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1108 ipsec_in_release_refs(ira); 1109 } 1110 if (ira->ira_free_flags & IRA_FREE_TSL) { 1111 ASSERT(ira->ira_tsl != NULL); 1112 label_rele(ira->ira_tsl); 1113 ira->ira_tsl = NULL; 1114 ira->ira_free_flags &= ~IRA_FREE_TSL; 1115 } 1116 if (ira->ira_free_flags & IRA_FREE_CRED) { 1117 ASSERT(ira->ira_cred != NULL); 1118 crfree(ira->ira_cred); 1119 ira->ira_cred = NULL; 1120 ira->ira_free_flags &= ~IRA_FREE_CRED; 1121 } 1122 } 1123 1124 /* 1125 * Function to help release any IRE, NCE, or DCEs that 1126 * have been deleted and are marked as condemned. 1127 * The caller is responsible for any serialization which is different 1128 * for TCP, SCTP, and others. 1129 */ 1130 static void 1131 ixa_cleanup_stale(ip_xmit_attr_t *ixa) 1132 { 1133 ire_t *ire; 1134 nce_t *nce; 1135 dce_t *dce; 1136 1137 ire = ixa->ixa_ire; 1138 nce = ixa->ixa_nce; 1139 dce = ixa->ixa_dce; 1140 1141 if (ire != NULL && IRE_IS_CONDEMNED(ire)) { 1142 ire_refrele_notr(ire); 1143 ire = ire_blackhole(ixa->ixa_ipst, 1144 !(ixa->ixa_flags & IXAF_IS_IPV4)); 1145 ASSERT(ire != NULL); 1146 #ifdef DEBUG 1147 ire_refhold_notr(ire); 1148 ire_refrele(ire); 1149 #endif 1150 ixa->ixa_ire = ire; 1151 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 1152 } 1153 if (nce != NULL && nce->nce_is_condemned) { 1154 /* Can make it NULL as long as we set IRE_GENERATION_VERIFY */ 1155 nce_refrele(nce); 1156 ixa->ixa_nce = NULL; 1157 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 1158 } 1159 if (dce != NULL && DCE_IS_CONDEMNED(dce)) { 1160 dce_refrele_notr(dce); 1161 dce = dce_get_default(ixa->ixa_ipst); 1162 ASSERT(dce != NULL); 1163 #ifdef DEBUG 1164 dce_refhold_notr(dce); 1165 dce_refrele(dce); 1166 #endif 1167 ixa->ixa_dce = dce; 1168 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 1169 } 1170 } 1171 1172 /* 1173 * Used to run ixa_cleanup_stale inside the tcp squeue. 1174 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp 1175 * and waking up the caller. 1176 */ 1177 /* ARGSUSED2 */ 1178 static void 1179 tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2, 1180 ip_recv_attr_t *dummy) 1181 { 1182 conn_t *connp = (conn_t *)arg; 1183 tcp_stack_t *tcps; 1184 1185 tcps = connp->conn_netstack->netstack_tcp; 1186 1187 ixa_cleanup_stale(connp->conn_ixa); 1188 1189 mutex_enter(&tcps->tcps_ixa_cleanup_lock); 1190 ASSERT(tcps->tcps_ixa_cleanup_mp == NULL); 1191 tcps->tcps_ixa_cleanup_mp = mp; 1192 cv_signal(&tcps->tcps_ixa_cleanup_cv); 1193 mutex_exit(&tcps->tcps_ixa_cleanup_lock); 1194 } 1195 1196 1197 /* 1198 * ipcl_walk() function to help release any IRE, NCE, or DCEs that 1199 * have been deleted and are marked as condemned. 1200 * Note that we can't cleanup the pointers since there can be threads 1201 * in conn_ip_output() sending while we are called. 1202 */ 1203 void 1204 conn_ixa_cleanup(conn_t *connp, void *arg) 1205 { 1206 boolean_t tryhard = (boolean_t)arg; 1207 1208 if (IPCL_IS_TCP(connp)) { 1209 mblk_t *mp; 1210 tcp_stack_t *tcps; 1211 1212 tcps = connp->conn_netstack->netstack_tcp; 1213 1214 mutex_enter(&tcps->tcps_ixa_cleanup_lock); 1215 while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) { 1216 /* 1217 * Multiple concurrent cleanups; need to have the last 1218 * one run since it could be an unplumb. 1219 */ 1220 cv_wait(&tcps->tcps_ixa_cleanup_cv, 1221 &tcps->tcps_ixa_cleanup_lock); 1222 } 1223 tcps->tcps_ixa_cleanup_mp = NULL; 1224 mutex_exit(&tcps->tcps_ixa_cleanup_lock); 1225 1226 if (connp->conn_sqp->sq_run == curthread) { 1227 /* Already on squeue */ 1228 tcp_ixa_cleanup(connp, mp, NULL, NULL); 1229 } else { 1230 CONN_INC_REF(connp); 1231 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup, 1232 connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP); 1233 1234 /* Wait until tcp_ixa_cleanup has run */ 1235 mutex_enter(&tcps->tcps_ixa_cleanup_lock); 1236 while (tcps->tcps_ixa_cleanup_mp == NULL) { 1237 cv_wait(&tcps->tcps_ixa_cleanup_cv, 1238 &tcps->tcps_ixa_cleanup_lock); 1239 } 1240 mutex_exit(&tcps->tcps_ixa_cleanup_lock); 1241 } 1242 } else if (IPCL_IS_SCTP(connp)) { 1243 sctp_t *sctp; 1244 sctp_faddr_t *fp; 1245 1246 sctp = CONN2SCTP(connp); 1247 RUN_SCTP(sctp); 1248 ixa_cleanup_stale(connp->conn_ixa); 1249 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) 1250 ixa_cleanup_stale(fp->ixa); 1251 WAKE_SCTP(sctp); 1252 } else { 1253 ip_xmit_attr_t *ixa; 1254 1255 /* 1256 * If there is a different thread using conn_ixa then we get a 1257 * new copy and cut the old one loose from conn_ixa. Otherwise 1258 * we use conn_ixa and prevent any other thread from 1259 * using/changing it. Anybody using conn_ixa (e.g., a thread in 1260 * conn_ip_output) will do an ixa_refrele which will remove any 1261 * references on the ire etc. 1262 * 1263 * Once we are done other threads can use conn_ixa since the 1264 * refcnt will be back at one. 1265 * 1266 * We are called either because an ill is going away, or 1267 * due to memory reclaim. In the former case we wait for 1268 * memory since we must remove the refcnts on the ill. 1269 */ 1270 if (tryhard) { 1271 ixa = conn_get_ixa_tryhard(connp, B_TRUE); 1272 ASSERT(ixa != NULL); 1273 } else { 1274 ixa = conn_get_ixa(connp, B_TRUE); 1275 if (ixa == NULL) { 1276 /* 1277 * Somebody else was using it and kmem_alloc 1278 * failed! Next memory reclaim will try to 1279 * clean up. 1280 */ 1281 DTRACE_PROBE1(conn__ixa__cleanup__bail, 1282 conn_t *, connp); 1283 return; 1284 } 1285 } 1286 ixa_cleanup_stale(ixa); 1287 ixa_refrele(ixa); 1288 } 1289 } 1290 1291 /* 1292 * ixa needs to be an exclusive copy so that no one changes the cookie 1293 * or the ixa_nce. 1294 */ 1295 boolean_t 1296 ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa) 1297 { 1298 uintptr_t cookie = ixa->ixa_cookie; 1299 ill_dld_direct_t *idd; 1300 idl_tx_list_t *idl_txl; 1301 ill_t *ill = ixa->ixa_nce->nce_ill; 1302 boolean_t inserted = B_FALSE; 1303 1304 idd = &(ill)->ill_dld_capab->idc_direct; 1305 idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; 1306 if (cookie == 0) { 1307 /* 1308 * ip_xmit failed the canputnext check 1309 */ 1310 connp->conn_did_putbq = 1; 1311 ASSERT(cookie == 0); 1312 conn_drain_insert(connp, idl_txl); 1313 if (!IPCL_IS_NONSTR(connp)) 1314 noenable(connp->conn_wq); 1315 return (B_TRUE); 1316 } 1317 ASSERT(ILL_DIRECT_CAPABLE(ill)); 1318 mutex_enter(&idl_txl->txl_lock); 1319 if (connp->conn_direct_blocked || 1320 (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0)) { 1321 DTRACE_PROBE1(ill__tx__not__blocked, boolean, 1322 connp->conn_direct_blocked); 1323 } else if (idl_txl->txl_cookie != NULL && 1324 idl_txl->txl_cookie != ixa->ixa_cookie) { 1325 DTRACE_PROBE2(ill__send__tx__collision, uintptr_t, cookie, 1326 uintptr_t, idl_txl->txl_cookie); 1327 /* bump kstat for cookie collision */ 1328 } else { 1329 connp->conn_direct_blocked = B_TRUE; 1330 idl_txl->txl_cookie = cookie; 1331 conn_drain_insert(connp, idl_txl); 1332 if (!IPCL_IS_NONSTR(connp)) 1333 noenable(connp->conn_wq); 1334 inserted = B_TRUE; 1335 } 1336 mutex_exit(&idl_txl->txl_lock); 1337 return (inserted); 1338 } 1339