1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/strsun.h> 30 #include <sys/zone.h> 31 #include <sys/ddi.h> 32 #include <sys/sunddi.h> 33 #include <sys/cmn_err.h> 34 #include <sys/debug.h> 35 #include <sys/atomic.h> 36 37 #include <sys/systm.h> 38 #include <sys/param.h> 39 #include <sys/kmem.h> 40 #include <sys/sdt.h> 41 #include <sys/socket.h> 42 #include <sys/mac.h> 43 #include <net/if.h> 44 #include <net/if_arp.h> 45 #include <net/route.h> 46 #include <sys/sockio.h> 47 #include <netinet/in.h> 48 #include <net/if_dl.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/mib2.h> 53 #include <inet/nd.h> 54 #include <inet/arp.h> 55 #include <inet/snmpcom.h> 56 #include <inet/kstatcom.h> 57 58 #include <netinet/igmp_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <netinet/sctp.h> 62 63 #include <inet/ip.h> 64 #include <inet/ip_impl.h> 65 #include <inet/ip6.h> 66 #include <inet/ip6_asp.h> 67 #include <inet/tcp.h> 68 #include 
<inet/ip_multi.h> 69 #include <inet/ip_if.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip_ftable.h> 72 #include <inet/ip_rts.h> 73 #include <inet/optcom.h> 74 #include <inet/ip_ndp.h> 75 #include <inet/ip_listutils.h> 76 #include <netinet/igmp.h> 77 #include <netinet/ip_mroute.h> 78 #include <inet/ipp_common.h> 79 80 #include <net/pfkeyv2.h> 81 #include <inet/sadb.h> 82 #include <inet/ipsec_impl.h> 83 #include <inet/ipdrop.h> 84 #include <inet/ip_netinfo.h> 85 #include <sys/squeue_impl.h> 86 #include <sys/squeue.h> 87 88 #include <inet/ipclassifier.h> 89 #include <inet/sctp_ip.h> 90 #include <inet/sctp/sctp_impl.h> 91 #include <inet/udp_impl.h> 92 #include <sys/sunddi.h> 93 94 #include <sys/tsol/label.h> 95 #include <sys/tsol/tnet.h> 96 97 /* 98 * Release a reference on ip_xmit_attr. 99 * The reference is acquired by conn_get_ixa() 100 */ 101 #define IXA_REFRELE(ixa) \ 102 { \ 103 if (atomic_dec_32_nv(&(ixa)->ixa_refcnt) == 0) \ 104 ixa_inactive(ixa); \ 105 } 106 107 #define IXA_REFHOLD(ixa) \ 108 { \ 109 ASSERT((ixa)->ixa_refcnt != 0); \ 110 atomic_inc_32(&(ixa)->ixa_refcnt); \ 111 } 112 113 /* 114 * When we need to handle a transmit side asynchronous operation, then we need 115 * to save sufficient information so that we can call the fragment and postfrag 116 * functions. That information is captured in an mblk containing this structure. 117 * 118 * Since this is currently only used for IPsec, we include information for 119 * the kernel crypto framework. 
 */
typedef struct ixamblk_s {
	boolean_t	ixm_inbound;	/* B_FALSE */
	iaflags_t	ixm_flags;	/* ixa_flags */
	netstackid_t	ixm_stackid;	/* Verify it didn't go away */
	uint_t		ixm_ifindex;	/* Used to find the nce */
	in6_addr_t	ixm_nceaddr_v6;	/* Used to find nce */
#define	ixm_nceaddr_v4	V4_PART_OF_V6(ixm_nceaddr_v6)
	uint32_t	ixm_fragsize;
	uint_t		ixm_pktlen;
	uint16_t	ixm_ip_hdr_length; /* Points to ULP header */
	uint8_t		ixm_protocol;	/* Protocol number for ULP cksum */
	pfirepostfrag_t	ixm_postfragfn;

	zoneid_t	ixm_zoneid;	/* Needed for ipobs */
	zoneid_t	ixm_no_loop_zoneid; /* IXAF_NO_LOOP_ZONEID_SET */

	uint_t		ixm_scopeid;	/* For IPv6 link-locals */

	uint32_t	ixm_ident;	/* For IPv6 fragment header */
	uint32_t	ixm_xmit_hint;

	uint64_t	ixm_conn_id;	/* Used by DTrace */
	cred_t		*ixm_cred;	/* For getpeerucred - refhold if set */
	pid_t		ixm_cpid;	/* For getpeerucred */

	ts_label_t	*ixm_tsl;	/* Refhold if set. */

	/*
	 * When the pointers below are set they have a refhold on the struct.
	 */
	ipsec_latch_t	*ixm_ipsec_latch;
	struct ipsa_s	*ixm_ipsec_ah_sa; /* SA for AH */
	struct ipsa_s	*ixm_ipsec_esp_sa; /* SA for ESP */
	struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? */
	struct ipsec_action_s *ixm_ipsec_action; /* For reflected packets */

	ipsa_ref_t	ixm_ipsec_ref[2]; /* Soft reference to SA */

	/* Need these while waiting for SA */
	uint16_t	ixm_ipsec_src_port; /* Source port number of d-gram. */
	uint16_t	ixm_ipsec_dst_port; /* Destination port number of d-gram. */
	uint8_t		ixm_ipsec_icmp_type; /* ICMP type of d-gram */
	uint8_t		ixm_ipsec_icmp_code; /* ICMP code of d-gram */

	sa_family_t	ixm_ipsec_inaf;	/* Inner address family */
	uint32_t	ixm_ipsec_insrc[IXA_MAX_ADDRLEN]; /* Inner src address */
	uint32_t	ixm_ipsec_indst[IXA_MAX_ADDRLEN]; /* Inner dest address */
	uint8_t		ixm_ipsec_insrcpfx; /* Inner source prefix */
	uint8_t		ixm_ipsec_indstpfx; /* Inner destination prefix */

	uint8_t		ixm_ipsec_proto; /* IP protocol number for d-gram. */
} ixamblk_t;


/*
 * When we need to handle a receive side asynchronous operation, then we need
 * to save sufficient information so that we can call ip_fanout.
 * That information is captured in an mblk containing this structure.
 *
 * Since this is currently only used for IPsec, we include information for
 * the kernel crypto framework.
 */
typedef struct iramblk_s {
	boolean_t	irm_inbound;	/* B_TRUE */
	iaflags_t	irm_flags;	/* ira_flags */
	netstackid_t	irm_stackid;	/* Verify it didn't go away */
	uint_t		irm_ifindex;	/* To find ira_ill */

	uint_t		irm_rifindex;	/* ira_rifindex */
	uint_t		irm_ruifindex;	/* ira_ruifindex */
	uint_t		irm_pktlen;
	uint16_t	irm_ip_hdr_length; /* Points to ULP header */
	uint8_t		irm_protocol;	/* Protocol number for ULP cksum */
	zoneid_t	irm_zoneid;	/* ALL_ZONES unless local delivery */

	squeue_t	*irm_sqp;
	ill_rx_ring_t	*irm_ring;

	ipaddr_t	irm_mroute_tunnel; /* IRAF_MROUTE_TUNNEL_SET */
	zoneid_t	irm_no_loop_zoneid; /* IRAF_NO_LOOP_ZONEID_SET */
	uint32_t	irm_esp_udp_ports; /* IRAF_ESP_UDP_PORTS */

	char		irm_l2src[IRA_L2SRC_SIZE]; /* If IRAF_L2SRC_SET */

	cred_t		*irm_cred;	/* For getpeerucred - refhold if set */
	pid_t		irm_cpid;	/* For getpeerucred */

	ts_label_t	*irm_tsl;	/* Refhold if set. */

	/*
	 * When set these correspond to a refhold on the object.
	 */
	struct ipsa_s	*irm_ipsec_ah_sa; /* SA for AH */
	struct ipsa_s	*irm_ipsec_esp_sa; /* SA for ESP */
	struct ipsec_action_s *irm_ipsec_action; /* For reflected packets */
} iramblk_t;


/*
 * Take the information in ip_xmit_attr_t and stick it in an mblk
 * that can later be passed to ip_xmit_attr_from_mblk to recreate the
 * ip_xmit_attr_t.
 *
 * Returns NULL on memory allocation failure.
 */
mblk_t *
ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa)
{
	mblk_t		*ixamp;
	ixamblk_t	*ixm;
	nce_t		*nce = ixa->ixa_nce;

	ASSERT(nce != NULL);
	ixamp = allocb(sizeof (*ixm), BPRI_MED);
	if (ixamp == NULL)
		return (NULL);

	/* M_BREAK marks this mblk as carrying attributes, not packet data */
	ixamp->b_datap->db_type = M_BREAK;
	ixamp->b_wptr += sizeof (*ixm);
	ixm = (ixamblk_t *)ixamp->b_rptr;

	bzero(ixm, sizeof (*ixm));
	ixm->ixm_inbound = B_FALSE;
	ixm->ixm_flags = ixa->ixa_flags;
	/* Record ids rather than pointers so we can detect teardown later */
	ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid;
	ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex;
	ixm->ixm_nceaddr_v6 = nce->nce_addr;
	ixm->ixm_fragsize = ixa->ixa_fragsize;
	ixm->ixm_pktlen = ixa->ixa_pktlen;
	ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length;
	ixm->ixm_protocol = ixa->ixa_protocol;
	ixm->ixm_postfragfn = ixa->ixa_postfragfn;
	ixm->ixm_zoneid = ixa->ixa_zoneid;
	ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid;
	ixm->ixm_scopeid = ixa->ixa_scopeid;
	ixm->ixm_ident = ixa->ixa_ident;
	ixm->ixm_xmit_hint = ixa->ixa_xmit_hint;

	/* The mblk takes its own holds on the label and cred */
	if (ixa->ixa_tsl != NULL) {
		ixm->ixm_tsl = ixa->ixa_tsl;
		label_hold(ixm->ixm_tsl);
	}
	if (ixa->ixa_cred != NULL) {
		ixm->ixm_cred = ixa->ixa_cred;
		crhold(ixa->ixa_cred);
	}
	ixm->ixm_cpid = ixa->ixa_cpid;
	ixm->ixm_conn_id = ixa->ixa_conn_id;

	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
		/* Each copied IPsec pointer gets its own reference */
		if (ixa->ixa_ipsec_ah_sa != NULL) {
			ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa;
			IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
		}
		if (ixa->ixa_ipsec_esp_sa != NULL) {
			ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa;
			IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
		}
		if (ixa->ixa_ipsec_policy != NULL) {
			ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy;
			IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
		}
		if (ixa->ixa_ipsec_action != NULL) {
			ixm->ixm_ipsec_action = ixa->ixa_ipsec_action;
			IPACT_REFHOLD(ixa->ixa_ipsec_action);
		}
		if (ixa->ixa_ipsec_latch != NULL) {
			ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch;
			IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
		}
		ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0];
		ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1];
		ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port;
		ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port;
		ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type;
		ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code;
		ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf;
		ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0];
		ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1];
		ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2];
		ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3];
		ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0];
		ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1];
		ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2];
		ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3];
		ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx;
		ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx;
		ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto;
	}
	return (ixamp);
}

/*
 * Extract the ip_xmit_attr_t from the mblk, checking that the
 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is
 * not the case.
 *
 * Otherwise ixa is updated.
 * Caller needs to release references on the ixa by calling ixa_refrele()
 * which will immediately call ixa_inactive to release the references.
 */
boolean_t
ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa)
{
	ixamblk_t	*ixm;
	netstack_t	*ns;
	ip_stack_t	*ipst;
	ill_t		*ill;
	nce_t		*nce;

	/* We assume the caller hasn't initialized ixa */
	bzero(ixa, sizeof (*ixa));

	ASSERT(DB_TYPE(ixamp) == M_BREAK);
	ASSERT(ixamp->b_cont == NULL);

	ixm = (ixamblk_t *)ixamp->b_rptr;
	ASSERT(!ixm->ixm_inbound);

	/* Verify the netstack is still around */
	ns = netstack_find_by_stackid(ixm->ixm_stackid);
	if (ns == NULL) {
		/* Disappeared on us */
		(void) ip_xmit_attr_free_mblk(ixamp);
		return (B_FALSE);
	}
	ipst = ns->netstack_ip;

	/* Verify the ill is still around */
	ill = ill_lookup_on_ifindex(ixm->ixm_ifindex,
	    !(ixm->ixm_flags & IXAF_IS_IPV4), ipst);

	/* We have the ill, hence the netstack can't go away */
	netstack_rele(ns);
	if (ill == NULL) {
		/* Disappeared on us */
		(void) ip_xmit_attr_free_mblk(ixamp);
		return (B_FALSE);
	}
	/*
	 * Find the nce. We don't load-spread (only lookup nce's on the ill)
	 * because we want to find the same nce as the one we had when
	 * ip_xmit_attr_to_mblk was called.
	 */
	if (ixm->ixm_flags & IXAF_IS_IPV4) {
		nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4);
	} else {
		nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6);
	}

	/* We have the nce, hence the ill can't go away */
	ill_refrele(ill);
	if (nce == NULL) {
		/*
		 * Since this is unusual and we don't know what type of
		 * nce it was, we drop the packet.
		 */
		(void) ip_xmit_attr_free_mblk(ixamp);
		return (B_FALSE);
	}

	ixa->ixa_flags = ixm->ixm_flags;
	ixa->ixa_refcnt = 1;
	ixa->ixa_ipst = ipst;
	ixa->ixa_fragsize = ixm->ixm_fragsize;
	ixa->ixa_pktlen = ixm->ixm_pktlen;
	ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length;
	ixa->ixa_protocol = ixm->ixm_protocol;
	ixa->ixa_nce = nce;
	ixa->ixa_postfragfn = ixm->ixm_postfragfn;
	ixa->ixa_zoneid = ixm->ixm_zoneid;
	ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid;
	ixa->ixa_scopeid = ixm->ixm_scopeid;
	ixa->ixa_ident = ixm->ixm_ident;
	ixa->ixa_xmit_hint = ixm->ixm_xmit_hint;

	/*
	 * Transfer ownership of the label/cred holds from the mblk to the
	 * ixa; clearing the ixm fields stops ip_xmit_attr_free_mblk (if it
	 * were called) from releasing them a second time.
	 */
	if (ixm->ixm_tsl != NULL) {
		ixa->ixa_tsl = ixm->ixm_tsl;
		ixa->ixa_free_flags |= IXA_FREE_TSL;
		ixm->ixm_tsl = NULL;
	}
	if (ixm->ixm_cred != NULL) {
		ixa->ixa_cred = ixm->ixm_cred;
		ixa->ixa_free_flags |= IXA_FREE_CRED;
		ixm->ixm_cred = NULL;
	}
	ixa->ixa_cpid = ixm->ixm_cpid;
	ixa->ixa_conn_id = ixm->ixm_conn_id;

	/* The IPsec references move from the mblk to the ixa */
	ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa;
	ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa;
	ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy;
	ixa->ixa_ipsec_action = ixm->ixm_ipsec_action;
	ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch;

	ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0];
	ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1];
	ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port;
	ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port;
	ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type;
	ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code;
	ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf;
	ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0];
	ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1];
	ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2];
	ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3];
	ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0];
	ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1];
	ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2];
	ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3];
	ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx;
	ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx;
	ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto;

	freeb(ixamp);
	return (B_TRUE);
}

/*
 * Free the ixm mblk and any references it holds
 * Returns b_cont.
 */
mblk_t *
ip_xmit_attr_free_mblk(mblk_t *ixamp)
{
	ixamblk_t	*ixm;
	mblk_t		*mp;

	/* Consume mp */
	ASSERT(DB_TYPE(ixamp) == M_BREAK);
	mp = ixamp->b_cont;

	ixm = (ixamblk_t *)ixamp->b_rptr;
	ASSERT(!ixm->ixm_inbound);

	if (ixm->ixm_ipsec_ah_sa != NULL) {
		IPSA_REFRELE(ixm->ixm_ipsec_ah_sa);
		ixm->ixm_ipsec_ah_sa = NULL;
	}
	if (ixm->ixm_ipsec_esp_sa != NULL) {
		IPSA_REFRELE(ixm->ixm_ipsec_esp_sa);
		ixm->ixm_ipsec_esp_sa = NULL;
	}
	if (ixm->ixm_ipsec_policy != NULL) {
		IPPOL_REFRELE(ixm->ixm_ipsec_policy);
		ixm->ixm_ipsec_policy = NULL;
	}
	if (ixm->ixm_ipsec_action != NULL) {
		IPACT_REFRELE(ixm->ixm_ipsec_action);
		ixm->ixm_ipsec_action = NULL;
	}
	if (ixm->ixm_ipsec_latch) {
		IPLATCH_REFRELE(ixm->ixm_ipsec_latch);
		ixm->ixm_ipsec_latch = NULL;
	}

	if (ixm->ixm_tsl != NULL) {
		label_rele(ixm->ixm_tsl);
		ixm->ixm_tsl = NULL;
	}
	if (ixm->ixm_cred != NULL) {
		crfree(ixm->ixm_cred);
		ixm->ixm_cred = NULL;
	}
	freeb(ixamp);
	return (mp);
}

/*
 * Take the information in ip_recv_attr_t and stick it in an mblk
 * that can later be passed to ip_recv_attr_from_mblk to recreate the
 * ip_recv_attr_t.
 *
 * Returns NULL on memory allocation failure.
 */
mblk_t *
ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
{
	mblk_t		*iramp;
	iramblk_t	*irm;
	ill_t		*ill = ira->ira_ill;

	ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);

	iramp = allocb(sizeof (*irm), BPRI_MED);
	if (iramp == NULL)
		return (NULL);

	/* M_BREAK marks this mblk as carrying attributes, not packet data */
	iramp->b_datap->db_type = M_BREAK;
	iramp->b_wptr += sizeof (*irm);
	irm = (iramblk_t *)iramp->b_rptr;

	bzero(irm, sizeof (*irm));
	irm->irm_inbound = B_TRUE;
	irm->irm_flags = ira->ira_flags;
	if (ill != NULL) {
		/* Internal to IP - preserve ip_stack_t, ill and rill */
		irm->irm_stackid =
		    ill->ill_ipst->ips_netstack->netstack_stackid;
		irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
		ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
		    ira->ira_rifindex);
	} else {
		/* Let ip_recv_attr_from_stackid know there isn't one */
		irm->irm_stackid = -1;
	}
	irm->irm_rifindex = ira->ira_rifindex;
	irm->irm_ruifindex = ira->ira_ruifindex;
	irm->irm_pktlen = ira->ira_pktlen;
	irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
	irm->irm_protocol = ira->ira_protocol;

	irm->irm_sqp = ira->ira_sqp;
	irm->irm_ring = ira->ira_ring;

	irm->irm_zoneid = ira->ira_zoneid;
	irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
	irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
	irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;

	/* The mblk takes its own holds on the label and cred */
	if (ira->ira_tsl != NULL) {
		irm->irm_tsl = ira->ira_tsl;
		label_hold(irm->irm_tsl);
	}
	if (ira->ira_cred != NULL) {
		irm->irm_cred = ira->ira_cred;
		crhold(ira->ira_cred);
	}
	irm->irm_cpid = ira->ira_cpid;

	if (ira->ira_flags & IRAF_L2SRC_SET)
		bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);

	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		/* Each copied IPsec pointer gets its own reference */
		if (ira->ira_ipsec_ah_sa != NULL) {
			irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
			IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
		}
		if (ira->ira_ipsec_esp_sa != NULL) {
			irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
			IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
		}
		if (ira->ira_ipsec_action != NULL) {
			irm->irm_ipsec_action = ira->ira_ipsec_action;
			IPACT_REFHOLD(ira->ira_ipsec_action);
		}
	}
	return (iramp);
}

/*
 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
 * then irm_stackid is not -1, in which case we check that the
 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
 * not the case.
 * If irm_stackid is zero then we are used by an ULP (e.g., squeue_enter)
 * and we just proceed with ira_ill and ira_rill as NULL.
 *
 * The caller needs to release any references on the pointers inside the ire
 * by calling ira_cleanup.
 */
boolean_t
ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
{
	iramblk_t	*irm;
	netstack_t	*ns;
	ip_stack_t	*ipst = NULL;
	ill_t		*ill = NULL, *rill = NULL;

	/* We assume the caller hasn't initialized ira */
	bzero(ira, sizeof (*ira));

	ASSERT(DB_TYPE(iramp) == M_BREAK);
	ASSERT(iramp->b_cont == NULL);

	irm = (iramblk_t *)iramp->b_rptr;
	ASSERT(irm->irm_inbound);

	if (irm->irm_stackid != -1) {
		/* Verify the netstack is still around */
		ns = netstack_find_by_stackid(irm->irm_stackid);
		if (ns == NULL) {
			/* Disappeared on us */
			(void) ip_recv_attr_free_mblk(iramp);
			return (B_FALSE);
		}
		ipst = ns->netstack_ip;

		/* Verify the ill is still around */
		ill = ill_lookup_on_ifindex(irm->irm_ifindex,
		    !(irm->irm_flags & IRAF_IS_IPV4), ipst);

		if (irm->irm_ifindex == irm->irm_rifindex) {
			rill = ill;
		} else {
			rill = ill_lookup_on_ifindex(irm->irm_rifindex,
			    !(irm->irm_flags & IRAF_IS_IPV4), ipst);
		}

		/* We have the ill, hence the netstack can't go away */
		netstack_rele(ns);
		if (ill == NULL || rill == NULL) {
			/* Disappeared on us */
			if (ill != NULL)
				ill_refrele(ill);
			if (rill != NULL && rill != ill)
				ill_refrele(rill);
			(void) ip_recv_attr_free_mblk(iramp);
			return (B_FALSE);
		}
	}

	ira->ira_flags = irm->irm_flags;
	/* Caller must ill_refele(ira_ill) by using ira_cleanup() */
	ira->ira_ill = ill;
	ira->ira_rill = rill;

	ira->ira_rifindex = irm->irm_rifindex;
	ira->ira_ruifindex = irm->irm_ruifindex;
	ira->ira_pktlen = irm->irm_pktlen;
	ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
	ira->ira_protocol = irm->irm_protocol;

	ira->ira_sqp = irm->irm_sqp;
	/* The rest of IP assumes that the rings never go away. */
	ira->ira_ring = irm->irm_ring;

	ira->ira_zoneid = irm->irm_zoneid;
	ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
	ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
	ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;

	/*
	 * Transfer ownership of the label/cred holds from the mblk to the
	 * ira; clearing the irm fields prevents a double release.
	 */
	if (irm->irm_tsl != NULL) {
		ira->ira_tsl = irm->irm_tsl;
		ira->ira_free_flags |= IRA_FREE_TSL;
		irm->irm_tsl = NULL;
	}
	if (irm->irm_cred != NULL) {
		ira->ira_cred = irm->irm_cred;
		ira->ira_free_flags |= IRA_FREE_CRED;
		irm->irm_cred = NULL;
	}
	ira->ira_cpid = irm->irm_cpid;

	if (ira->ira_flags & IRAF_L2SRC_SET)
		bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);

	/* The IPsec references move from the mblk to the ira */
	ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
	ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
	ira->ira_ipsec_action = irm->irm_ipsec_action;

	freeb(iramp);
	return (B_TRUE);
}

/*
 * Free the irm mblk and any references it holds
 * Returns b_cont.
 */
mblk_t *
ip_recv_attr_free_mblk(mblk_t *iramp)
{
	iramblk_t	*irm;
	mblk_t		*mp;

	/* Consume mp */
	ASSERT(DB_TYPE(iramp) == M_BREAK);
	mp = iramp->b_cont;

	irm = (iramblk_t *)iramp->b_rptr;
	ASSERT(irm->irm_inbound);

	if (irm->irm_ipsec_ah_sa != NULL) {
		IPSA_REFRELE(irm->irm_ipsec_ah_sa);
		irm->irm_ipsec_ah_sa = NULL;
	}
	if (irm->irm_ipsec_esp_sa != NULL) {
		IPSA_REFRELE(irm->irm_ipsec_esp_sa);
		irm->irm_ipsec_esp_sa = NULL;
	}
	if (irm->irm_ipsec_action != NULL) {
		IPACT_REFRELE(irm->irm_ipsec_action);
		irm->irm_ipsec_action = NULL;
	}
	if (irm->irm_tsl != NULL) {
		label_rele(irm->irm_tsl);
		irm->irm_tsl = NULL;
	}
	if (irm->irm_cred != NULL) {
		crfree(irm->irm_cred);
		irm->irm_cred = NULL;
	}

	freeb(iramp);
	return (mp);
}

/*
 * Returns true if the mblk contains an ip_recv_attr_t
 * For now we just check db_type.
 */
boolean_t
ip_recv_attr_is_mblk(mblk_t *mp)
{
	/*
	 * Need to handle the various forms of tcp_timermp which are tagged
	 * with b_wptr and might have a NULL b_datap.
	 */
	if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1)
		return (B_FALSE);

#ifdef	DEBUG
	iramblk_t	*irm;

	if (DB_TYPE(mp) != M_BREAK)
		return (B_FALSE);

	irm = (iramblk_t *)mp->b_rptr;
	ASSERT(irm->irm_inbound);
	return (B_TRUE);
#else
	return (DB_TYPE(mp) == M_BREAK);
#endif
}

/*
 * Common code for conn_get_ixa() and conn_get_ixa_tryhard(): return an
 * exclusively-owned ixa, either conn_ixa itself (when nobody else holds
 * it) or a freshly allocated safe copy. kmflag selects KM_SLEEP vs
 * KM_NOSLEEP for the copy allocation.
 */
static ip_xmit_attr_t *
conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag)
{
	ip_xmit_attr_t	*ixa;
	ip_xmit_attr_t	*oldixa;

	mutex_enter(&connp->conn_lock);
	ixa = connp->conn_ixa;

	/* At least one references for the conn_t */
	ASSERT(ixa->ixa_refcnt >= 1);
	if (atomic_inc_32_nv(&ixa->ixa_refcnt) == 2) {
		/* No other thread using conn_ixa */
		mutex_exit(&connp->conn_lock);
		return (ixa);
	}
	/* Shared with some other thread - hand back a private copy instead */
	ixa = kmem_alloc(sizeof (*ixa), kmflag);
	if (ixa == NULL) {
		mutex_exit(&connp->conn_lock);
		ixa_refrele(connp->conn_ixa);
		return (NULL);
	}
	ixa_safe_copy(connp->conn_ixa, ixa);

	/* Make sure we drop conn_lock before any refrele */
	if (replace) {
		ixa->ixa_refcnt++;	/* No atomic needed - not visible */
		oldixa = connp->conn_ixa;
		connp->conn_ixa = ixa;
		mutex_exit(&connp->conn_lock);
		IXA_REFRELE(oldixa);	/* Undo refcnt from conn_t */
	} else {
		oldixa = connp->conn_ixa;
		mutex_exit(&connp->conn_lock);
	}
	IXA_REFRELE(oldixa);	/* Undo above atomic_add_32_nv */

	return (ixa);
}

/*
 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only
 * the caller can access the ip_xmit_attr_t.
 *
 * If nobody else is using conn_ixa we return it.
 * Otherwise we make a "safe" copy of conn_ixa
 * and return it. The "safe" copy has the pointers set to NULL
 * (since the pointers might be changed by another thread using
 * conn_ixa). The caller needs to check for NULL pointers to see
 * if ip_set_destination needs to be called to re-establish the pointers.
 *
 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t.
 * That is used when we connect() the ULP.
 */
ip_xmit_attr_t *
conn_get_ixa(conn_t *connp, boolean_t replace)
{
	/* KM_NOSLEEP: may return NULL if the safe copy can't be allocated */
	return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP));
}

/*
 * Used only when the option is to have the kernel hang due to not
 * cleaning up ixa references on ills etc.
 */
ip_xmit_attr_t *
conn_get_ixa_tryhard(conn_t *connp, boolean_t replace)
{
	/* KM_SLEEP: blocks until the allocation succeeds */
	return (conn_get_ixa_impl(connp, replace, KM_SLEEP));
}

/*
 * Replace conn_ixa with the ixa argument.
 *
 * The caller must hold conn_lock.
 *
 * We return the old ixa; the caller must ixa_refrele that after conn_lock
 * has been dropped.
 */
ip_xmit_attr_t *
conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa)
{
	ip_xmit_attr_t	*oldixa;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	oldixa = connp->conn_ixa;
	IXA_REFHOLD(ixa);
	/* Preserve the connection id used by DTrace */
	ixa->ixa_conn_id = oldixa->ixa_conn_id;
	connp->conn_ixa = ixa;
	return (oldixa);
}

/*
 * Return a ip_xmit_attr_t to use with a conn_t that is based on but
 * separate from conn_ixa.
 *
 * This "safe" copy has the pointers set to NULL
 * (since the pointers might be changed by another thread using
 * conn_ixa). The caller needs to check for NULL pointers to see
 * if ip_set_destination needs to be called to re-establish the pointers.
 */
ip_xmit_attr_t *
conn_get_ixa_exclusive(conn_t *connp)
{
	ip_xmit_attr_t	*ixa;

	mutex_enter(&connp->conn_lock);
	ixa = connp->conn_ixa;

	/* At least one references for the conn_t */
	ASSERT(ixa->ixa_refcnt >= 1);

	/* Make sure conn_ixa doesn't disappear while we copy it */
	atomic_inc_32(&ixa->ixa_refcnt);

	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
	if (ixa == NULL) {
		mutex_exit(&connp->conn_lock);
		ixa_refrele(connp->conn_ixa);
		return (NULL);
	}
	ixa_safe_copy(connp->conn_ixa, ixa);
	mutex_exit(&connp->conn_lock);
	IXA_REFRELE(connp->conn_ixa);
	return (ixa);
}

/*
 * Copy src into ixa, dropping all pointers that another thread could
 * change underneath us, and taking extra holds on the label/cred we keep.
 */
void
ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
{
	bcopy(src, ixa, sizeof (*ixa));
	ixa->ixa_refcnt = 1;
	/*
	 * Clear any pointers that have references and might be changed
	 * by ip_set_destination or the ULP
	 */
	ixa->ixa_ire = NULL;
	ixa->ixa_nce = NULL;
	ixa->ixa_dce = NULL;
	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
#ifdef DEBUG
	ixa->ixa_curthread = NULL;
#endif
	/* Clear all the IPsec pointers and the flag as well. */
	ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;

	ixa->ixa_ipsec_latch = NULL;
	ixa->ixa_ipsec_ah_sa = NULL;
	ixa->ixa_ipsec_esp_sa = NULL;
	ixa->ixa_ipsec_policy = NULL;
	ixa->ixa_ipsec_action = NULL;

	/*
	 * We leave ixa_tsl unchanged, but if it has a refhold we need
	 * to get an extra refhold.
	 */
	if (ixa->ixa_free_flags & IXA_FREE_TSL)
		label_hold(ixa->ixa_tsl);

	/*
	 * We leave ixa_cred unchanged, but if it has a refhold we need
	 * to get an extra refhold.
	 */
	if (ixa->ixa_free_flags & IXA_FREE_CRED)
		crhold(ixa->ixa_cred);

	/*
	 * There is no cleanup in progress on this new copy.
	 */
	ixa->ixa_tcpcleanup = IXATC_IDLE;
}

/*
 * Duplicate an ip_xmit_attr_t.
 * Assumes that the caller controls the ixa, hence we do not need to use
 * a safe copy. We just have to increase the refcnt on any pointers.
 * Returns NULL on memory allocation failure (KM_NOSLEEP).
 */
ip_xmit_attr_t *
ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa)
{
	ip_xmit_attr_t *ixa;

	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
	if (ixa == NULL)
		return (NULL);
	bcopy(src_ixa, ixa, sizeof (*ixa));
	ixa->ixa_refcnt = 1;

	/* The copy shares the cached routing entries; take our own holds */
	if (ixa->ixa_ire != NULL)
		ire_refhold_notr(ixa->ixa_ire);
	if (ixa->ixa_nce != NULL)
		nce_refhold(ixa->ixa_nce);
	if (ixa->ixa_dce != NULL)
		dce_refhold_notr(ixa->ixa_dce);

#ifdef DEBUG
	ixa->ixa_curthread = NULL;
#endif

	/* Additional holds on any IPsec state carried over */
	if (ixa->ixa_ipsec_latch != NULL)
		IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
	if (ixa->ixa_ipsec_ah_sa != NULL)
		IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
	if (ixa->ixa_ipsec_esp_sa != NULL)
		IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
	if (ixa->ixa_ipsec_policy != NULL)
		IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
	if (ixa->ixa_ipsec_action != NULL)
		IPACT_REFHOLD(ixa->ixa_ipsec_action);

	if (ixa->ixa_tsl != NULL) {
		label_hold(ixa->ixa_tsl);
		ixa->ixa_free_flags |= IXA_FREE_TSL;
	}
	if (ixa->ixa_cred != NULL) {
		crhold(ixa->ixa_cred);
		ixa->ixa_free_flags |= IXA_FREE_CRED;
	}
	return (ixa);
}

/*
 * Used to replace the ixa_label field.
 * The caller should have a reference on the label, which we transfer to
 * the attributes so that when the attribute is freed/cleaned up
 * we will release that reference.
 */
void
ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl)
{
	ASSERT(tsl != NULL);

	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		/* Release the hold on the label being replaced */
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
	} else {
		ixa->ixa_free_flags |= IXA_FREE_TSL;
	}
	ixa->ixa_tsl = tsl;
}

/*
 * Replace the ip_recv_attr_t's label.
 * Due to kernel RPC's use of db_credp we also need to replace ira_cred;
 * TCP/UDP uses ira_cred to set db_credp for non-socket users.
 * This can fail (and return B_FALSE) due to lack of memory.
 */
boolean_t
ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl)
{
	cred_t	*newcr;

	if (ira->ira_free_flags & IRA_FREE_TSL) {
		ASSERT(ira->ira_tsl != NULL);
		label_rele(ira->ira_tsl);
	}
	label_hold(tsl);
	ira->ira_tsl = tsl;
	ira->ira_free_flags |= IRA_FREE_TSL;

	/*
	 * Reset zoneid if we have a shared address. That allows
	 * ip_fanout_tx_v4/v6 to determine the zoneid again.
	 */
	if (ira->ira_flags & IRAF_TX_SHARED_ADDR)
		ira->ira_zoneid = ALL_ZONES;

	/* We update ira_cred for RPC */
	newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP);
	if (newcr == NULL)
		/*
		 * NOTE(review): on this failure path ira_tsl has already
		 * been replaced while ira_cred still carries the old label —
		 * callers appear expected to drop the packet on B_FALSE;
		 * confirm against callers.
		 */
		return (B_FALSE);
	if (ira->ira_free_flags & IRA_FREE_CRED)
		crfree(ira->ira_cred);
	ira->ira_cred = newcr;
	ira->ira_free_flags |= IRA_FREE_CRED;
	return (B_TRUE);
}

/*
 * This needs to be called after ip_set_destination/tsol_check_dest might
 * have changed ixa_tsl to be specific for a destination, and we now want to
 * send to a different destination.
 * We have to restart with crgetlabel() since ip_set_destination/
 * tsol_check_dest will start with ixa_tsl.
 */
void
ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr)
{
	/* Labels only matter on a labeled (Trusted Extensions) system */
	if (!is_system_labeled())
		return;

	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
	}
	/* No hold taken: the label now comes from (and is owned by) cr */
	ixa->ixa_tsl = crgetlabel(cr);
}

/*
 * Function wrapper around the IXA_REFRELE macro for callers that need
 * a function pointer or an out-of-line release.
 */
void
ixa_refrele(ip_xmit_attr_t *ixa)
{
	IXA_REFRELE(ixa);
}

/*
 * Called when the last reference on the ixa is dropped: release all
 * contained references and free the structure itself.
 */
void
ixa_inactive(ip_xmit_attr_t *ixa)
{
	ASSERT(ixa->ixa_refcnt == 0);

	ixa_cleanup(ixa);
	kmem_free(ixa, sizeof (*ixa));
}

/*
 * Release any references contained in the ixa.
 * Also clear any fields that are not controlled by ixa_flags.
 */
void
ixa_cleanup(ip_xmit_attr_t *ixa)
{
	if (ixa->ixa_ire != NULL) {
		ire_refrele_notr(ixa->ixa_ire);
		ixa->ixa_ire = NULL;
	}
	if (ixa->ixa_dce != NULL) {
		dce_refrele_notr(ixa->ixa_dce);
		ixa->ixa_dce = NULL;
	}
	if (ixa->ixa_nce != NULL) {
		nce_refrele(ixa->ixa_nce);
		ixa->ixa_nce = NULL;
	}
	/* Force re-verification of cached routing state on next use */
	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
		ipsec_out_release_refs(ixa);
	}
	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
		ASSERT(ixa->ixa_tsl != NULL);
		label_rele(ixa->ixa_tsl);
		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
	}
	ixa->ixa_tsl = NULL;
	if (ixa->ixa_free_flags & IXA_FREE_CRED) {
		ASSERT(ixa->ixa_cred != NULL);
		crfree(ixa->ixa_cred);
		ixa->ixa_free_flags &= ~IXA_FREE_CRED;
	}
	ixa->ixa_cred = NULL;
	ixa->ixa_src_preferences = 0;
	ixa->ixa_ifindex = 0;
	ixa->ixa_multicast_ifindex = 0;
	ixa->ixa_multicast_ifaddr = INADDR_ANY;
}

/*
 * Release any references contained in the ira.
 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second
 * argument.
1107 */ 1108 void 1109 ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill) 1110 { 1111 if (ira->ira_ill != NULL) { 1112 if (ira->ira_rill != ira->ira_ill) { 1113 /* Caused by async processing */ 1114 ill_refrele(ira->ira_rill); 1115 } 1116 if (refrele_ill) 1117 ill_refrele(ira->ira_ill); 1118 } 1119 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1120 ipsec_in_release_refs(ira); 1121 } 1122 if (ira->ira_free_flags & IRA_FREE_TSL) { 1123 ASSERT(ira->ira_tsl != NULL); 1124 label_rele(ira->ira_tsl); 1125 ira->ira_free_flags &= ~IRA_FREE_TSL; 1126 } 1127 ira->ira_tsl = NULL; 1128 if (ira->ira_free_flags & IRA_FREE_CRED) { 1129 ASSERT(ira->ira_cred != NULL); 1130 crfree(ira->ira_cred); 1131 ira->ira_free_flags &= ~IRA_FREE_CRED; 1132 } 1133 ira->ira_cred = NULL; 1134 } 1135 1136 /* 1137 * Function to help release any IRE, NCE, or DCEs that 1138 * have been deleted and are marked as condemned. 1139 * The caller is responsible for any serialization which is different 1140 * for TCP, SCTP, and others. 
 */
static void
ixa_cleanup_stale(ip_xmit_attr_t *ixa)
{
	ire_t *ire;
	nce_t *nce;
	dce_t *dce;

	ire = ixa->ixa_ire;
	nce = ixa->ixa_nce;
	dce = ixa->ixa_dce;

	/*
	 * A condemned IRE is swapped for the blackhole IRE rather than
	 * NULLed, so concurrent senders always see a usable pointer; the
	 * VERIFY generation forces them to look up a fresh route.
	 */
	if (ire != NULL && IRE_IS_CONDEMNED(ire)) {
		ire_refrele_notr(ire);
		ire = ire_blackhole(ixa->ixa_ipst,
		    !(ixa->ixa_flags & IXAF_IS_IPV4));
		ASSERT(ire != NULL);
#ifdef DEBUG
		/*
		 * NOTE(review): presumably converts the tracked hold
		 * returned by ire_blackhole() into an untracked one so the
		 * eventual ire_refrele_notr() balances -- confirm against
		 * the IRE refcnt-tracing conventions.
		 */
		ire_refhold_notr(ire);
		ire_refrele(ire);
#endif
		ixa->ixa_ire = ire;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}
	if (nce != NULL && nce->nce_is_condemned) {
		/* Can make it NULL as long as we set IRE_GENERATION_VERIFY */
		nce_refrele(nce);
		ixa->ixa_nce = NULL;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}
	if (dce != NULL && DCE_IS_CONDEMNED(dce)) {
		dce_refrele_notr(dce);
		dce = dce_get_default(ixa->ixa_ipst);
		ASSERT(dce != NULL);
#ifdef DEBUG
		/* Same tracked-to-untracked hold conversion as for the IRE. */
		dce_refhold_notr(dce);
		dce_refrele(dce);
#endif
		ixa->ixa_dce = dce;
		ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
	}
}

/*
 * Obtain exclusive use of the per-stack cleanup mblk for this connection.
 * Blocks until (a) no cleanup of this connection is already pending and
 * (b) the shared mblk is available; marks the connection IXATC_INPROGRESS
 * before returning the mblk with the lock dropped.
 */
static mblk_t *
tcp_ixa_cleanup_getmblk(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
	int need_retry;
	mblk_t *mp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	/*
	 * It's possible that someone else came in and started cleaning up
	 * another connection between the time we verified this one is not being
	 * cleaned up and the time we actually get the shared mblk. If that's
	 * the case, we've dropped the lock, and some other thread may have
	 * cleaned up this connection again, and is still waiting for
	 * notification of that cleanup's completion. Therefore we need to
	 * recheck.
	 */
	do {
		need_retry = 0;
		while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
			cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}

		while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
			/*
			 * Multiple concurrent cleanups; need to have the last
			 * one run since it could be an unplumb.
			 */
			need_retry = 1;
			cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}
	} while (need_retry);

	/*
	 * We now have the lock and the mblk; now make sure that no one else can
	 * try to clean up this connection or enqueue it for cleanup, clear the
	 * mblk pointer for this stack, drop the lock, and return the mblk.
	 */
	ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
	ASSERT(tcps->tcps_ixa_cleanup_mp == mp);
	ASSERT(mp != NULL);

	connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
	tcps->tcps_ixa_cleanup_mp = NULL;
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);

	return (mp);
}

/*
 * Used to run ixa_cleanup_stale inside the tcp squeue.
 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
 * and waking up the caller.
 */
/* ARGSUSED2 */
static void
tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
    ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	tcp_stack_t *tcps;

	tcps = connp->conn_netstack->netstack_tcp;

	ixa_cleanup_stale(connp->conn_ixa);

	/*
	 * Return the shared mblk and advertise completion: ready_cv wakes a
	 * thread waiting for the mblk, done_cv wakes waiters on this
	 * connection's cleanup state.
	 */
	mutex_enter(&tcps->tcps_ixa_cleanup_lock);
	ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
	connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
	tcps->tcps_ixa_cleanup_mp = mp;
	cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
	/*
	 * It is possible for any number of threads to be waiting for cleanup of
	 * different connections.  Absent a per-connection (or per-IXA) CV, we
	 * need to wake them all up even though only one can be waiting on this
	 * particular cleanup.
	 */
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}

/*
 * Wait until the squeue-run cleanup of this connection has completed,
 * then mark the connection idle again and wake any other waiters.
 */
static void
tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);

	while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) {
		cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
		    &tcps->tcps_ixa_cleanup_lock);
	}

	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);
	connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
	/* Other threads may be blocked in getmblk on this state change. */
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);

	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}

/*
 * ipcl_walk() function to help release any IRE, NCE, or DCEs that
 * have been deleted and are marked as condemned.
 * Note that we can't cleanup the pointers since there can be threads
 * in conn_ip_output() sending while we are called.
 */
void
conn_ixa_cleanup(conn_t *connp, void *arg)
{
	/*
	 * NOTE(review): arg appears to carry a boolean by value from the
	 * ipcl_walk() caller, e.g. (void *)B_TRUE -- confirm at call sites.
	 */
	boolean_t tryhard = (boolean_t)arg;

	if (IPCL_IS_TCP(connp)) {
		mblk_t *mp;

		mp = tcp_ixa_cleanup_getmblk(connp);

		/*
		 * Run the cleanup inside the tcp squeue (directly if we are
		 * already the squeue worker, otherwise by enqueueing), then
		 * wait for it to finish.
		 */
		if (connp->conn_sqp->sq_run == curthread) {
			/* Already on squeue */
			tcp_ixa_cleanup(connp, mp, NULL, NULL);
		} else {
			CONN_INC_REF(connp);
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
			    connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
		}
		tcp_ixa_cleanup_wait_and_finish(connp);
	} else if (IPCL_IS_SCTP(connp)) {
		sctp_t *sctp;
		sctp_faddr_t *fp;

		/*
		 * Serialize via RUN_SCTP/WAKE_SCTP and clean both the conn's
		 * ixa and every peer address's ixa.
		 */
		sctp = CONN2SCTP(connp);
		RUN_SCTP(sctp);
		ixa_cleanup_stale(connp->conn_ixa);
		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next)
			ixa_cleanup_stale(fp->sf_ixa);
		WAKE_SCTP(sctp);
	} else {
		ip_xmit_attr_t *ixa;

		/*
		 * If there is a different thread using conn_ixa then we get a
		 * new copy and cut the old one loose from conn_ixa. Otherwise
		 * we use conn_ixa and prevent any other thread from
		 * using/changing it. Anybody using conn_ixa (e.g., a thread in
		 * conn_ip_output) will do an ixa_refrele which will remove any
		 * references on the ire etc.
		 *
		 * Once we are done other threads can use conn_ixa since the
		 * refcnt will be back at one.
		 *
		 * We are called either because an ill is going away, or
		 * due to memory reclaim. In the former case we wait for
		 * memory since we must remove the refcnts on the ill.
		 */
		if (tryhard) {
			ixa = conn_get_ixa_tryhard(connp, B_TRUE);
			ASSERT(ixa != NULL);
		} else {
			ixa = conn_get_ixa(connp, B_TRUE);
			if (ixa == NULL) {
				/*
				 * Somebody else was using it and kmem_alloc
				 * failed! Next memory reclaim will try to
				 * clean up.
				 */
				DTRACE_PROBE1(conn__ixa__cleanup__bail,
				    conn_t *, connp);
				return;
			}
		}
		ixa_cleanup_stale(ixa);
		ixa_refrele(ixa);
	}
}

/*
 * ixa needs to be an exclusive copy so that no one changes the cookie
 * or the ixa_nce.
 * Returns B_TRUE if the conn was inserted on a drain list (flow
 * controlled), B_FALSE otherwise.
 */
boolean_t
ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa)
{
	uintptr_t cookie = ixa->ixa_cookie;
	ill_dld_direct_t *idd;
	idl_tx_list_t *idl_txl;
	ill_t *ill = ixa->ixa_nce->nce_ill;
	boolean_t inserted = B_FALSE;

	idd = &(ill)->ill_dld_capab->idc_direct;
	/* The drain list is chosen by hashing the flow-control cookie. */
	idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
	mutex_enter(&idl_txl->txl_lock);

	/*
	 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow
	 * control is asserted on an ill that does not support direct calls.
	 * Jump to insert.
	 */
	if (cookie == 0)
		goto tryinsert;

	ASSERT(ILL_DIRECT_CAPABLE(ill));

	if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) {
		/* The mac layer says the tx ring is no longer blocked. */
		DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie);
	} else if (idl_txl->txl_cookie != NULL &&
	    idl_txl->txl_cookie != ixa->ixa_cookie) {
		/* Another cookie hashed to the same drain list; skip insert. */
		DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie,
		    uintptr_t, idl_txl->txl_cookie);
		/* TODO: bump kstat for cookie collision */
	} else {
		/*
		 * Check/set conn_blocked under conn_lock. Note that txl_lock
		 * will not suffice since two separate UDP threads may be
		 * racing to send to different destinations that are
		 * associated with different cookies and thus may not be
		 * holding the same txl_lock. Further, since a given conn_t
		 * can only be on a single drain list, the conn_t will be
		 * enqueued on whichever thread wins this race.
		 */
tryinsert:	mutex_enter(&connp->conn_lock);
		if (connp->conn_blocked) {
			DTRACE_PROBE1(ill__tx__conn__already__blocked,
			    conn_t *, connp);
			mutex_exit(&connp->conn_lock);
		} else {
			connp->conn_blocked = B_TRUE;
			mutex_exit(&connp->conn_lock);
			idl_txl->txl_cookie = cookie;
			conn_drain_insert(connp, idl_txl);
			/* STREAMS conns also disable their write queue. */
			if (!IPCL_IS_NONSTR(connp))
				noenable(connp->conn_wq);
			inserted = B_TRUE;
		}
	}
	mutex_exit(&idl_txl->txl_lock);
	return (inserted);
}