xref: /titanic_52/usr/src/uts/common/inet/ip/ip_attr.c (revision 3f7d54a6b84904c8f4d8daa4c7b577bede7df8b9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #include <sys/zone.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/cmn_err.h>
35 #include <sys/debug.h>
36 #include <sys/atomic.h>
37 
38 #include <sys/systm.h>
39 #include <sys/param.h>
40 #include <sys/kmem.h>
41 #include <sys/sdt.h>
42 #include <sys/socket.h>
43 #include <sys/mac.h>
44 #include <net/if.h>
45 #include <net/if_arp.h>
46 #include <net/route.h>
47 #include <sys/sockio.h>
48 #include <netinet/in.h>
49 #include <net/if_dl.h>
50 
51 #include <inet/common.h>
52 #include <inet/mi.h>
53 #include <inet/mib2.h>
54 #include <inet/nd.h>
55 #include <inet/arp.h>
56 #include <inet/snmpcom.h>
57 #include <inet/kstatcom.h>
58 
59 #include <netinet/igmp_var.h>
60 #include <netinet/ip6.h>
61 #include <netinet/icmp6.h>
62 #include <netinet/sctp.h>
63 
64 #include <inet/ip.h>
65 #include <inet/ip_impl.h>
66 #include <inet/ip6.h>
67 #include <inet/ip6_asp.h>
68 #include <inet/tcp.h>
69 #include <inet/ip_multi.h>
70 #include <inet/ip_if.h>
71 #include <inet/ip_ire.h>
72 #include <inet/ip_ftable.h>
73 #include <inet/ip_rts.h>
74 #include <inet/optcom.h>
75 #include <inet/ip_ndp.h>
76 #include <inet/ip_listutils.h>
77 #include <netinet/igmp.h>
78 #include <netinet/ip_mroute.h>
79 #include <inet/ipp_common.h>
80 
81 #include <net/pfkeyv2.h>
82 #include <inet/sadb.h>
83 #include <inet/ipsec_impl.h>
84 #include <inet/ipdrop.h>
85 #include <inet/ip_netinfo.h>
86 #include <sys/squeue_impl.h>
87 #include <sys/squeue.h>
88 
89 #include <inet/ipclassifier.h>
90 #include <inet/sctp_ip.h>
91 #include <inet/sctp/sctp_impl.h>
92 #include <inet/udp_impl.h>
93 #include <sys/sunddi.h>
94 
95 #include <sys/tsol/label.h>
96 #include <sys/tsol/tnet.h>
97 
/*
 * Release a reference on ip_xmit_attr.
 * The reference is acquired by conn_get_ixa().
 *
 * The count is dropped atomically; whoever takes it to zero calls
 * ixa_inactive() on the ixa.
 *
 * Wrapped in do { } while (0) so that the macro expands to a single
 * statement and can be used safely in an unbraced if/else. Callers must
 * terminate the invocation with a semicolon.
 */
#define	IXA_REFRELE(ixa)					\
do {								\
	if (atomic_add_32_nv(&(ixa)->ixa_refcnt, -1) == 0)	\
		ixa_inactive(ixa);				\
} while (0)
107 
/*
 * Take an additional reference on an ip_xmit_attr.
 * The caller must already hold a reference, hence the count can not be
 * zero on entry (asserted).
 *
 * Wrapped in do { } while (0) so that the macro expands to a single
 * statement and can be used safely in an unbraced if/else. Callers must
 * terminate the invocation with a semicolon.
 */
#define	IXA_REFHOLD(ixa)					\
do {								\
	ASSERT((ixa)->ixa_refcnt != 0);				\
	atomic_add_32(&(ixa)->ixa_refcnt, 1);			\
} while (0)
113 
114 /*
115  * When we need to handle a transmit side asynchronous operation, then we need
116  * to save sufficient information so that we can call the fragment and postfrag
117  * functions. That information is captured in an mblk containing this structure.
118  *
119  * Since this is currently only used for IPsec, we include information for
120  * the kernel crypto framework.
121  */
122 typedef struct ixamblk_s {
123 	boolean_t	ixm_inbound;	/* B_FALSE */
124 	iaflags_t	ixm_flags;	/* ixa_flags */
125 	netstackid_t	ixm_stackid;	/* Verify it didn't go away */
126 	uint_t		ixm_ifindex;	/* Used to find the nce */
127 	in6_addr_t	ixm_nceaddr_v6;	/* Used to find nce */
128 #define	ixm_nceaddr_v4	V4_PART_OF_V6(ixm_nceaddr_v6)
129 	uint32_t	ixm_fragsize;
130 	uint_t		ixm_pktlen;
131 	uint16_t	ixm_ip_hdr_length; /* Points to ULP header */
132 	uint8_t		ixm_protocol;	/* Protocol number for ULP cksum */
133 	pfirepostfrag_t	ixm_postfragfn;
134 
135 	zoneid_t	ixm_zoneid;		/* Needed for ipobs */
136 	zoneid_t	ixm_no_loop_zoneid;	/* IXAF_NO_LOOP_ZONEID_SET */
137 
138 	uint_t		ixm_scopeid;		/* For IPv6 link-locals */
139 
140 	uint32_t	ixm_ident;		/* For IPv6 fragment header */
141 	uint32_t	ixm_xmit_hint;
142 
143 	cred_t		*ixm_cred;	/* For getpeerucred - refhold if set */
144 	pid_t		ixm_cpid;	/* For getpeerucred */
145 
146 	ts_label_t	*ixm_tsl;	/* Refhold if set. */
147 
148 	/*
149 	 * When the pointers below are set they have a refhold on the struct.
150 	 */
151 	ipsec_latch_t		*ixm_ipsec_latch;
152 	struct ipsa_s		*ixm_ipsec_ah_sa;	/* SA for AH */
153 	struct ipsa_s		*ixm_ipsec_esp_sa;	/* SA for ESP */
154 	struct ipsec_policy_s 	*ixm_ipsec_policy;	/* why are we here? */
155 	struct ipsec_action_s	*ixm_ipsec_action; /* For reflected packets */
156 
157 	ipsa_ref_t		ixm_ipsec_ref[2]; /* Soft reference to SA */
158 
159 	/* Need these while waiting for SA */
160 	uint16_t ixm_ipsec_src_port;	/* Source port number of d-gram. */
161 	uint16_t ixm_ipsec_dst_port;	/* Destination port number of d-gram. */
162 	uint8_t  ixm_ipsec_icmp_type;	/* ICMP type of d-gram */
163 	uint8_t  ixm_ipsec_icmp_code;	/* ICMP code of d-gram */
164 
165 	sa_family_t ixm_ipsec_inaf;	/* Inner address family */
166 	uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN];	/* Inner src address */
167 	uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN];	/* Inner dest address */
168 	uint8_t  ixm_ipsec_insrcpfx;	/* Inner source prefix */
169 	uint8_t  ixm_ipsec_indstpfx;	/* Inner destination prefix */
170 
171 	uint8_t ixm_ipsec_proto;	/* IP protocol number for d-gram. */
172 } ixamblk_t;
173 
174 
175 /*
176  * When we need to handle a receive side asynchronous operation, then we need
177  * to save sufficient information so that we can call ip_fanout.
178  * That information is captured in an mblk containing this structure.
179  *
180  * Since this is currently only used for IPsec, we include information for
181  * the kernel crypto framework.
182  */
183 typedef struct iramblk_s {
184 	boolean_t	irm_inbound;	/* B_TRUE */
185 	iaflags_t	irm_flags;	/* ira_flags */
186 	netstackid_t	irm_stackid;	/* Verify it didn't go away */
187 	uint_t		irm_ifindex;	/* To find ira_ill */
188 
189 	uint_t		irm_rifindex;	/* ira_rifindex */
190 	uint_t		irm_ruifindex;	/* ira_ruifindex */
191 	uint_t		irm_pktlen;
192 	uint16_t	irm_ip_hdr_length; /* Points to ULP header */
193 	uint8_t		irm_protocol;	/* Protocol number for ULP cksum */
194 	zoneid_t	irm_zoneid;	/* ALL_ZONES unless local delivery */
195 
196 	squeue_t	*irm_sqp;
197 	ill_rx_ring_t	*irm_ring;
198 
199 	ipaddr_t	irm_mroute_tunnel;	/* IRAF_MROUTE_TUNNEL_SET */
200 	zoneid_t	irm_no_loop_zoneid;	/* IRAF_NO_LOOP_ZONEID_SET */
201 	uint32_t	irm_esp_udp_ports;	/* IRAF_ESP_UDP_PORTS */
202 
203 	char		irm_l2src[IRA_L2SRC_SIZE];	/* If IRAF_L2SRC_SET */
204 
205 	cred_t		*irm_cred;	/* For getpeerucred - refhold if set */
206 	pid_t		irm_cpid;	/* For getpeerucred */
207 
208 	ts_label_t	*irm_tsl;	/* Refhold if set. */
209 
210 	/*
211 	 * When set these correspond to a refhold on the object.
212 	 */
213 	struct ipsa_s		*irm_ipsec_ah_sa;	/* SA for AH */
214 	struct ipsa_s		*irm_ipsec_esp_sa;	/* SA for ESP */
215 	struct ipsec_action_s	*irm_ipsec_action; /* For reflected packets */
216 } iramblk_t;
217 
218 
219 /*
220  * Take the information in ip_xmit_attr_t and stick it in an mblk
221  * that can later be passed to ip_xmit_attr_from_mblk to recreate the
222  * ip_xmit_attr_t.
223  *
224  * Returns NULL on memory allocation failure.
225  */
226 mblk_t *
227 ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa)
228 {
229 	mblk_t		*ixamp;
230 	ixamblk_t	*ixm;
231 	nce_t		*nce = ixa->ixa_nce;
232 
233 	ASSERT(nce != NULL);
234 	ixamp = allocb(sizeof (*ixm), BPRI_MED);
235 	if (ixamp == NULL)
236 		return (NULL);
237 
238 	ixamp->b_datap->db_type = M_BREAK;
239 	ixamp->b_wptr += sizeof (*ixm);
240 	ixm = (ixamblk_t *)ixamp->b_rptr;
241 
242 	bzero(ixm, sizeof (*ixm));
243 	ixm->ixm_inbound = B_FALSE;
244 	ixm->ixm_flags = ixa->ixa_flags;
245 	ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid;
246 	ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex;
247 	ixm->ixm_nceaddr_v6 = nce->nce_addr;
248 	ixm->ixm_fragsize = ixa->ixa_fragsize;
249 	ixm->ixm_pktlen = ixa->ixa_pktlen;
250 	ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length;
251 	ixm->ixm_protocol = ixa->ixa_protocol;
252 	ixm->ixm_postfragfn = ixa->ixa_postfragfn;
253 	ixm->ixm_zoneid = ixa->ixa_zoneid;
254 	ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid;
255 	ixm->ixm_scopeid = ixa->ixa_scopeid;
256 	ixm->ixm_ident = ixa->ixa_ident;
257 	ixm->ixm_xmit_hint = ixa->ixa_xmit_hint;
258 
259 	if (ixa->ixa_tsl != NULL) {
260 		ixm->ixm_tsl = ixa->ixa_tsl;
261 		label_hold(ixm->ixm_tsl);
262 	}
263 	if (ixa->ixa_cred != NULL) {
264 		ixm->ixm_cred = ixa->ixa_cred;
265 		crhold(ixa->ixa_cred);
266 	}
267 	ixm->ixm_cpid = ixa->ixa_cpid;
268 
269 	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
270 		if (ixa->ixa_ipsec_ah_sa != NULL) {
271 			ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa;
272 			IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
273 		}
274 		if (ixa->ixa_ipsec_esp_sa != NULL) {
275 			ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa;
276 			IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
277 		}
278 		if (ixa->ixa_ipsec_policy != NULL) {
279 			ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy;
280 			IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
281 		}
282 		if (ixa->ixa_ipsec_action != NULL) {
283 			ixm->ixm_ipsec_action = ixa->ixa_ipsec_action;
284 			IPACT_REFHOLD(ixa->ixa_ipsec_action);
285 		}
286 		if (ixa->ixa_ipsec_latch != NULL) {
287 			ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch;
288 			IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
289 		}
290 		ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0];
291 		ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1];
292 		ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port;
293 		ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port;
294 		ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type;
295 		ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code;
296 		ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf;
297 		ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0];
298 		ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1];
299 		ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2];
300 		ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3];
301 		ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0];
302 		ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1];
303 		ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2];
304 		ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3];
305 		ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx;
306 		ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx;
307 		ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto;
308 	}
309 	return (ixamp);
310 }
311 
312 /*
313  * Extract the ip_xmit_attr_t from the mblk, checking that the
314  * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is
315  * not the case.
316  *
317  * Otherwise ixa is updated.
318  * Caller needs to release references on the ixa by calling ixa_refrele()
319  * which will imediately call ixa_inactive to release the references.
320  */
321 boolean_t
322 ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa)
323 {
324 	ixamblk_t	*ixm;
325 	netstack_t	*ns;
326 	ip_stack_t	*ipst;
327 	ill_t		*ill;
328 	nce_t		*nce;
329 
330 	/* We assume the caller hasn't initialized ixa */
331 	bzero(ixa, sizeof (*ixa));
332 
333 	ASSERT(DB_TYPE(ixamp) == M_BREAK);
334 	ASSERT(ixamp->b_cont == NULL);
335 
336 	ixm = (ixamblk_t *)ixamp->b_rptr;
337 	ASSERT(!ixm->ixm_inbound);
338 
339 	/* Verify the netstack is still around */
340 	ns = netstack_find_by_stackid(ixm->ixm_stackid);
341 	if (ns == NULL) {
342 		/* Disappeared on us */
343 		(void) ip_xmit_attr_free_mblk(ixamp);
344 		return (B_FALSE);
345 	}
346 	ipst = ns->netstack_ip;
347 
348 	/* Verify the ill is still around */
349 	ill = ill_lookup_on_ifindex(ixm->ixm_ifindex,
350 	    !(ixm->ixm_flags & IXAF_IS_IPV4), ipst);
351 
352 	/* We have the ill, hence the netstack can't go away */
353 	netstack_rele(ns);
354 	if (ill == NULL) {
355 		/* Disappeared on us */
356 		(void) ip_xmit_attr_free_mblk(ixamp);
357 		return (B_FALSE);
358 	}
359 	/*
360 	 * Find the nce. We don't load-spread (only lookup nce's on the ill)
361 	 * because we want to find the same nce as the one we had when
362 	 * ip_xmit_attr_to_mblk was called.
363 	 */
364 	if (ixm->ixm_flags & IXAF_IS_IPV4) {
365 		nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4);
366 	} else {
367 		nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6);
368 	}
369 
370 	/* We have the nce, hence the ill can't go away */
371 	ill_refrele(ill);
372 	if (nce == NULL) {
373 		/*
374 		 * Since this is unusual and we don't know what type of
375 		 * nce it was, we drop the packet.
376 		 */
377 		(void) ip_xmit_attr_free_mblk(ixamp);
378 		return (B_FALSE);
379 	}
380 
381 	ixa->ixa_flags = ixm->ixm_flags;
382 	ixa->ixa_refcnt = 1;
383 	ixa->ixa_ipst = ipst;
384 	ixa->ixa_fragsize = ixm->ixm_fragsize;
385 	ixa->ixa_pktlen =  ixm->ixm_pktlen;
386 	ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length;
387 	ixa->ixa_protocol = ixm->ixm_protocol;
388 	ixa->ixa_nce = nce;
389 	ixa->ixa_postfragfn = ixm->ixm_postfragfn;
390 	ixa->ixa_zoneid = ixm->ixm_zoneid;
391 	ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid;
392 	ixa->ixa_scopeid = ixm->ixm_scopeid;
393 	ixa->ixa_ident = ixm->ixm_ident;
394 	ixa->ixa_xmit_hint = ixm->ixm_xmit_hint;
395 
396 	if (ixm->ixm_tsl != NULL) {
397 		ixa->ixa_tsl = ixm->ixm_tsl;
398 		ixa->ixa_free_flags |= IXA_FREE_TSL;
399 		ixm->ixm_tsl = NULL;
400 	}
401 	if (ixm->ixm_cred != NULL) {
402 		ixa->ixa_cred = ixm->ixm_cred;
403 		ixa->ixa_free_flags |= IXA_FREE_CRED;
404 		ixm->ixm_cred = NULL;
405 	}
406 	ixa->ixa_cpid = ixm->ixm_cpid;
407 
408 	ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa;
409 	ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa;
410 	ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy;
411 	ixa->ixa_ipsec_action = ixm->ixm_ipsec_action;
412 	ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch;
413 
414 	ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0];
415 	ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1];
416 	ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port;
417 	ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port;
418 	ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type;
419 	ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code;
420 	ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf;
421 	ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0];
422 	ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1];
423 	ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2];
424 	ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3];
425 	ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0];
426 	ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1];
427 	ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2];
428 	ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3];
429 	ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx;
430 	ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx;
431 	ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto;
432 
433 	freeb(ixamp);
434 	return (B_TRUE);
435 }
436 
437 /*
438  * Free the ixm mblk and any references it holds
439  * Returns b_cont.
440  */
441 mblk_t *
442 ip_xmit_attr_free_mblk(mblk_t *ixamp)
443 {
444 	ixamblk_t	*ixm;
445 	mblk_t		*mp;
446 
447 	/* Consume mp */
448 	ASSERT(DB_TYPE(ixamp) == M_BREAK);
449 	mp = ixamp->b_cont;
450 
451 	ixm = (ixamblk_t *)ixamp->b_rptr;
452 	ASSERT(!ixm->ixm_inbound);
453 
454 	if (ixm->ixm_ipsec_ah_sa != NULL) {
455 		IPSA_REFRELE(ixm->ixm_ipsec_ah_sa);
456 		ixm->ixm_ipsec_ah_sa = NULL;
457 	}
458 	if (ixm->ixm_ipsec_esp_sa != NULL) {
459 		IPSA_REFRELE(ixm->ixm_ipsec_esp_sa);
460 		ixm->ixm_ipsec_esp_sa = NULL;
461 	}
462 	if (ixm->ixm_ipsec_policy != NULL) {
463 		IPPOL_REFRELE(ixm->ixm_ipsec_policy);
464 		ixm->ixm_ipsec_policy = NULL;
465 	}
466 	if (ixm->ixm_ipsec_action != NULL) {
467 		IPACT_REFRELE(ixm->ixm_ipsec_action);
468 		ixm->ixm_ipsec_action = NULL;
469 	}
470 	if (ixm->ixm_ipsec_latch) {
471 		IPLATCH_REFRELE(ixm->ixm_ipsec_latch);
472 		ixm->ixm_ipsec_latch = NULL;
473 	}
474 
475 	if (ixm->ixm_tsl != NULL) {
476 		label_rele(ixm->ixm_tsl);
477 		ixm->ixm_tsl = NULL;
478 	}
479 	if (ixm->ixm_cred != NULL) {
480 		crfree(ixm->ixm_cred);
481 		ixm->ixm_cred = NULL;
482 	}
483 	freeb(ixamp);
484 	return (mp);
485 }
486 
487 /*
488  * Take the information in ip_recv_attr_t and stick it in an mblk
489  * that can later be passed to ip_recv_attr_from_mblk to recreate the
490  * ip_recv_attr_t.
491  *
492  * Returns NULL on memory allocation failure.
493  */
494 mblk_t *
495 ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
496 {
497 	mblk_t		*iramp;
498 	iramblk_t	*irm;
499 	ill_t		*ill = ira->ira_ill;
500 
501 	ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);
502 
503 	iramp = allocb(sizeof (*irm), BPRI_MED);
504 	if (iramp == NULL)
505 		return (NULL);
506 
507 	iramp->b_datap->db_type = M_BREAK;
508 	iramp->b_wptr += sizeof (*irm);
509 	irm = (iramblk_t *)iramp->b_rptr;
510 
511 	bzero(irm, sizeof (*irm));
512 	irm->irm_inbound = B_TRUE;
513 	irm->irm_flags = ira->ira_flags;
514 	if (ill != NULL) {
515 		/* Internal to IP - preserve ip_stack_t, ill and rill */
516 		irm->irm_stackid =
517 		    ill->ill_ipst->ips_netstack->netstack_stackid;
518 		irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
519 		ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
520 		    ira->ira_rifindex);
521 	} else {
522 		/* Let ip_recv_attr_from_stackid know there isn't one */
523 		irm->irm_stackid = -1;
524 	}
525 	irm->irm_rifindex = ira->ira_rifindex;
526 	irm->irm_ruifindex = ira->ira_ruifindex;
527 	irm->irm_pktlen = ira->ira_pktlen;
528 	irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
529 	irm->irm_protocol = ira->ira_protocol;
530 
531 	irm->irm_sqp = ira->ira_sqp;
532 	irm->irm_ring = ira->ira_ring;
533 
534 	irm->irm_zoneid = ira->ira_zoneid;
535 	irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
536 	irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
537 	irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;
538 
539 	if (ira->ira_tsl != NULL) {
540 		irm->irm_tsl = ira->ira_tsl;
541 		label_hold(irm->irm_tsl);
542 	}
543 	if (ira->ira_cred != NULL) {
544 		irm->irm_cred = ira->ira_cred;
545 		crhold(ira->ira_cred);
546 	}
547 	irm->irm_cpid = ira->ira_cpid;
548 
549 	if (ira->ira_flags & IRAF_L2SRC_SET)
550 		bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);
551 
552 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
553 		if (ira->ira_ipsec_ah_sa != NULL) {
554 			irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
555 			IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
556 		}
557 		if (ira->ira_ipsec_esp_sa != NULL) {
558 			irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
559 			IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
560 		}
561 		if (ira->ira_ipsec_action != NULL) {
562 			irm->irm_ipsec_action = ira->ira_ipsec_action;
563 			IPACT_REFHOLD(ira->ira_ipsec_action);
564 		}
565 	}
566 	return (iramp);
567 }
568 
569 /*
570  * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
571  * then irm_stackid is not -1, in which case we check that the
572  * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
573  * not the case.
574  * If irm_stackid is zero then we are used by an ULP (e.g., squeue_enter)
575  * and we just proceed with ira_ill and ira_rill as NULL.
576  *
577  * The caller needs to release any references on the pointers inside the ire
578  * by calling ira_cleanup.
579  */
580 boolean_t
581 ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
582 {
583 	iramblk_t	*irm;
584 	netstack_t	*ns;
585 	ip_stack_t	*ipst = NULL;
586 	ill_t		*ill = NULL, *rill = NULL;
587 
588 	/* We assume the caller hasn't initialized ira */
589 	bzero(ira, sizeof (*ira));
590 
591 	ASSERT(DB_TYPE(iramp) == M_BREAK);
592 	ASSERT(iramp->b_cont == NULL);
593 
594 	irm = (iramblk_t *)iramp->b_rptr;
595 	ASSERT(irm->irm_inbound);
596 
597 	if (irm->irm_stackid != -1) {
598 		/* Verify the netstack is still around */
599 		ns = netstack_find_by_stackid(irm->irm_stackid);
600 		if (ns == NULL) {
601 			/* Disappeared on us */
602 			(void) ip_recv_attr_free_mblk(iramp);
603 			return (B_FALSE);
604 		}
605 		ipst = ns->netstack_ip;
606 
607 		/* Verify the ill is still around */
608 		ill = ill_lookup_on_ifindex(irm->irm_ifindex,
609 		    !(irm->irm_flags & IRAF_IS_IPV4), ipst);
610 
611 		if (irm->irm_ifindex == irm->irm_rifindex) {
612 			rill = ill;
613 		} else {
614 			rill = ill_lookup_on_ifindex(irm->irm_rifindex,
615 			    !(irm->irm_flags & IRAF_IS_IPV4), ipst);
616 		}
617 
618 		/* We have the ill, hence the netstack can't go away */
619 		netstack_rele(ns);
620 		if (ill == NULL || rill == NULL) {
621 			/* Disappeared on us */
622 			if (ill != NULL)
623 				ill_refrele(ill);
624 			if (rill != NULL && rill != ill)
625 				ill_refrele(rill);
626 			(void) ip_recv_attr_free_mblk(iramp);
627 			return (B_FALSE);
628 		}
629 	}
630 
631 	ira->ira_flags = irm->irm_flags;
632 	/* Caller must ill_refele(ira_ill) by using ira_cleanup() */
633 	ira->ira_ill = ill;
634 	ira->ira_rill = rill;
635 
636 	ira->ira_rifindex = irm->irm_rifindex;
637 	ira->ira_ruifindex = irm->irm_ruifindex;
638 	ira->ira_pktlen = irm->irm_pktlen;
639 	ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
640 	ira->ira_protocol = irm->irm_protocol;
641 
642 	ira->ira_sqp = irm->irm_sqp;
643 	/* The rest of IP assumes that the rings never go away. */
644 	ira->ira_ring = irm->irm_ring;
645 
646 	ira->ira_zoneid = irm->irm_zoneid;
647 	ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
648 	ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
649 	ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;
650 
651 	if (irm->irm_tsl != NULL) {
652 		ira->ira_tsl = irm->irm_tsl;
653 		ira->ira_free_flags |= IRA_FREE_TSL;
654 		irm->irm_tsl = NULL;
655 	}
656 	if (irm->irm_cred != NULL) {
657 		ira->ira_cred = irm->irm_cred;
658 		ira->ira_free_flags |= IRA_FREE_CRED;
659 		irm->irm_cred = NULL;
660 	}
661 	ira->ira_cpid = irm->irm_cpid;
662 
663 	if (ira->ira_flags & IRAF_L2SRC_SET)
664 		bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);
665 
666 	ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
667 	ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
668 	ira->ira_ipsec_action = irm->irm_ipsec_action;
669 
670 	freeb(iramp);
671 	return (B_TRUE);
672 }
673 
674 /*
675  * Free the irm mblk and any references it holds
676  * Returns b_cont.
677  */
678 mblk_t *
679 ip_recv_attr_free_mblk(mblk_t *iramp)
680 {
681 	iramblk_t	*irm;
682 	mblk_t		*mp;
683 
684 	/* Consume mp */
685 	ASSERT(DB_TYPE(iramp) == M_BREAK);
686 	mp = iramp->b_cont;
687 
688 	irm = (iramblk_t *)iramp->b_rptr;
689 	ASSERT(irm->irm_inbound);
690 
691 	if (irm->irm_ipsec_ah_sa != NULL) {
692 		IPSA_REFRELE(irm->irm_ipsec_ah_sa);
693 		irm->irm_ipsec_ah_sa = NULL;
694 	}
695 	if (irm->irm_ipsec_esp_sa != NULL) {
696 		IPSA_REFRELE(irm->irm_ipsec_esp_sa);
697 		irm->irm_ipsec_esp_sa = NULL;
698 	}
699 	if (irm->irm_ipsec_action != NULL) {
700 		IPACT_REFRELE(irm->irm_ipsec_action);
701 		irm->irm_ipsec_action = NULL;
702 	}
703 	if (irm->irm_tsl != NULL) {
704 		label_rele(irm->irm_tsl);
705 		irm->irm_tsl = NULL;
706 	}
707 	if (irm->irm_cred != NULL) {
708 		crfree(irm->irm_cred);
709 		irm->irm_cred = NULL;
710 	}
711 
712 	freeb(iramp);
713 	return (mp);
714 }
715 
716 /*
717  * Returns true if the mblk contains an ip_recv_attr_t
718  * For now we just check db_type.
719  */
720 boolean_t
721 ip_recv_attr_is_mblk(mblk_t *mp)
722 {
723 	/*
724 	 * Need to handle the various forms of tcp_timermp which are tagged
725 	 * with b_wptr and might have a NULL b_datap.
726 	 */
727 	if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1)
728 		return (B_FALSE);
729 
730 #ifdef	DEBUG
731 	iramblk_t	*irm;
732 
733 	if (DB_TYPE(mp) != M_BREAK)
734 		return (B_FALSE);
735 
736 	irm = (iramblk_t *)mp->b_rptr;
737 	ASSERT(irm->irm_inbound);
738 	return (B_TRUE);
739 #else
740 	return (DB_TYPE(mp) == M_BREAK);
741 #endif
742 }
743 
744 static ip_xmit_attr_t *
745 conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag)
746 {
747 	ip_xmit_attr_t	*ixa;
748 	ip_xmit_attr_t	*oldixa;
749 
750 	mutex_enter(&connp->conn_lock);
751 	ixa = connp->conn_ixa;
752 
753 	/* At least one references for the conn_t */
754 	ASSERT(ixa->ixa_refcnt >= 1);
755 	if (atomic_add_32_nv(&ixa->ixa_refcnt, 1) == 2) {
756 		/* No other thread using conn_ixa */
757 		mutex_exit(&connp->conn_lock);
758 		return (ixa);
759 	}
760 	ixa = kmem_alloc(sizeof (*ixa), kmflag);
761 	if (ixa == NULL) {
762 		mutex_exit(&connp->conn_lock);
763 		ixa_refrele(connp->conn_ixa);
764 		return (NULL);
765 	}
766 	ixa_safe_copy(connp->conn_ixa, ixa);
767 
768 	/* Make sure we drop conn_lock before any refrele */
769 	if (replace) {
770 		ixa->ixa_refcnt++;	/* No atomic needed - not visible */
771 		oldixa = connp->conn_ixa;
772 		connp->conn_ixa = ixa;
773 		mutex_exit(&connp->conn_lock);
774 		IXA_REFRELE(oldixa);	/* Undo refcnt from conn_t */
775 	} else {
776 		oldixa = connp->conn_ixa;
777 		mutex_exit(&connp->conn_lock);
778 	}
779 	IXA_REFRELE(oldixa);	/* Undo above atomic_add_32_nv */
780 
781 	return (ixa);
782 }
783 
784 /*
785  * Return an ip_xmit_attr_t to use with a conn_t that ensures that only
786  * the caller can access the ip_xmit_attr_t.
787  *
788  * If nobody else is using conn_ixa we return it.
789  * Otherwise we make a "safe" copy of conn_ixa
790  * and return it. The "safe" copy has the pointers set to NULL
791  * (since the pointers might be changed by another thread using
792  * conn_ixa). The caller needs to check for NULL pointers to see
793  * if ip_set_destination needs to be called to re-establish the pointers.
794  *
795  * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t.
796  * That is used when we connect() the ULP.
797  */
798 ip_xmit_attr_t *
799 conn_get_ixa(conn_t *connp, boolean_t replace)
800 {
801 	return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP));
802 }
803 
804 /*
805  * Used only when the option is to have the kernel hang due to not
806  * cleaning up ixa references on ills etc.
807  */
808 ip_xmit_attr_t *
809 conn_get_ixa_tryhard(conn_t *connp, boolean_t replace)
810 {
811 	return (conn_get_ixa_impl(connp, replace, KM_SLEEP));
812 }
813 
814 /*
815  * Replace conn_ixa with the ixa argument.
816  *
817  * The caller must hold conn_lock.
818  *
819  * We return the old ixa; the caller must ixa_refrele that after conn_lock
820  * has been dropped.
821  */
822 ip_xmit_attr_t *
823 conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa)
824 {
825 	ip_xmit_attr_t	*oldixa;
826 
827 	ASSERT(MUTEX_HELD(&connp->conn_lock));
828 
829 	oldixa = connp->conn_ixa;
830 	IXA_REFHOLD(ixa);
831 	connp->conn_ixa = ixa;
832 	return (oldixa);
833 }
834 
835 /*
836  * Return a ip_xmit_attr_t to use with a conn_t that is based on but
837  * separate from conn_ixa.
838  *
839  * This "safe" copy has the pointers set to NULL
840  * (since the pointers might be changed by another thread using
841  * conn_ixa). The caller needs to check for NULL pointers to see
842  * if ip_set_destination needs to be called to re-establish the pointers.
843  */
844 ip_xmit_attr_t *
845 conn_get_ixa_exclusive(conn_t *connp)
846 {
847 	ip_xmit_attr_t *ixa;
848 
849 	mutex_enter(&connp->conn_lock);
850 	ixa = connp->conn_ixa;
851 
852 	/* At least one references for the conn_t */
853 	ASSERT(ixa->ixa_refcnt >= 1);
854 
855 	/* Make sure conn_ixa doesn't disappear while we copy it */
856 	atomic_add_32(&ixa->ixa_refcnt, 1);
857 
858 	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
859 	if (ixa == NULL) {
860 		mutex_exit(&connp->conn_lock);
861 		ixa_refrele(connp->conn_ixa);
862 		return (NULL);
863 	}
864 	ixa_safe_copy(connp->conn_ixa, ixa);
865 	mutex_exit(&connp->conn_lock);
866 	IXA_REFRELE(connp->conn_ixa);
867 	return (ixa);
868 }
869 
870 void
871 ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
872 {
873 	bcopy(src, ixa, sizeof (*ixa));
874 	ixa->ixa_refcnt = 1;
875 	/*
876 	 * Clear any pointers that have references and might be changed
877 	 * by ip_set_destination or the ULP
878 	 */
879 	ixa->ixa_ire = NULL;
880 	ixa->ixa_nce = NULL;
881 	ixa->ixa_dce = NULL;
882 	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
883 	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
884 #ifdef DEBUG
885 	ixa->ixa_curthread = NULL;
886 #endif
887 	/* Clear all the IPsec pointers and the flag as well. */
888 	ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;
889 
890 	ixa->ixa_ipsec_latch = NULL;
891 	ixa->ixa_ipsec_ah_sa = NULL;
892 	ixa->ixa_ipsec_esp_sa = NULL;
893 	ixa->ixa_ipsec_policy = NULL;
894 	ixa->ixa_ipsec_action = NULL;
895 
896 	/*
897 	 * We leave ixa_tsl unchanged, but if it has a refhold we need
898 	 * to get an extra refhold.
899 	 */
900 	if (ixa->ixa_free_flags & IXA_FREE_TSL)
901 		label_hold(ixa->ixa_tsl);
902 
903 	/*
904 	 * We leave ixa_cred unchanged, but if it has a refhold we need
905 	 * to get an extra refhold.
906 	 */
907 	if (ixa->ixa_free_flags & IXA_FREE_CRED)
908 		crhold(ixa->ixa_cred);
909 }
910 
911 /*
912  * Duplicate an ip_xmit_attr_t.
913  * Assumes that the caller controls the ixa, hence we do not need to use
914  * a safe copy. We just have to increase the refcnt on any pointers.
915  */
916 ip_xmit_attr_t *
917 ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa)
918 {
919 	ip_xmit_attr_t *ixa;
920 
921 	ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
922 	if (ixa == NULL)
923 		return (NULL);
924 	bcopy(src_ixa, ixa, sizeof (*ixa));
925 	ixa->ixa_refcnt = 1;
926 
927 	if (ixa->ixa_ire != NULL)
928 		ire_refhold_notr(ixa->ixa_ire);
929 	if (ixa->ixa_nce != NULL)
930 		nce_refhold(ixa->ixa_nce);
931 	if (ixa->ixa_dce != NULL)
932 		dce_refhold_notr(ixa->ixa_dce);
933 
934 #ifdef DEBUG
935 	ixa->ixa_curthread = NULL;
936 #endif
937 
938 	if (ixa->ixa_ipsec_latch != NULL)
939 		IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
940 	if (ixa->ixa_ipsec_ah_sa != NULL)
941 		IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
942 	if (ixa->ixa_ipsec_esp_sa != NULL)
943 		IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
944 	if (ixa->ixa_ipsec_policy != NULL)
945 		IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
946 	if (ixa->ixa_ipsec_action != NULL)
947 		IPACT_REFHOLD(ixa->ixa_ipsec_action);
948 
949 	if (ixa->ixa_tsl != NULL) {
950 		label_hold(ixa->ixa_tsl);
951 		ixa->ixa_free_flags |= IXA_FREE_TSL;
952 	}
953 	if (ixa->ixa_cred != NULL) {
954 		crhold(ixa->ixa_cred);
955 		ixa->ixa_free_flags |= IXA_FREE_CRED;
956 	}
957 	return (ixa);
958 }
959 
960 /*
961  * Used to replace the ixa_label field.
962  * The caller should have a reference on the label, which we transfer to
963  * the attributes so that when the attribute is freed/cleaned up
964  * we will release that reference.
965  */
966 void
967 ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl)
968 {
969 	ASSERT(tsl != NULL);
970 
971 	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
972 		ASSERT(ixa->ixa_tsl != NULL);
973 		label_rele(ixa->ixa_tsl);
974 	} else {
975 		ixa->ixa_free_flags |= IXA_FREE_TSL;
976 	}
977 	ixa->ixa_tsl = tsl;
978 }
979 
980 /*
981  * Replace the ip_recv_attr_t's label.
982  * Due to kernel RPC's use of db_credp we also need to replace ira_cred;
983  * TCP/UDP uses ira_cred to set db_credp for non-socket users.
984  * This can fail (and return B_FALSE) due to lack of memory.
985  */
986 boolean_t
987 ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl)
988 {
989 	cred_t	*newcr;
990 
991 	if (ira->ira_free_flags & IRA_FREE_TSL) {
992 		ASSERT(ira->ira_tsl != NULL);
993 		label_rele(ira->ira_tsl);
994 	}
995 	label_hold(tsl);
996 	ira->ira_tsl = tsl;
997 	ira->ira_free_flags |= IRA_FREE_TSL;
998 
999 	/*
1000 	 * Reset zoneid if we have a shared address. That allows
1001 	 * ip_fanout_tx_v4/v6 to determine the zoneid again.
1002 	 */
1003 	if (ira->ira_flags & IRAF_TX_SHARED_ADDR)
1004 		ira->ira_zoneid = ALL_ZONES;
1005 
1006 	/* We update ira_cred for RPC */
1007 	newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP);
1008 	if (newcr == NULL)
1009 		return (B_FALSE);
1010 	if (ira->ira_free_flags & IRA_FREE_CRED)
1011 		crfree(ira->ira_cred);
1012 	ira->ira_cred = newcr;
1013 	ira->ira_free_flags |= IRA_FREE_CRED;
1014 	return (B_TRUE);
1015 }
1016 
1017 /*
1018  * This needs to be called after ip_set_destination/tsol_check_dest might
1019  * have changed ixa_tsl to be specific for a destination, and we now want to
1020  * send to a different destination.
1021  * We have to restart with crgetlabel() since ip_set_destination/
1022  * tsol_check_dest will start with ixa_tsl.
1023  */
1024 void
1025 ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr)
1026 {
1027 	if (!is_system_labeled())
1028 		return;
1029 
1030 	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
1031 		ASSERT(ixa->ixa_tsl != NULL);
1032 		label_rele(ixa->ixa_tsl);
1033 		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
1034 	}
1035 	ixa->ixa_tsl = crgetlabel(cr);
1036 }
1037 
/*
 * Function form of the IXA_REFRELE() macro: applies IXA_REFRELE to `ixa'.
 */
1038 void
1039 ixa_refrele(ip_xmit_attr_t *ixa)
1040 {
1041 	IXA_REFRELE(ixa);
1042 }
1043 
1044 void
1045 ixa_inactive(ip_xmit_attr_t *ixa)
1046 {
1047 	ASSERT(ixa->ixa_refcnt == 0);
1048 
1049 	ixa_cleanup(ixa);
1050 	kmem_free(ixa, sizeof (*ixa));
1051 }
1052 
1053 /*
1054  * Release any references contained in the ixa.
1055  * Also clear any fields that are not controlled by ixa_flags.
1056  */
1057 void
1058 ixa_cleanup(ip_xmit_attr_t *ixa)
1059 {
1060 	if (ixa->ixa_ire != NULL) {
1061 		ire_refrele_notr(ixa->ixa_ire);
1062 		ixa->ixa_ire = NULL;
1063 	}
1064 	if (ixa->ixa_dce != NULL) {
1065 		dce_refrele_notr(ixa->ixa_dce);
1066 		ixa->ixa_dce = NULL;
1067 	}
1068 	if (ixa->ixa_nce != NULL) {
1069 		nce_refrele(ixa->ixa_nce);
1070 		ixa->ixa_nce = NULL;
1071 	}
1072 	ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1073 	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
1074 	if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
1075 		ipsec_out_release_refs(ixa);
1076 	}
1077 	if (ixa->ixa_free_flags & IXA_FREE_TSL) {
1078 		ASSERT(ixa->ixa_tsl != NULL);
1079 		label_rele(ixa->ixa_tsl);
1080 		ixa->ixa_free_flags &= ~IXA_FREE_TSL;
1081 	}
1082 	ixa->ixa_tsl = NULL;
1083 	if (ixa->ixa_free_flags & IXA_FREE_CRED) {
1084 		ASSERT(ixa->ixa_cred != NULL);
1085 		crfree(ixa->ixa_cred);
1086 		ixa->ixa_free_flags &= ~IXA_FREE_CRED;
1087 	}
1088 	ixa->ixa_cred = NULL;
1089 	ixa->ixa_src_preferences = 0;
1090 	ixa->ixa_ifindex = 0;
1091 	ixa->ixa_multicast_ifindex = 0;
1092 	ixa->ixa_multicast_ifaddr = INADDR_ANY;
1093 }
1094 
1095 /*
1096  * Release any references contained in the ira.
1097  * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second
1098  * argument.
1099  */
1100 void
1101 ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill)
1102 {
1103 	if (ira->ira_ill != NULL) {
1104 		if (ira->ira_rill != ira->ira_ill) {
1105 			/* Caused by async processing */
1106 			ill_refrele(ira->ira_rill);
1107 		}
1108 		if (refrele_ill)
1109 			ill_refrele(ira->ira_ill);
1110 	}
1111 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1112 		ipsec_in_release_refs(ira);
1113 	}
1114 	if (ira->ira_free_flags & IRA_FREE_TSL) {
1115 		ASSERT(ira->ira_tsl != NULL);
1116 		label_rele(ira->ira_tsl);
1117 		ira->ira_free_flags &= ~IRA_FREE_TSL;
1118 	}
1119 	ira->ira_tsl = NULL;
1120 	if (ira->ira_free_flags & IRA_FREE_CRED) {
1121 		ASSERT(ira->ira_cred != NULL);
1122 		crfree(ira->ira_cred);
1123 		ira->ira_free_flags &= ~IRA_FREE_CRED;
1124 	}
1125 	ira->ira_cred = NULL;
1126 }
1127 
1128 /*
1129  * Function to help release any IRE, NCE, or DCEs that
1130  * have been deleted and are marked as condemned.
1131  * The caller is responsible for any serialization which is different
1132  * for TCP, SCTP, and others.
1133  */
1134 static void
1135 ixa_cleanup_stale(ip_xmit_attr_t *ixa)
1136 {
1137 	ire_t		*ire;
1138 	nce_t		*nce;
1139 	dce_t		*dce;
1140 
1141 	ire = ixa->ixa_ire;
1142 	nce = ixa->ixa_nce;
1143 	dce = ixa->ixa_dce;
1144 
1145 	if (ire != NULL && IRE_IS_CONDEMNED(ire)) {
1146 		ire_refrele_notr(ire);
1147 		ire = ire_blackhole(ixa->ixa_ipst,
1148 		    !(ixa->ixa_flags & IXAF_IS_IPV4));
1149 		ASSERT(ire != NULL);
1150 #ifdef DEBUG
1151 		ire_refhold_notr(ire);
1152 		ire_refrele(ire);
1153 #endif
1154 		ixa->ixa_ire = ire;
1155 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1156 	}
1157 	if (nce != NULL && nce->nce_is_condemned) {
1158 		/* Can make it NULL as long as we set IRE_GENERATION_VERIFY */
1159 		nce_refrele(nce);
1160 		ixa->ixa_nce = NULL;
1161 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1162 	}
1163 	if (dce != NULL && DCE_IS_CONDEMNED(dce)) {
1164 		dce_refrele_notr(dce);
1165 		dce = dce_get_default(ixa->ixa_ipst);
1166 		ASSERT(dce != NULL);
1167 #ifdef DEBUG
1168 		dce_refhold_notr(dce);
1169 		dce_refrele(dce);
1170 #endif
1171 		ixa->ixa_dce = dce;
1172 		ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
1173 	}
1174 }
1175 
1176 /*
1177  * Used to run ixa_cleanup_stale inside the tcp squeue.
1178  * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
1179  * and waking up the caller.
1180  */
1181 /* ARGSUSED2 */
1182 static void
1183 tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
1184     ip_recv_attr_t *dummy)
1185 {
1186 	conn_t	*connp = (conn_t *)arg;
1187 	tcp_stack_t	*tcps;
1188 
1189 	tcps = connp->conn_netstack->netstack_tcp;
1190 
1191 	ixa_cleanup_stale(connp->conn_ixa);
1192 
1193 	mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1194 	ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
1195 	tcps->tcps_ixa_cleanup_mp = mp;
1196 	cv_signal(&tcps->tcps_ixa_cleanup_cv);
1197 	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1198 }
1199 
1200 
1201 /*
1202  * ipcl_walk() function to help release any IRE, NCE, or DCEs that
1203  * have been deleted and are marked as condemned.
1204  * Note that we can't cleanup the pointers since there can be threads
1205  * in conn_ip_output() sending while we are called.
1206  */
1207 void
1208 conn_ixa_cleanup(conn_t *connp, void *arg)
1209 {
1210 	boolean_t tryhard = (boolean_t)arg;
1211 
1212 	if (IPCL_IS_TCP(connp)) {
1213 		mblk_t		*mp;
1214 		tcp_stack_t	*tcps;
1215 
1216 		tcps = connp->conn_netstack->netstack_tcp;
1217 
1218 		mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1219 		while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
1220 			/*
1221 			 * Multiple concurrent cleanups; need to have the last
1222 			 * one run since it could be an unplumb.
1223 			 */
1224 			cv_wait(&tcps->tcps_ixa_cleanup_cv,
1225 			    &tcps->tcps_ixa_cleanup_lock);
1226 		}
1227 		tcps->tcps_ixa_cleanup_mp = NULL;
1228 		mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1229 
1230 		if (connp->conn_sqp->sq_run == curthread) {
1231 			/* Already on squeue */
1232 			tcp_ixa_cleanup(connp, mp, NULL, NULL);
1233 		} else {
1234 			CONN_INC_REF(connp);
1235 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
1236 			    connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
1237 
1238 			/* Wait until tcp_ixa_cleanup has run */
1239 			mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1240 			while (tcps->tcps_ixa_cleanup_mp == NULL) {
1241 				cv_wait(&tcps->tcps_ixa_cleanup_cv,
1242 				    &tcps->tcps_ixa_cleanup_lock);
1243 			}
1244 			mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1245 		}
1246 	} else if (IPCL_IS_SCTP(connp)) {
1247 		sctp_t	*sctp;
1248 		sctp_faddr_t *fp;
1249 
1250 		sctp = CONN2SCTP(connp);
1251 		RUN_SCTP(sctp);
1252 		ixa_cleanup_stale(connp->conn_ixa);
1253 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next)
1254 			ixa_cleanup_stale(fp->ixa);
1255 		WAKE_SCTP(sctp);
1256 	} else {
1257 		ip_xmit_attr_t	*ixa;
1258 
1259 		/*
1260 		 * If there is a different thread using conn_ixa then we get a
1261 		 * new copy and cut the old one loose from conn_ixa. Otherwise
1262 		 * we use conn_ixa and prevent any other thread from
1263 		 * using/changing it. Anybody using conn_ixa (e.g., a thread in
1264 		 * conn_ip_output) will do an ixa_refrele which will remove any
1265 		 * references on the ire etc.
1266 		 *
1267 		 * Once we are done other threads can use conn_ixa since the
1268 		 * refcnt will be back at one.
1269 		 *
1270 		 * We are called either because an ill is going away, or
1271 		 * due to memory reclaim. In the former case we wait for
1272 		 * memory since we must remove the refcnts on the ill.
1273 		 */
1274 		if (tryhard) {
1275 			ixa = conn_get_ixa_tryhard(connp, B_TRUE);
1276 			ASSERT(ixa != NULL);
1277 		} else {
1278 			ixa = conn_get_ixa(connp, B_TRUE);
1279 			if (ixa == NULL) {
1280 				/*
1281 				 * Somebody else was using it and kmem_alloc
1282 				 * failed! Next memory reclaim will try to
1283 				 * clean up.
1284 				 */
1285 				DTRACE_PROBE1(conn__ixa__cleanup__bail,
1286 				    conn_t *, connp);
1287 				return;
1288 			}
1289 		}
1290 		ixa_cleanup_stale(ixa);
1291 		ixa_refrele(ixa);
1292 	}
1293 }
1294 
1295 /*
1296  * ixa needs to be an exclusive copy so that no one changes the cookie
1297  * or the ixa_nce.
1298  */
1299 boolean_t
1300 ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa)
1301 {
1302 	uintptr_t cookie = ixa->ixa_cookie;
1303 	ill_dld_direct_t *idd;
1304 	idl_tx_list_t *idl_txl;
1305 	ill_t *ill = ixa->ixa_nce->nce_ill;
1306 	boolean_t inserted = B_FALSE;
1307 
1308 	idd = &(ill)->ill_dld_capab->idc_direct;
1309 	idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
1310 	mutex_enter(&idl_txl->txl_lock);
1311 
1312 	/*
1313 	 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow
1314 	 * control is asserted on an ill that does not support direct calls.
1315 	 * Jump to insert.
1316 	 */
1317 	if (cookie == 0)
1318 		goto tryinsert;
1319 
1320 	ASSERT(ILL_DIRECT_CAPABLE(ill));
1321 
1322 	if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) {
1323 		DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie);
1324 	} else if (idl_txl->txl_cookie != NULL &&
1325 	    idl_txl->txl_cookie != ixa->ixa_cookie) {
1326 		DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie,
1327 		    uintptr_t, idl_txl->txl_cookie);
1328 		/* TODO: bump kstat for cookie collision */
1329 	} else {
1330 		/*
1331 		 * Check/set conn_blocked under conn_lock.  Note that txl_lock
1332 		 * will not suffice since two separate UDP threads may be
1333 		 * racing to send to different destinations that are
1334 		 * associated with different cookies and thus may not be
1335 		 * holding the same txl_lock.  Further, since a given conn_t
1336 		 * can only be on a single drain list, the conn_t will be
1337 		 * enqueued on whichever thread wins this race.
1338 		 */
1339 tryinsert:	mutex_enter(&connp->conn_lock);
1340 		if (connp->conn_blocked) {
1341 			DTRACE_PROBE1(ill__tx__conn__already__blocked,
1342 			    conn_t *, connp);
1343 			mutex_exit(&connp->conn_lock);
1344 		} else {
1345 			connp->conn_blocked = B_TRUE;
1346 			mutex_exit(&connp->conn_lock);
1347 			idl_txl->txl_cookie = cookie;
1348 			conn_drain_insert(connp, idl_txl);
1349 			if (!IPCL_IS_NONSTR(connp))
1350 				noenable(connp->conn_wq);
1351 			inserted = B_TRUE;
1352 		}
1353 	}
1354 	mutex_exit(&idl_txl->txl_lock);
1355 	return (inserted);
1356 }
1357