1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25 /* Copyright (c) 1990 Mentat Inc. */
26
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #include <sys/zone.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/cmn_err.h>
34 #include <sys/debug.h>
35 #include <sys/atomic.h>
36
37 #include <sys/systm.h>
38 #include <sys/param.h>
39 #include <sys/kmem.h>
40 #include <sys/sdt.h>
41 #include <sys/socket.h>
42 #include <sys/mac.h>
43 #include <net/if.h>
44 #include <net/if_arp.h>
45 #include <net/route.h>
46 #include <sys/sockio.h>
47 #include <netinet/in.h>
48 #include <net/if_dl.h>
49
50 #include <inet/common.h>
51 #include <inet/mi.h>
52 #include <inet/mib2.h>
53 #include <inet/nd.h>
54 #include <inet/arp.h>
55 #include <inet/snmpcom.h>
56 #include <inet/kstatcom.h>
57
58 #include <netinet/igmp_var.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/sctp.h>
62
63 #include <inet/ip.h>
64 #include <inet/ip_impl.h>
65 #include <inet/ip6.h>
66 #include <inet/ip6_asp.h>
67 #include <inet/tcp.h>
68 #include <inet/ip_multi.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_ftable.h>
72 #include <inet/ip_rts.h>
73 #include <inet/optcom.h>
74 #include <inet/ip_ndp.h>
75 #include <inet/ip_listutils.h>
76 #include <netinet/igmp.h>
77 #include <netinet/ip_mroute.h>
78 #include <inet/ipp_common.h>
79
80 #include <net/pfkeyv2.h>
81 #include <inet/sadb.h>
82 #include <inet/ipsec_impl.h>
83 #include <inet/ipdrop.h>
84 #include <inet/ip_netinfo.h>
85 #include <sys/squeue_impl.h>
86 #include <sys/squeue.h>
87
88 #include <inet/ipclassifier.h>
89 #include <inet/sctp_ip.h>
90 #include <inet/sctp/sctp_impl.h>
91 #include <inet/udp_impl.h>
92 #include <sys/sunddi.h>
93
94 #include <sys/tsol/label.h>
95 #include <sys/tsol/tnet.h>
96
/*
 * Release a reference on ip_xmit_attr.
 * The reference is acquired by conn_get_ixa().
 * When the count drops to zero the ixa is passed to ixa_inactive().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used as the unbraced body of an if/else.
 * Note that the argument is evaluated more than once.
 */
#define	IXA_REFRELE(ixa)					\
do {								\
	if (atomic_dec_32_nv(&(ixa)->ixa_refcnt) == 0)		\
		ixa_inactive(ixa);				\
} while (0)
106
/*
 * Take an additional reference on ip_xmit_attr.
 * The caller must already have a reference (the count may not be zero).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used as the unbraced body of an if/else.
 * Note that the argument is evaluated more than once.
 */
#define	IXA_REFHOLD(ixa)					\
do {								\
	ASSERT((ixa)->ixa_refcnt != 0);				\
	atomic_inc_32(&(ixa)->ixa_refcnt);			\
} while (0)
112
/*
 * When we need to handle a transmit side asynchronous operation, then we need
 * to save sufficient information so that we can call the fragment and postfrag
 * functions. That information is captured in an mblk containing this structure.
 *
 * Since this is currently only used for IPsec, we include information for
 * the kernel crypto framework.
 *
 * The structure is filled in by ip_xmit_attr_to_mblk(), consumed by
 * ip_xmit_attr_from_mblk(), and its references are dropped by
 * ip_xmit_attr_free_mblk().
 */
typedef struct ixamblk_s {
	/* Always B_FALSE here; iramblk_t starts with the same field as B_TRUE */
	boolean_t	ixm_inbound;	/* B_FALSE */
	iaflags_t	ixm_flags;	/* ixa_flags */
	netstackid_t	ixm_stackid;	/* Verify it didn't go away */
	uint_t		ixm_ifindex;	/* Used to find the nce */
	in6_addr_t	ixm_nceaddr_v6;	/* Used to find nce */
#define	ixm_nceaddr_v4	V4_PART_OF_V6(ixm_nceaddr_v6)
	uint32_t	ixm_fragsize;	/* ixa_fragsize */
	uint_t		ixm_pktlen;	/* ixa_pktlen */
	uint16_t	ixm_ip_hdr_length; /* Points to ULP header */
	uint8_t		ixm_protocol;	/* Protocol number for ULP cksum */
	pfirepostfrag_t	ixm_postfragfn;	/* ixa_postfragfn */

	zoneid_t	ixm_zoneid;		/* Needed for ipobs */
	zoneid_t	ixm_no_loop_zoneid;	/* IXAF_NO_LOOP_ZONEID_SET */

	uint_t		ixm_scopeid;	/* For IPv6 link-locals */

	uint32_t	ixm_ident;	/* For IPv6 fragment header */
	uint32_t	ixm_xmit_hint;	/* ixa_xmit_hint */

	uint64_t	ixm_conn_id;	/* Used by DTrace */
	cred_t		*ixm_cred;	/* For getpeerucred - refhold if set */
	pid_t		ixm_cpid;	/* For getpeerucred */

	ts_label_t	*ixm_tsl;	/* Refhold if set. */

	/*
	 * When the pointers below are set they have a refhold on the struct.
	 */
	ipsec_latch_t		*ixm_ipsec_latch;
	struct ipsa_s		*ixm_ipsec_ah_sa;	/* SA for AH */
	struct ipsa_s		*ixm_ipsec_esp_sa;	/* SA for ESP */
	struct ipsec_policy_s	*ixm_ipsec_policy;	/* why are we here? */
	struct ipsec_action_s	*ixm_ipsec_action; /* For reflected packets */

	ipsa_ref_t		ixm_ipsec_ref[2]; /* Soft reference to SA */

	/* Need these while waiting for SA */
	uint16_t ixm_ipsec_src_port;	/* Source port number of d-gram. */
	uint16_t ixm_ipsec_dst_port;	/* Destination port number of d-gram. */
	uint8_t  ixm_ipsec_icmp_type;	/* ICMP type of d-gram */
	uint8_t  ixm_ipsec_icmp_code;	/* ICMP code of d-gram */

	sa_family_t ixm_ipsec_inaf;	/* Inner address family */
	uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN];	/* Inner src address */
	uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN];	/* Inner dest address */
	uint8_t  ixm_ipsec_insrcpfx;	/* Inner source prefix */
	uint8_t  ixm_ipsec_indstpfx;	/* Inner destination prefix */

	uint8_t ixm_ipsec_proto;	/* IP protocol number for d-gram. */
} ixamblk_t;
173
174
/*
 * When we need to handle a receive side asynchronous operation, then we need
 * to save sufficient information so that we can call ip_fanout.
 * That information is captured in an mblk containing this structure.
 *
 * Since this is currently only used for IPsec, we include information for
 * the kernel crypto framework.
 *
 * The structure is filled in by ip_recv_attr_to_mblk(), consumed by
 * ip_recv_attr_from_mblk(), and its references are dropped by
 * ip_recv_attr_free_mblk().
 */
typedef struct iramblk_s {
	/* Always B_TRUE here; ixamblk_t starts with the same field as B_FALSE */
	boolean_t	irm_inbound;	/* B_TRUE */
	iaflags_t	irm_flags;	/* ira_flags */
	/* Set to -1 by ip_recv_attr_to_mblk() when there is no ira_ill */
	netstackid_t	irm_stackid;	/* Verify it didn't go away */
	uint_t		irm_ifindex;	/* To find ira_ill */

	uint_t		irm_rifindex;	/* ira_rifindex */
	uint_t		irm_ruifindex;	/* ira_ruifindex */
	uint_t		irm_pktlen;	/* ira_pktlen */
	uint16_t	irm_ip_hdr_length; /* Points to ULP header */
	uint8_t		irm_protocol;	/* Protocol number for ULP cksum */
	zoneid_t	irm_zoneid;	/* ALL_ZONES unless local delivery */

	squeue_t	*irm_sqp;	/* ira_sqp */
	ill_rx_ring_t	*irm_ring;	/* ira_ring */

	ipaddr_t	irm_mroute_tunnel;	/* IRAF_MROUTE_TUNNEL_SET */
	zoneid_t	irm_no_loop_zoneid;	/* IRAF_NO_LOOP_ZONEID_SET */
	uint32_t	irm_esp_udp_ports;	/* IRAF_ESP_UDP_PORTS */

	char		irm_l2src[IRA_L2SRC_SIZE];	/* If IRAF_L2SRC_SET */

	cred_t		*irm_cred;	/* For getpeerucred - refhold if set */
	pid_t		irm_cpid;	/* For getpeerucred */

	ts_label_t	*irm_tsl;	/* Refhold if set. */

	/*
	 * When set these correspond to a refhold on the object.
	 */
	struct ipsa_s		*irm_ipsec_ah_sa;	/* SA for AH */
	struct ipsa_s		*irm_ipsec_esp_sa;	/* SA for ESP */
	struct ipsec_action_s	*irm_ipsec_action; /* For reflected packets */
} iramblk_t;
217
218
219 /*
220 * Take the information in ip_xmit_attr_t and stick it in an mblk
221 * that can later be passed to ip_xmit_attr_from_mblk to recreate the
222 * ip_xmit_attr_t.
223 *
224 * Returns NULL on memory allocation failure.
225 */
226 mblk_t *
ip_xmit_attr_to_mblk(ip_xmit_attr_t * ixa)227 ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa)
228 {
229 mblk_t *ixamp;
230 ixamblk_t *ixm;
231 nce_t *nce = ixa->ixa_nce;
232
233 ASSERT(nce != NULL);
234 ixamp = allocb(sizeof (*ixm), BPRI_MED);
235 if (ixamp == NULL)
236 return (NULL);
237
238 ixamp->b_datap->db_type = M_BREAK;
239 ixamp->b_wptr += sizeof (*ixm);
240 ixm = (ixamblk_t *)ixamp->b_rptr;
241
242 bzero(ixm, sizeof (*ixm));
243 ixm->ixm_inbound = B_FALSE;
244 ixm->ixm_flags = ixa->ixa_flags;
245 ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid;
246 ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex;
247 ixm->ixm_nceaddr_v6 = nce->nce_addr;
248 ixm->ixm_fragsize = ixa->ixa_fragsize;
249 ixm->ixm_pktlen = ixa->ixa_pktlen;
250 ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length;
251 ixm->ixm_protocol = ixa->ixa_protocol;
252 ixm->ixm_postfragfn = ixa->ixa_postfragfn;
253 ixm->ixm_zoneid = ixa->ixa_zoneid;
254 ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid;
255 ixm->ixm_scopeid = ixa->ixa_scopeid;
256 ixm->ixm_ident = ixa->ixa_ident;
257 ixm->ixm_xmit_hint = ixa->ixa_xmit_hint;
258
259 if (ixa->ixa_tsl != NULL) {
260 ixm->ixm_tsl = ixa->ixa_tsl;
261 label_hold(ixm->ixm_tsl);
262 }
263 if (ixa->ixa_cred != NULL) {
264 ixm->ixm_cred = ixa->ixa_cred;
265 crhold(ixa->ixa_cred);
266 }
267 ixm->ixm_cpid = ixa->ixa_cpid;
268 ixm->ixm_conn_id = ixa->ixa_conn_id;
269
270 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
271 if (ixa->ixa_ipsec_ah_sa != NULL) {
272 ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa;
273 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
274 }
275 if (ixa->ixa_ipsec_esp_sa != NULL) {
276 ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa;
277 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
278 }
279 if (ixa->ixa_ipsec_policy != NULL) {
280 ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy;
281 IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
282 }
283 if (ixa->ixa_ipsec_action != NULL) {
284 ixm->ixm_ipsec_action = ixa->ixa_ipsec_action;
285 IPACT_REFHOLD(ixa->ixa_ipsec_action);
286 }
287 if (ixa->ixa_ipsec_latch != NULL) {
288 ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch;
289 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
290 }
291 ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0];
292 ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1];
293 ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port;
294 ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port;
295 ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type;
296 ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code;
297 ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf;
298 ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0];
299 ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1];
300 ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2];
301 ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3];
302 ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0];
303 ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1];
304 ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2];
305 ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3];
306 ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx;
307 ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx;
308 ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto;
309 }
310 return (ixamp);
311 }
312
313 /*
314 * Extract the ip_xmit_attr_t from the mblk, checking that the
315 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is
316 * not the case.
317 *
318 * Otherwise ixa is updated.
319 * Caller needs to release references on the ixa by calling ixa_refrele()
320 * which will imediately call ixa_inactive to release the references.
321 */
322 boolean_t
ip_xmit_attr_from_mblk(mblk_t * ixamp,ip_xmit_attr_t * ixa)323 ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa)
324 {
325 ixamblk_t *ixm;
326 netstack_t *ns;
327 ip_stack_t *ipst;
328 ill_t *ill;
329 nce_t *nce;
330
331 /* We assume the caller hasn't initialized ixa */
332 bzero(ixa, sizeof (*ixa));
333
334 ASSERT(DB_TYPE(ixamp) == M_BREAK);
335 ASSERT(ixamp->b_cont == NULL);
336
337 ixm = (ixamblk_t *)ixamp->b_rptr;
338 ASSERT(!ixm->ixm_inbound);
339
340 /* Verify the netstack is still around */
341 ns = netstack_find_by_stackid(ixm->ixm_stackid);
342 if (ns == NULL) {
343 /* Disappeared on us */
344 (void) ip_xmit_attr_free_mblk(ixamp);
345 return (B_FALSE);
346 }
347 ipst = ns->netstack_ip;
348
349 /* Verify the ill is still around */
350 ill = ill_lookup_on_ifindex(ixm->ixm_ifindex,
351 !(ixm->ixm_flags & IXAF_IS_IPV4), ipst);
352
353 /* We have the ill, hence the netstack can't go away */
354 netstack_rele(ns);
355 if (ill == NULL) {
356 /* Disappeared on us */
357 (void) ip_xmit_attr_free_mblk(ixamp);
358 return (B_FALSE);
359 }
360 /*
361 * Find the nce. We don't load-spread (only lookup nce's on the ill)
362 * because we want to find the same nce as the one we had when
363 * ip_xmit_attr_to_mblk was called.
364 */
365 if (ixm->ixm_flags & IXAF_IS_IPV4) {
366 nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4);
367 } else {
368 nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6);
369 }
370
371 /* We have the nce, hence the ill can't go away */
372 ill_refrele(ill);
373 if (nce == NULL) {
374 /*
375 * Since this is unusual and we don't know what type of
376 * nce it was, we drop the packet.
377 */
378 (void) ip_xmit_attr_free_mblk(ixamp);
379 return (B_FALSE);
380 }
381
382 ixa->ixa_flags = ixm->ixm_flags;
383 ixa->ixa_refcnt = 1;
384 ixa->ixa_ipst = ipst;
385 ixa->ixa_fragsize = ixm->ixm_fragsize;
386 ixa->ixa_pktlen = ixm->ixm_pktlen;
387 ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length;
388 ixa->ixa_protocol = ixm->ixm_protocol;
389 ixa->ixa_nce = nce;
390 ixa->ixa_postfragfn = ixm->ixm_postfragfn;
391 ixa->ixa_zoneid = ixm->ixm_zoneid;
392 ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid;
393 ixa->ixa_scopeid = ixm->ixm_scopeid;
394 ixa->ixa_ident = ixm->ixm_ident;
395 ixa->ixa_xmit_hint = ixm->ixm_xmit_hint;
396
397 if (ixm->ixm_tsl != NULL) {
398 ixa->ixa_tsl = ixm->ixm_tsl;
399 ixa->ixa_free_flags |= IXA_FREE_TSL;
400 ixm->ixm_tsl = NULL;
401 }
402 if (ixm->ixm_cred != NULL) {
403 ixa->ixa_cred = ixm->ixm_cred;
404 ixa->ixa_free_flags |= IXA_FREE_CRED;
405 ixm->ixm_cred = NULL;
406 }
407 ixa->ixa_cpid = ixm->ixm_cpid;
408 ixa->ixa_conn_id = ixm->ixm_conn_id;
409
410 ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa;
411 ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa;
412 ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy;
413 ixa->ixa_ipsec_action = ixm->ixm_ipsec_action;
414 ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch;
415
416 ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0];
417 ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1];
418 ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port;
419 ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port;
420 ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type;
421 ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code;
422 ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf;
423 ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0];
424 ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1];
425 ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2];
426 ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3];
427 ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0];
428 ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1];
429 ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2];
430 ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3];
431 ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx;
432 ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx;
433 ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto;
434
435 freeb(ixamp);
436 return (B_TRUE);
437 }
438
439 /*
440 * Free the ixm mblk and any references it holds
441 * Returns b_cont.
442 */
443 mblk_t *
ip_xmit_attr_free_mblk(mblk_t * ixamp)444 ip_xmit_attr_free_mblk(mblk_t *ixamp)
445 {
446 ixamblk_t *ixm;
447 mblk_t *mp;
448
449 /* Consume mp */
450 ASSERT(DB_TYPE(ixamp) == M_BREAK);
451 mp = ixamp->b_cont;
452
453 ixm = (ixamblk_t *)ixamp->b_rptr;
454 ASSERT(!ixm->ixm_inbound);
455
456 if (ixm->ixm_ipsec_ah_sa != NULL) {
457 IPSA_REFRELE(ixm->ixm_ipsec_ah_sa);
458 ixm->ixm_ipsec_ah_sa = NULL;
459 }
460 if (ixm->ixm_ipsec_esp_sa != NULL) {
461 IPSA_REFRELE(ixm->ixm_ipsec_esp_sa);
462 ixm->ixm_ipsec_esp_sa = NULL;
463 }
464 if (ixm->ixm_ipsec_policy != NULL) {
465 IPPOL_REFRELE(ixm->ixm_ipsec_policy);
466 ixm->ixm_ipsec_policy = NULL;
467 }
468 if (ixm->ixm_ipsec_action != NULL) {
469 IPACT_REFRELE(ixm->ixm_ipsec_action);
470 ixm->ixm_ipsec_action = NULL;
471 }
472 if (ixm->ixm_ipsec_latch) {
473 IPLATCH_REFRELE(ixm->ixm_ipsec_latch);
474 ixm->ixm_ipsec_latch = NULL;
475 }
476
477 if (ixm->ixm_tsl != NULL) {
478 label_rele(ixm->ixm_tsl);
479 ixm->ixm_tsl = NULL;
480 }
481 if (ixm->ixm_cred != NULL) {
482 crfree(ixm->ixm_cred);
483 ixm->ixm_cred = NULL;
484 }
485 freeb(ixamp);
486 return (mp);
487 }
488
489 /*
490 * Take the information in ip_recv_attr_t and stick it in an mblk
491 * that can later be passed to ip_recv_attr_from_mblk to recreate the
492 * ip_recv_attr_t.
493 *
494 * Returns NULL on memory allocation failure.
495 */
496 mblk_t *
ip_recv_attr_to_mblk(ip_recv_attr_t * ira)497 ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
498 {
499 mblk_t *iramp;
500 iramblk_t *irm;
501 ill_t *ill = ira->ira_ill;
502
503 ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);
504
505 iramp = allocb(sizeof (*irm), BPRI_MED);
506 if (iramp == NULL)
507 return (NULL);
508
509 iramp->b_datap->db_type = M_BREAK;
510 iramp->b_wptr += sizeof (*irm);
511 irm = (iramblk_t *)iramp->b_rptr;
512
513 bzero(irm, sizeof (*irm));
514 irm->irm_inbound = B_TRUE;
515 irm->irm_flags = ira->ira_flags;
516 if (ill != NULL) {
517 /* Internal to IP - preserve ip_stack_t, ill and rill */
518 irm->irm_stackid =
519 ill->ill_ipst->ips_netstack->netstack_stackid;
520 irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
521 ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
522 ira->ira_rifindex);
523 } else {
524 /* Let ip_recv_attr_from_stackid know there isn't one */
525 irm->irm_stackid = -1;
526 }
527 irm->irm_rifindex = ira->ira_rifindex;
528 irm->irm_ruifindex = ira->ira_ruifindex;
529 irm->irm_pktlen = ira->ira_pktlen;
530 irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
531 irm->irm_protocol = ira->ira_protocol;
532
533 irm->irm_sqp = ira->ira_sqp;
534 irm->irm_ring = ira->ira_ring;
535
536 irm->irm_zoneid = ira->ira_zoneid;
537 irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
538 irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
539 irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;
540
541 if (ira->ira_tsl != NULL) {
542 irm->irm_tsl = ira->ira_tsl;
543 label_hold(irm->irm_tsl);
544 }
545 if (ira->ira_cred != NULL) {
546 irm->irm_cred = ira->ira_cred;
547 crhold(ira->ira_cred);
548 }
549 irm->irm_cpid = ira->ira_cpid;
550
551 if (ira->ira_flags & IRAF_L2SRC_SET)
552 bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);
553
554 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
555 if (ira->ira_ipsec_ah_sa != NULL) {
556 irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
557 IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
558 }
559 if (ira->ira_ipsec_esp_sa != NULL) {
560 irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
561 IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
562 }
563 if (ira->ira_ipsec_action != NULL) {
564 irm->irm_ipsec_action = ira->ira_ipsec_action;
565 IPACT_REFHOLD(ira->ira_ipsec_action);
566 }
567 }
568 return (iramp);
569 }
570
571 /*
572 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
573 * then irm_stackid is not -1, in which case we check that the
574 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
575 * not the case.
576 * If irm_stackid is zero then we are used by an ULP (e.g., squeue_enter)
577 * and we just proceed with ira_ill and ira_rill as NULL.
578 *
579 * The caller needs to release any references on the pointers inside the ire
580 * by calling ira_cleanup.
581 */
582 boolean_t
ip_recv_attr_from_mblk(mblk_t * iramp,ip_recv_attr_t * ira)583 ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
584 {
585 iramblk_t *irm;
586 netstack_t *ns;
587 ip_stack_t *ipst = NULL;
588 ill_t *ill = NULL, *rill = NULL;
589
590 /* We assume the caller hasn't initialized ira */
591 bzero(ira, sizeof (*ira));
592
593 ASSERT(DB_TYPE(iramp) == M_BREAK);
594 ASSERT(iramp->b_cont == NULL);
595
596 irm = (iramblk_t *)iramp->b_rptr;
597 ASSERT(irm->irm_inbound);
598
599 if (irm->irm_stackid != -1) {
600 /* Verify the netstack is still around */
601 ns = netstack_find_by_stackid(irm->irm_stackid);
602 if (ns == NULL) {
603 /* Disappeared on us */
604 (void) ip_recv_attr_free_mblk(iramp);
605 return (B_FALSE);
606 }
607 ipst = ns->netstack_ip;
608
609 /* Verify the ill is still around */
610 ill = ill_lookup_on_ifindex(irm->irm_ifindex,
611 !(irm->irm_flags & IRAF_IS_IPV4), ipst);
612
613 if (irm->irm_ifindex == irm->irm_rifindex) {
614 rill = ill;
615 } else {
616 rill = ill_lookup_on_ifindex(irm->irm_rifindex,
617 !(irm->irm_flags & IRAF_IS_IPV4), ipst);
618 }
619
620 /* We have the ill, hence the netstack can't go away */
621 netstack_rele(ns);
622 if (ill == NULL || rill == NULL) {
623 /* Disappeared on us */
624 if (ill != NULL)
625 ill_refrele(ill);
626 if (rill != NULL && rill != ill)
627 ill_refrele(rill);
628 (void) ip_recv_attr_free_mblk(iramp);
629 return (B_FALSE);
630 }
631 }
632
633 ira->ira_flags = irm->irm_flags;
634 /* Caller must ill_refele(ira_ill) by using ira_cleanup() */
635 ira->ira_ill = ill;
636 ira->ira_rill = rill;
637
638 ira->ira_rifindex = irm->irm_rifindex;
639 ira->ira_ruifindex = irm->irm_ruifindex;
640 ira->ira_pktlen = irm->irm_pktlen;
641 ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
642 ira->ira_protocol = irm->irm_protocol;
643
644 ira->ira_sqp = irm->irm_sqp;
645 /* The rest of IP assumes that the rings never go away. */
646 ira->ira_ring = irm->irm_ring;
647
648 ira->ira_zoneid = irm->irm_zoneid;
649 ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
650 ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
651 ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;
652
653 if (irm->irm_tsl != NULL) {
654 ira->ira_tsl = irm->irm_tsl;
655 ira->ira_free_flags |= IRA_FREE_TSL;
656 irm->irm_tsl = NULL;
657 }
658 if (irm->irm_cred != NULL) {
659 ira->ira_cred = irm->irm_cred;
660 ira->ira_free_flags |= IRA_FREE_CRED;
661 irm->irm_cred = NULL;
662 }
663 ira->ira_cpid = irm->irm_cpid;
664
665 if (ira->ira_flags & IRAF_L2SRC_SET)
666 bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);
667
668 ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
669 ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
670 ira->ira_ipsec_action = irm->irm_ipsec_action;
671
672 freeb(iramp);
673 return (B_TRUE);
674 }
675
676 /*
677 * Free the irm mblk and any references it holds
678 * Returns b_cont.
679 */
680 mblk_t *
ip_recv_attr_free_mblk(mblk_t * iramp)681 ip_recv_attr_free_mblk(mblk_t *iramp)
682 {
683 iramblk_t *irm;
684 mblk_t *mp;
685
686 /* Consume mp */
687 ASSERT(DB_TYPE(iramp) == M_BREAK);
688 mp = iramp->b_cont;
689
690 irm = (iramblk_t *)iramp->b_rptr;
691 ASSERT(irm->irm_inbound);
692
693 if (irm->irm_ipsec_ah_sa != NULL) {
694 IPSA_REFRELE(irm->irm_ipsec_ah_sa);
695 irm->irm_ipsec_ah_sa = NULL;
696 }
697 if (irm->irm_ipsec_esp_sa != NULL) {
698 IPSA_REFRELE(irm->irm_ipsec_esp_sa);
699 irm->irm_ipsec_esp_sa = NULL;
700 }
701 if (irm->irm_ipsec_action != NULL) {
702 IPACT_REFRELE(irm->irm_ipsec_action);
703 irm->irm_ipsec_action = NULL;
704 }
705 if (irm->irm_tsl != NULL) {
706 label_rele(irm->irm_tsl);
707 irm->irm_tsl = NULL;
708 }
709 if (irm->irm_cred != NULL) {
710 crfree(irm->irm_cred);
711 irm->irm_cred = NULL;
712 }
713
714 freeb(iramp);
715 return (mp);
716 }
717
718 /*
719 * Returns true if the mblk contains an ip_recv_attr_t
720 * For now we just check db_type.
721 */
722 boolean_t
ip_recv_attr_is_mblk(mblk_t * mp)723 ip_recv_attr_is_mblk(mblk_t *mp)
724 {
725 /*
726 * Need to handle the various forms of tcp_timermp which are tagged
727 * with b_wptr and might have a NULL b_datap.
728 */
729 if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1)
730 return (B_FALSE);
731
732 #ifdef DEBUG
733 iramblk_t *irm;
734
735 if (DB_TYPE(mp) != M_BREAK)
736 return (B_FALSE);
737
738 irm = (iramblk_t *)mp->b_rptr;
739 ASSERT(irm->irm_inbound);
740 return (B_TRUE);
741 #else
742 return (DB_TYPE(mp) == M_BREAK);
743 #endif
744 }
745
746 static ip_xmit_attr_t *
conn_get_ixa_impl(conn_t * connp,boolean_t replace,int kmflag)747 conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag)
748 {
749 ip_xmit_attr_t *ixa;
750 ip_xmit_attr_t *oldixa;
751
752 mutex_enter(&connp->conn_lock);
753 ixa = connp->conn_ixa;
754
755 /* At least one references for the conn_t */
756 ASSERT(ixa->ixa_refcnt >= 1);
757 if (atomic_inc_32_nv(&ixa->ixa_refcnt) == 2) {
758 /* No other thread using conn_ixa */
759 mutex_exit(&connp->conn_lock);
760 return (ixa);
761 }
762 ixa = kmem_alloc(sizeof (*ixa), kmflag);
763 if (ixa == NULL) {
764 mutex_exit(&connp->conn_lock);
765 ixa_refrele(connp->conn_ixa);
766 return (NULL);
767 }
768 ixa_safe_copy(connp->conn_ixa, ixa);
769
770 /* Make sure we drop conn_lock before any refrele */
771 if (replace) {
772 ixa->ixa_refcnt++; /* No atomic needed - not visible */
773 oldixa = connp->conn_ixa;
774 connp->conn_ixa = ixa;
775 mutex_exit(&connp->conn_lock);
776 IXA_REFRELE(oldixa); /* Undo refcnt from conn_t */
777 } else {
778 oldixa = connp->conn_ixa;
779 mutex_exit(&connp->conn_lock);
780 }
781 IXA_REFRELE(oldixa); /* Undo above atomic_add_32_nv */
782
783 return (ixa);
784 }
785
786 /*
787 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only
788 * the caller can access the ip_xmit_attr_t.
789 *
790 * If nobody else is using conn_ixa we return it.
791 * Otherwise we make a "safe" copy of conn_ixa
792 * and return it. The "safe" copy has the pointers set to NULL
793 * (since the pointers might be changed by another thread using
794 * conn_ixa). The caller needs to check for NULL pointers to see
795 * if ip_set_destination needs to be called to re-establish the pointers.
796 *
797 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t.
798 * That is used when we connect() the ULP.
799 */
800 ip_xmit_attr_t *
conn_get_ixa(conn_t * connp,boolean_t replace)801 conn_get_ixa(conn_t *connp, boolean_t replace)
802 {
803 return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP));
804 }
805
806 /*
807 * Used only when the option is to have the kernel hang due to not
808 * cleaning up ixa references on ills etc.
809 */
810 ip_xmit_attr_t *
conn_get_ixa_tryhard(conn_t * connp,boolean_t replace)811 conn_get_ixa_tryhard(conn_t *connp, boolean_t replace)
812 {
813 return (conn_get_ixa_impl(connp, replace, KM_SLEEP));
814 }
815
816 /*
817 * Replace conn_ixa with the ixa argument.
818 *
819 * The caller must hold conn_lock.
820 *
821 * We return the old ixa; the caller must ixa_refrele that after conn_lock
822 * has been dropped.
823 */
824 ip_xmit_attr_t *
conn_replace_ixa(conn_t * connp,ip_xmit_attr_t * ixa)825 conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa)
826 {
827 ip_xmit_attr_t *oldixa;
828
829 ASSERT(MUTEX_HELD(&connp->conn_lock));
830
831 oldixa = connp->conn_ixa;
832 IXA_REFHOLD(ixa);
833 ixa->ixa_conn_id = oldixa->ixa_conn_id;
834 connp->conn_ixa = ixa;
835 return (oldixa);
836 }
837
838 /*
839 * Return a ip_xmit_attr_t to use with a conn_t that is based on but
840 * separate from conn_ixa.
841 *
842 * This "safe" copy has the pointers set to NULL
843 * (since the pointers might be changed by another thread using
844 * conn_ixa). The caller needs to check for NULL pointers to see
845 * if ip_set_destination needs to be called to re-establish the pointers.
846 */
847 ip_xmit_attr_t *
conn_get_ixa_exclusive(conn_t * connp)848 conn_get_ixa_exclusive(conn_t *connp)
849 {
850 ip_xmit_attr_t *ixa;
851
852 mutex_enter(&connp->conn_lock);
853 ixa = connp->conn_ixa;
854
855 /* At least one references for the conn_t */
856 ASSERT(ixa->ixa_refcnt >= 1);
857
858 /* Make sure conn_ixa doesn't disappear while we copy it */
859 atomic_inc_32(&ixa->ixa_refcnt);
860
861 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
862 if (ixa == NULL) {
863 mutex_exit(&connp->conn_lock);
864 ixa_refrele(connp->conn_ixa);
865 return (NULL);
866 }
867 ixa_safe_copy(connp->conn_ixa, ixa);
868 mutex_exit(&connp->conn_lock);
869 IXA_REFRELE(connp->conn_ixa);
870 return (ixa);
871 }
872
873 void
ixa_safe_copy(ip_xmit_attr_t * src,ip_xmit_attr_t * ixa)874 ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
875 {
876 bcopy(src, ixa, sizeof (*ixa));
877 ixa->ixa_refcnt = 1;
878 /*
879 * Clear any pointers that have references and might be changed
880 * by ip_set_destination or the ULP
881 */
882 ixa->ixa_ire = NULL;
883 ixa->ixa_nce = NULL;
884 ixa->ixa_dce = NULL;
885 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
886 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
887 #ifdef DEBUG
888 ixa->ixa_curthread = NULL;
889 #endif
890 /* Clear all the IPsec pointers and the flag as well. */
891 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;
892
893 ixa->ixa_ipsec_latch = NULL;
894 ixa->ixa_ipsec_ah_sa = NULL;
895 ixa->ixa_ipsec_esp_sa = NULL;
896 ixa->ixa_ipsec_policy = NULL;
897 ixa->ixa_ipsec_action = NULL;
898
899 /*
900 * We leave ixa_tsl unchanged, but if it has a refhold we need
901 * to get an extra refhold.
902 */
903 if (ixa->ixa_free_flags & IXA_FREE_TSL)
904 label_hold(ixa->ixa_tsl);
905
906 /*
907 * We leave ixa_cred unchanged, but if it has a refhold we need
908 * to get an extra refhold.
909 */
910 if (ixa->ixa_free_flags & IXA_FREE_CRED)
911 crhold(ixa->ixa_cred);
912
913 /*
914 * There is no cleanup in progress on this new copy.
915 */
916 ixa->ixa_tcpcleanup = IXATC_IDLE;
917 }
918
919 /*
920 * Duplicate an ip_xmit_attr_t.
921 * Assumes that the caller controls the ixa, hence we do not need to use
922 * a safe copy. We just have to increase the refcnt on any pointers.
923 */
924 ip_xmit_attr_t *
ip_xmit_attr_duplicate(ip_xmit_attr_t * src_ixa)925 ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa)
926 {
927 ip_xmit_attr_t *ixa;
928
929 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
930 if (ixa == NULL)
931 return (NULL);
932 bcopy(src_ixa, ixa, sizeof (*ixa));
933 ixa->ixa_refcnt = 1;
934
935 if (ixa->ixa_ire != NULL)
936 ire_refhold_notr(ixa->ixa_ire);
937 if (ixa->ixa_nce != NULL)
938 nce_refhold(ixa->ixa_nce);
939 if (ixa->ixa_dce != NULL)
940 dce_refhold_notr(ixa->ixa_dce);
941
942 #ifdef DEBUG
943 ixa->ixa_curthread = NULL;
944 #endif
945
946 if (ixa->ixa_ipsec_latch != NULL)
947 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
948 if (ixa->ixa_ipsec_ah_sa != NULL)
949 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
950 if (ixa->ixa_ipsec_esp_sa != NULL)
951 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
952 if (ixa->ixa_ipsec_policy != NULL)
953 IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
954 if (ixa->ixa_ipsec_action != NULL)
955 IPACT_REFHOLD(ixa->ixa_ipsec_action);
956
957 if (ixa->ixa_tsl != NULL) {
958 label_hold(ixa->ixa_tsl);
959 ixa->ixa_free_flags |= IXA_FREE_TSL;
960 }
961 if (ixa->ixa_cred != NULL) {
962 crhold(ixa->ixa_cred);
963 ixa->ixa_free_flags |= IXA_FREE_CRED;
964 }
965 return (ixa);
966 }
967
968 /*
969 * Used to replace the ixa_label field.
970 * The caller should have a reference on the label, which we transfer to
971 * the attributes so that when the attribute is freed/cleaned up
972 * we will release that reference.
973 */
974 void
ip_xmit_attr_replace_tsl(ip_xmit_attr_t * ixa,ts_label_t * tsl)975 ip_xmit_attr_replace_tsl(ip_xmit_attr_t *ixa, ts_label_t *tsl)
976 {
977 ASSERT(tsl != NULL);
978
979 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
980 ASSERT(ixa->ixa_tsl != NULL);
981 label_rele(ixa->ixa_tsl);
982 } else {
983 ixa->ixa_free_flags |= IXA_FREE_TSL;
984 }
985 ixa->ixa_tsl = tsl;
986 }
987
988 /*
989 * Replace the ip_recv_attr_t's label.
990 * Due to kernel RPC's use of db_credp we also need to replace ira_cred;
991 * TCP/UDP uses ira_cred to set db_credp for non-socket users.
992 * This can fail (and return B_FALSE) due to lack of memory.
993 */
994 boolean_t
ip_recv_attr_replace_label(ip_recv_attr_t * ira,ts_label_t * tsl)995 ip_recv_attr_replace_label(ip_recv_attr_t *ira, ts_label_t *tsl)
996 {
997 cred_t *newcr;
998
999 if (ira->ira_free_flags & IRA_FREE_TSL) {
1000 ASSERT(ira->ira_tsl != NULL);
1001 label_rele(ira->ira_tsl);
1002 }
1003 label_hold(tsl);
1004 ira->ira_tsl = tsl;
1005 ira->ira_free_flags |= IRA_FREE_TSL;
1006
1007 /*
1008 * Reset zoneid if we have a shared address. That allows
1009 * ip_fanout_tx_v4/v6 to determine the zoneid again.
1010 */
1011 if (ira->ira_flags & IRAF_TX_SHARED_ADDR)
1012 ira->ira_zoneid = ALL_ZONES;
1013
1014 /* We update ira_cred for RPC */
1015 newcr = copycred_from_tslabel(ira->ira_cred, ira->ira_tsl, KM_NOSLEEP);
1016 if (newcr == NULL)
1017 return (B_FALSE);
1018 if (ira->ira_free_flags & IRA_FREE_CRED)
1019 crfree(ira->ira_cred);
1020 ira->ira_cred = newcr;
1021 ira->ira_free_flags |= IRA_FREE_CRED;
1022 return (B_TRUE);
1023 }
1024
1025 /*
1026 * This needs to be called after ip_set_destination/tsol_check_dest might
1027 * have changed ixa_tsl to be specific for a destination, and we now want to
1028 * send to a different destination.
1029 * We have to restart with crgetlabel() since ip_set_destination/
1030 * tsol_check_dest will start with ixa_tsl.
1031 */
1032 void
ip_xmit_attr_restore_tsl(ip_xmit_attr_t * ixa,cred_t * cr)1033 ip_xmit_attr_restore_tsl(ip_xmit_attr_t *ixa, cred_t *cr)
1034 {
1035 if (!is_system_labeled())
1036 return;
1037
1038 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
1039 ASSERT(ixa->ixa_tsl != NULL);
1040 label_rele(ixa->ixa_tsl);
1041 ixa->ixa_free_flags &= ~IXA_FREE_TSL;
1042 }
1043 ixa->ixa_tsl = crgetlabel(cr);
1044 }
1045
1046 void
ixa_refrele(ip_xmit_attr_t * ixa)1047 ixa_refrele(ip_xmit_attr_t *ixa)
1048 {
1049 IXA_REFRELE(ixa);
1050 }
1051
1052 void
ixa_inactive(ip_xmit_attr_t * ixa)1053 ixa_inactive(ip_xmit_attr_t *ixa)
1054 {
1055 ASSERT(ixa->ixa_refcnt == 0);
1056
1057 ixa_cleanup(ixa);
1058 kmem_free(ixa, sizeof (*ixa));
1059 }
1060
1061 /*
1062 * Release any references contained in the ixa.
1063 * Also clear any fields that are not controlled by ixa_flags.
1064 */
1065 void
ixa_cleanup(ip_xmit_attr_t * ixa)1066 ixa_cleanup(ip_xmit_attr_t *ixa)
1067 {
1068 if (ixa->ixa_ire != NULL) {
1069 ire_refrele_notr(ixa->ixa_ire);
1070 ixa->ixa_ire = NULL;
1071 }
1072 if (ixa->ixa_dce != NULL) {
1073 dce_refrele_notr(ixa->ixa_dce);
1074 ixa->ixa_dce = NULL;
1075 }
1076 if (ixa->ixa_nce != NULL) {
1077 nce_refrele(ixa->ixa_nce);
1078 ixa->ixa_nce = NULL;
1079 }
1080 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1081 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
1082 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
1083 ipsec_out_release_refs(ixa);
1084 }
1085 if (ixa->ixa_free_flags & IXA_FREE_TSL) {
1086 ASSERT(ixa->ixa_tsl != NULL);
1087 label_rele(ixa->ixa_tsl);
1088 ixa->ixa_free_flags &= ~IXA_FREE_TSL;
1089 }
1090 ixa->ixa_tsl = NULL;
1091 if (ixa->ixa_free_flags & IXA_FREE_CRED) {
1092 ASSERT(ixa->ixa_cred != NULL);
1093 crfree(ixa->ixa_cred);
1094 ixa->ixa_free_flags &= ~IXA_FREE_CRED;
1095 }
1096 ixa->ixa_cred = NULL;
1097 ixa->ixa_src_preferences = 0;
1098 ixa->ixa_ifindex = 0;
1099 ixa->ixa_multicast_ifindex = 0;
1100 ixa->ixa_multicast_ifaddr = INADDR_ANY;
1101 }
1102
1103 /*
1104 * Release any references contained in the ira.
1105 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second
1106 * argument.
1107 */
1108 void
ira_cleanup(ip_recv_attr_t * ira,boolean_t refrele_ill)1109 ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill)
1110 {
1111 if (ira->ira_ill != NULL) {
1112 if (ira->ira_rill != ira->ira_ill) {
1113 /* Caused by async processing */
1114 ill_refrele(ira->ira_rill);
1115 }
1116 if (refrele_ill)
1117 ill_refrele(ira->ira_ill);
1118 }
1119 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1120 ipsec_in_release_refs(ira);
1121 }
1122 if (ira->ira_free_flags & IRA_FREE_TSL) {
1123 ASSERT(ira->ira_tsl != NULL);
1124 label_rele(ira->ira_tsl);
1125 ira->ira_free_flags &= ~IRA_FREE_TSL;
1126 }
1127 ira->ira_tsl = NULL;
1128 if (ira->ira_free_flags & IRA_FREE_CRED) {
1129 ASSERT(ira->ira_cred != NULL);
1130 crfree(ira->ira_cred);
1131 ira->ira_free_flags &= ~IRA_FREE_CRED;
1132 }
1133 ira->ira_cred = NULL;
1134 }
1135
1136 /*
1137 * Function to help release any IRE, NCE, or DCEs that
1138 * have been deleted and are marked as condemned.
1139 * The caller is responsible for any serialization which is different
1140 * for TCP, SCTP, and others.
1141 */
1142 static void
ixa_cleanup_stale(ip_xmit_attr_t * ixa)1143 ixa_cleanup_stale(ip_xmit_attr_t *ixa)
1144 {
1145 ire_t *ire;
1146 nce_t *nce;
1147 dce_t *dce;
1148
1149 ire = ixa->ixa_ire;
1150 nce = ixa->ixa_nce;
1151 dce = ixa->ixa_dce;
1152
1153 if (ire != NULL && IRE_IS_CONDEMNED(ire)) {
1154 ire_refrele_notr(ire);
1155 ire = ire_blackhole(ixa->ixa_ipst,
1156 !(ixa->ixa_flags & IXAF_IS_IPV4));
1157 ASSERT(ire != NULL);
1158 #ifdef DEBUG
1159 ire_refhold_notr(ire);
1160 ire_refrele(ire);
1161 #endif
1162 ixa->ixa_ire = ire;
1163 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1164 }
1165 if (nce != NULL && nce->nce_is_condemned) {
1166 /* Can make it NULL as long as we set IRE_GENERATION_VERIFY */
1167 nce_refrele(nce);
1168 ixa->ixa_nce = NULL;
1169 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1170 }
1171 if (dce != NULL && DCE_IS_CONDEMNED(dce)) {
1172 dce_refrele_notr(dce);
1173 dce = dce_get_default(ixa->ixa_ipst);
1174 ASSERT(dce != NULL);
1175 #ifdef DEBUG
1176 dce_refhold_notr(dce);
1177 dce_refrele(dce);
1178 #endif
1179 ixa->ixa_dce = dce;
1180 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
1181 }
1182 }
1183
1184 static mblk_t *
tcp_ixa_cleanup_getmblk(conn_t * connp)1185 tcp_ixa_cleanup_getmblk(conn_t *connp)
1186 {
1187 tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
1188 int need_retry;
1189 mblk_t *mp;
1190
1191 mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1192
1193 /*
1194 * It's possible that someone else came in and started cleaning up
1195 * another connection between the time we verified this one is not being
1196 * cleaned up and the time we actually get the shared mblk. If that's
1197 * the case, we've dropped the lock, and some other thread may have
1198 * cleaned up this connection again, and is still waiting for
1199 * notification of that cleanup's completion. Therefore we need to
1200 * recheck.
1201 */
1202 do {
1203 need_retry = 0;
1204 while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
1205 cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
1206 &tcps->tcps_ixa_cleanup_lock);
1207 }
1208
1209 while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
1210 /*
1211 * Multiple concurrent cleanups; need to have the last
1212 * one run since it could be an unplumb.
1213 */
1214 need_retry = 1;
1215 cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
1216 &tcps->tcps_ixa_cleanup_lock);
1217 }
1218 } while (need_retry);
1219
1220 /*
1221 * We now have the lock and the mblk; now make sure that no one else can
1222 * try to clean up this connection or enqueue it for cleanup, clear the
1223 * mblk pointer for this stack, drop the lock, and return the mblk.
1224 */
1225 ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
1226 ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
1227 ASSERT(tcps->tcps_ixa_cleanup_mp == mp);
1228 ASSERT(mp != NULL);
1229
1230 connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
1231 tcps->tcps_ixa_cleanup_mp = NULL;
1232 mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1233
1234 return (mp);
1235 }
1236
1237 /*
1238 * Used to run ixa_cleanup_stale inside the tcp squeue.
1239 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
1240 * and waking up the caller.
1241 */
1242 /* ARGSUSED2 */
1243 static void
tcp_ixa_cleanup(void * arg,mblk_t * mp,void * arg2,ip_recv_attr_t * dummy)1244 tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
1245 ip_recv_attr_t *dummy)
1246 {
1247 conn_t *connp = (conn_t *)arg;
1248 tcp_stack_t *tcps;
1249
1250 tcps = connp->conn_netstack->netstack_tcp;
1251
1252 ixa_cleanup_stale(connp->conn_ixa);
1253
1254 mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1255 ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
1256 connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
1257 tcps->tcps_ixa_cleanup_mp = mp;
1258 cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
1259 /*
1260 * It is possible for any number of threads to be waiting for cleanup of
1261 * different connections. Absent a per-connection (or per-IXA) CV, we
1262 * need to wake them all up even though only one can be waiting on this
1263 * particular cleanup.
1264 */
1265 cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
1266 mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1267 }
1268
1269 static void
tcp_ixa_cleanup_wait_and_finish(conn_t * connp)1270 tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
1271 {
1272 tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
1273
1274 mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1275
1276 ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);
1277
1278 while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) {
1279 cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
1280 &tcps->tcps_ixa_cleanup_lock);
1281 }
1282
1283 ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);
1284 connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
1285 cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
1286
1287 mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1288 }
1289
1290 /*
1291 * ipcl_walk() function to help release any IRE, NCE, or DCEs that
1292 * have been deleted and are marked as condemned.
1293 * Note that we can't cleanup the pointers since there can be threads
1294 * in conn_ip_output() sending while we are called.
1295 */
1296 void
conn_ixa_cleanup(conn_t * connp,void * arg)1297 conn_ixa_cleanup(conn_t *connp, void *arg)
1298 {
1299 boolean_t tryhard = (boolean_t)arg;
1300
1301 if (IPCL_IS_TCP(connp)) {
1302 mblk_t *mp;
1303
1304 mp = tcp_ixa_cleanup_getmblk(connp);
1305
1306 if (connp->conn_sqp->sq_run == curthread) {
1307 /* Already on squeue */
1308 tcp_ixa_cleanup(connp, mp, NULL, NULL);
1309 } else {
1310 CONN_INC_REF(connp);
1311 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
1312 connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
1313 }
1314 tcp_ixa_cleanup_wait_and_finish(connp);
1315 } else if (IPCL_IS_SCTP(connp)) {
1316 sctp_t *sctp;
1317 sctp_faddr_t *fp;
1318
1319 sctp = CONN2SCTP(connp);
1320 RUN_SCTP(sctp);
1321 ixa_cleanup_stale(connp->conn_ixa);
1322 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next)
1323 ixa_cleanup_stale(fp->sf_ixa);
1324 WAKE_SCTP(sctp);
1325 } else {
1326 ip_xmit_attr_t *ixa;
1327
1328 /*
1329 * If there is a different thread using conn_ixa then we get a
1330 * new copy and cut the old one loose from conn_ixa. Otherwise
1331 * we use conn_ixa and prevent any other thread from
1332 * using/changing it. Anybody using conn_ixa (e.g., a thread in
1333 * conn_ip_output) will do an ixa_refrele which will remove any
1334 * references on the ire etc.
1335 *
1336 * Once we are done other threads can use conn_ixa since the
1337 * refcnt will be back at one.
1338 *
1339 * We are called either because an ill is going away, or
1340 * due to memory reclaim. In the former case we wait for
1341 * memory since we must remove the refcnts on the ill.
1342 */
1343 if (tryhard) {
1344 ixa = conn_get_ixa_tryhard(connp, B_TRUE);
1345 ASSERT(ixa != NULL);
1346 } else {
1347 ixa = conn_get_ixa(connp, B_TRUE);
1348 if (ixa == NULL) {
1349 /*
1350 * Somebody else was using it and kmem_alloc
1351 * failed! Next memory reclaim will try to
1352 * clean up.
1353 */
1354 DTRACE_PROBE1(conn__ixa__cleanup__bail,
1355 conn_t *, connp);
1356 return;
1357 }
1358 }
1359 ixa_cleanup_stale(ixa);
1360 ixa_refrele(ixa);
1361 }
1362 }
1363
1364 /*
1365 * ixa needs to be an exclusive copy so that no one changes the cookie
1366 * or the ixa_nce.
1367 */
1368 boolean_t
ixa_check_drain_insert(conn_t * connp,ip_xmit_attr_t * ixa)1369 ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa)
1370 {
1371 uintptr_t cookie = ixa->ixa_cookie;
1372 ill_dld_direct_t *idd;
1373 idl_tx_list_t *idl_txl;
1374 ill_t *ill = ixa->ixa_nce->nce_ill;
1375 boolean_t inserted = B_FALSE;
1376
1377 idd = &(ill)->ill_dld_capab->idc_direct;
1378 idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
1379 mutex_enter(&idl_txl->txl_lock);
1380
1381 /*
1382 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow
1383 * control is asserted on an ill that does not support direct calls.
1384 * Jump to insert.
1385 */
1386 if (cookie == 0)
1387 goto tryinsert;
1388
1389 ASSERT(ILL_DIRECT_CAPABLE(ill));
1390
1391 if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) {
1392 DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie);
1393 } else if (idl_txl->txl_cookie != NULL &&
1394 idl_txl->txl_cookie != ixa->ixa_cookie) {
1395 DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie,
1396 uintptr_t, idl_txl->txl_cookie);
1397 /* TODO: bump kstat for cookie collision */
1398 } else {
1399 /*
1400 * Check/set conn_blocked under conn_lock. Note that txl_lock
1401 * will not suffice since two separate UDP threads may be
1402 * racing to send to different destinations that are
1403 * associated with different cookies and thus may not be
1404 * holding the same txl_lock. Further, since a given conn_t
1405 * can only be on a single drain list, the conn_t will be
1406 * enqueued on whichever thread wins this race.
1407 */
1408 tryinsert: mutex_enter(&connp->conn_lock);
1409 if (connp->conn_blocked) {
1410 DTRACE_PROBE1(ill__tx__conn__already__blocked,
1411 conn_t *, connp);
1412 mutex_exit(&connp->conn_lock);
1413 } else {
1414 connp->conn_blocked = B_TRUE;
1415 mutex_exit(&connp->conn_lock);
1416 idl_txl->txl_cookie = cookie;
1417 conn_drain_insert(connp, idl_txl);
1418 if (!IPCL_IS_NONSTR(connp))
1419 noenable(connp->conn_wq);
1420 inserted = B_TRUE;
1421 }
1422 }
1423 mutex_exit(&idl_txl->txl_lock);
1424 return (inserted);
1425 }
1426