1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
24 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright 2021 Joyent, Inc.
26 */
27
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/dlpi.h>
31 #include <sys/stropts.h>
32 #include <sys/sysmacros.h>
33 #include <sys/strsun.h>
34 #include <sys/strlog.h>
35 #include <sys/strsubr.h>
36 #define _SUN_TPI_VERSION 2
37 #include <sys/tihdr.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/cmn_err.h>
41 #include <sys/debug.h>
42 #include <sys/sdt.h>
43 #include <sys/kobj.h>
44 #include <sys/zone.h>
45 #include <sys/neti.h>
46 #include <sys/hook.h>
47
48 #include <sys/kmem.h>
49 #include <sys/systm.h>
50 #include <sys/param.h>
51 #include <sys/socket.h>
52 #include <sys/vtrace.h>
53 #include <sys/isa_defs.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/mac.h>
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_dl.h>
61 #include <sys/sockio.h>
62 #include <netinet/in.h>
63 #include <netinet/ip6.h>
64 #include <netinet/icmp6.h>
65 #include <netinet/sctp.h>
66
67 #include <inet/common.h>
68 #include <inet/mi.h>
69 #include <inet/optcom.h>
70 #include <inet/mib2.h>
71 #include <inet/nd.h>
72 #include <inet/arp.h>
73
74 #include <inet/ip.h>
75 #include <inet/ip_impl.h>
76 #include <inet/ip6.h>
77 #include <inet/ip6_asp.h>
78 #include <inet/tcp.h>
79 #include <inet/tcp_impl.h>
80 #include <inet/udp_impl.h>
81 #include <inet/ipp_common.h>
82
83 #include <inet/ip_multi.h>
84 #include <inet/ip_if.h>
85 #include <inet/ip_ire.h>
86 #include <inet/ip_rts.h>
87 #include <inet/ip_ndp.h>
88 #include <net/pfkeyv2.h>
89 #include <inet/sadb.h>
90 #include <inet/ipsec_impl.h>
91 #include <inet/iptun/iptun_impl.h>
92 #include <inet/sctp_ip.h>
93 #include <sys/pattr.h>
94 #include <inet/ipclassifier.h>
95 #include <inet/ipsecah.h>
96 #include <inet/rawip_impl.h>
97 #include <inet/rts_impl.h>
98 #include <sys/squeue_impl.h>
99 #include <sys/squeue.h>
100
101 #include <sys/tsol/label.h>
102 #include <sys/tsol/tnet.h>
103
104 /* Temporary; for CR 6451644 work-around */
105 #include <sys/ethernet.h>
106
/*
 * Naming conventions:
 *	These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */
123
124 /*
125 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
126 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
127 * from IANA. This mechanism will remain in effect until an official
128 * number is obtained.
129 */
130 uchar_t ip6opt_ls;
131
132 const in6_addr_t ipv6_all_ones =
133 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
134 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
135
136 #ifdef _BIG_ENDIAN
137 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
138 #else /* _BIG_ENDIAN */
139 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
140 #endif /* _BIG_ENDIAN */
141
142 #ifdef _BIG_ENDIAN
143 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
144 #else /* _BIG_ENDIAN */
145 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
146 #endif /* _BIG_ENDIAN */
147
148 #ifdef _BIG_ENDIAN
149 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
150 #else /* _BIG_ENDIAN */
151 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
152 #endif /* _BIG_ENDIAN */
153
154 #ifdef _BIG_ENDIAN
155 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
156 #else /* _BIG_ENDIAN */
157 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
158 #endif /* _BIG_ENDIAN */
159
160 #ifdef _BIG_ENDIAN
161 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
162 #else /* _BIG_ENDIAN */
163 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
164 #endif /* _BIG_ENDIAN */
165
166 #ifdef _BIG_ENDIAN
167 const in6_addr_t ipv6_solicited_node_mcast =
168 { 0xff020000U, 0, 0x00000001U, 0xff000000U };
169 #else /* _BIG_ENDIAN */
170 const in6_addr_t ipv6_solicited_node_mcast =
171 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
172 #endif /* _BIG_ENDIAN */
173
174 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
175 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
176 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
177 ip_recv_attr_t *);
178 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
179 ip_recv_attr_t *);
180 static void icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
181 in6_addr_t *, ip_recv_attr_t *);
182 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
183 ip_recv_attr_t *);
184 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
185
186 /*
187 * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
188 * If the ICMP message is consumed by IP, i.e., it should not be delivered
189 * to any IPPROTO_ICMP raw sockets, then it returns NULL.
190 * Likewise, if the ICMP error is misformed (too short, etc), then it
191 * returns NULL. The caller uses this to determine whether or not to send
192 * to raw sockets.
193 *
194 * All error messages are passed to the matching transport stream.
195 *
196 * See comment for icmp_inbound_v4() on how IPsec is handled.
197 */
198 mblk_t *
icmp_inbound_v6(mblk_t * mp,ip_recv_attr_t * ira)199 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
200 {
201 icmp6_t *icmp6;
202 ip6_t *ip6h; /* Outer header */
203 int ip_hdr_length; /* Outer header length */
204 boolean_t interested;
205 ill_t *ill = ira->ira_ill;
206 ip_stack_t *ipst = ill->ill_ipst;
207 mblk_t *mp_ret = NULL;
208
209 ip6h = (ip6_t *)mp->b_rptr;
210
211 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
212
213 /* Check for Martian packets */
214 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
215 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
216 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
217 freemsg(mp);
218 return (NULL);
219 }
220
221 /* Make sure ira_l2src is set for ndp_input */
222 if (!(ira->ira_flags & IRAF_L2SRC_SET))
223 ip_setl2src(mp, ira, ira->ira_rill);
224
225 ip_hdr_length = ira->ira_ip_hdr_length;
226 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
227 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
228 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
229 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
230 freemsg(mp);
231 return (NULL);
232 }
233 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
234 if (ip6h == NULL) {
235 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
236 freemsg(mp);
237 return (NULL);
238 }
239 }
240
241 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
242 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
243 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
244 icmp6->icmp6_code));
245
246 /*
247 * We will set "interested" to "true" if we should pass a copy to
248 * the transport i.e., if it is an error message.
249 */
250 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
251
252 switch (icmp6->icmp6_type) {
253 case ICMP6_DST_UNREACH:
254 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
255 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
256 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
257 break;
258
259 case ICMP6_TIME_EXCEEDED:
260 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
261 break;
262
263 case ICMP6_PARAM_PROB:
264 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
265 break;
266
267 case ICMP6_PACKET_TOO_BIG:
268 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
269 break;
270
271 case ICMP6_ECHO_REQUEST:
272 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
273 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
274 !ipst->ips_ipv6_resp_echo_mcast)
275 break;
276
277 /*
278 * We must have exclusive use of the mblk to convert it to
279 * a response.
280 * If not, we copy it.
281 */
282 if (mp->b_datap->db_ref > 1) {
283 mblk_t *mp1;
284
285 mp1 = copymsg(mp);
286 if (mp1 == NULL) {
287 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
288 ip_drop_input("ipIfStatsInDiscards - copymsg",
289 mp, ill);
290 freemsg(mp);
291 return (NULL);
292 }
293 freemsg(mp);
294 mp = mp1;
295 ip6h = (ip6_t *)mp->b_rptr;
296 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
297 }
298
299 icmp6->icmp6_type = ICMP6_ECHO_REPLY;
300 icmp_send_reply_v6(mp, ip6h, icmp6, ira);
301 return (NULL);
302
303 case ICMP6_ECHO_REPLY:
304 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
305 break;
306
307 case ND_ROUTER_SOLICIT:
308 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
309 break;
310
311 case ND_ROUTER_ADVERT:
312 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
313 break;
314
315 case ND_NEIGHBOR_SOLICIT:
316 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
317 ndp_input(mp, ira);
318 return (NULL);
319
320 case ND_NEIGHBOR_ADVERT:
321 BUMP_MIB(ill->ill_icmp6_mib,
322 ipv6IfIcmpInNeighborAdvertisements);
323 ndp_input(mp, ira);
324 return (NULL);
325
326 case ND_REDIRECT:
327 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
328
329 if (ipst->ips_ipv6_ignore_redirect)
330 break;
331
332 /* We now allow a RAW socket to receive this. */
333 interested = B_TRUE;
334 break;
335
336 /*
337 * The next three icmp messages will be handled by MLD.
338 * Pass all valid MLD packets up to any process(es)
339 * listening on a raw ICMP socket.
340 */
341 case MLD_LISTENER_QUERY:
342 case MLD_LISTENER_REPORT:
343 case MLD_LISTENER_REDUCTION:
344 mp = mld_input(mp, ira);
345 return (mp);
346 default:
347 break;
348 }
349 /*
350 * See if there is an ICMP client to avoid an extra copymsg/freemsg
351 * if there isn't one.
352 */
353 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
354 /* If there is an ICMP client and we want one too, copy it. */
355
356 if (!interested) {
357 /* Caller will deliver to RAW sockets */
358 return (mp);
359 }
360 mp_ret = copymsg(mp);
361 if (mp_ret == NULL) {
362 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
363 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
364 }
365 } else if (!interested) {
366 /* Neither we nor raw sockets are interested. Drop packet now */
367 freemsg(mp);
368 return (NULL);
369 }
370
371 /*
372 * ICMP error or redirect packet. Make sure we have enough of
373 * the header and that db_ref == 1 since we might end up modifying
374 * the packet.
375 */
376 if (mp->b_cont != NULL) {
377 if (ip_pullup(mp, -1, ira) == NULL) {
378 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
379 ip_drop_input("ipIfStatsInDiscards - ip_pullup",
380 mp, ill);
381 freemsg(mp);
382 return (mp_ret);
383 }
384 }
385
386 if (mp->b_datap->db_ref > 1) {
387 mblk_t *mp1;
388
389 mp1 = copymsg(mp);
390 if (mp1 == NULL) {
391 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
392 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
393 freemsg(mp);
394 return (mp_ret);
395 }
396 freemsg(mp);
397 mp = mp1;
398 }
399
400 /*
401 * In case mp has changed, verify the message before any further
402 * processes.
403 */
404 ip6h = (ip6_t *)mp->b_rptr;
405 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
406 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
407 freemsg(mp);
408 return (mp_ret);
409 }
410
411 switch (icmp6->icmp6_type) {
412 case ND_REDIRECT:
413 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
414 break;
415 case ICMP6_PACKET_TOO_BIG:
416 /* Update DCE and adjust MTU is icmp header if needed */
417 icmp_inbound_too_big_v6(icmp6, ira);
418 /* FALLTHROUGH */
419 default:
420 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
421 break;
422 }
423
424 return (mp_ret);
425 }
426
427 /*
428 * Send an ICMP echo reply.
429 * The caller has already updated the payload part of the packet.
430 * We handle the ICMP checksum, IP source address selection and feed
431 * the packet into ip_output_simple.
432 */
433 static void
icmp_send_reply_v6(mblk_t * mp,ip6_t * ip6h,icmp6_t * icmp6,ip_recv_attr_t * ira)434 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
435 ip_recv_attr_t *ira)
436 {
437 uint_t ip_hdr_length = ira->ira_ip_hdr_length;
438 ill_t *ill = ira->ira_ill;
439 ip_stack_t *ipst = ill->ill_ipst;
440 ip_xmit_attr_t ixas;
441 in6_addr_t origsrc;
442
443 /*
444 * Remove any extension headers (do not reverse a source route)
445 * and clear the flow id (keep traffic class for now).
446 */
447 if (ip_hdr_length != IPV6_HDR_LEN) {
448 int i;
449
450 for (i = 0; i < IPV6_HDR_LEN; i++) {
451 mp->b_rptr[ip_hdr_length - i - 1] =
452 mp->b_rptr[IPV6_HDR_LEN - i - 1];
453 }
454 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
455 ip6h = (ip6_t *)mp->b_rptr;
456 ip6h->ip6_nxt = IPPROTO_ICMPV6;
457 i = ntohs(ip6h->ip6_plen);
458 i -= (ip_hdr_length - IPV6_HDR_LEN);
459 ip6h->ip6_plen = htons(i);
460 ip_hdr_length = IPV6_HDR_LEN;
461 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
462 }
463 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
464
465 /* Reverse the source and destination addresses. */
466 origsrc = ip6h->ip6_src;
467 ip6h->ip6_src = ip6h->ip6_dst;
468 ip6h->ip6_dst = origsrc;
469
470 /* set the hop limit */
471 ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
472
473 /*
474 * Prepare for checksum by putting icmp length in the icmp
475 * checksum field. The checksum is calculated in ip_output
476 */
477 icmp6->icmp6_cksum = ip6h->ip6_plen;
478
479 bzero(&ixas, sizeof (ixas));
480 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
481 ixas.ixa_zoneid = ira->ira_zoneid;
482 ixas.ixa_cred = kcred;
483 ixas.ixa_cpid = NOPID;
484 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */
485 ixas.ixa_ifindex = 0;
486 ixas.ixa_ipst = ipst;
487 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
488
489 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
490 /*
491 * This packet should go out the same way as it
492 * came in i.e in clear, independent of the IPsec
493 * policy for transmitting packets.
494 */
495 ixas.ixa_flags |= IXAF_NO_IPSEC;
496 } else {
497 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
499 /* Note: mp already consumed and ip_drop_packet done */
500 return;
501 }
502 }
503
504 /* Was the destination (now source) link-local? Send out same group */
505 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
506 ixas.ixa_flags |= IXAF_SCOPEID_SET;
507 if (IS_UNDER_IPMP(ill))
508 ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
509 else
510 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
511 }
512
513 if (ira->ira_flags & IRAF_MULTIBROADCAST) {
514 /*
515 * Not one or our addresses (IRE_LOCALs), thus we let
516 * ip_output_simple pick the source.
517 */
518 ip6h->ip6_src = ipv6_all_zeros;
519 ixas.ixa_flags |= IXAF_SET_SOURCE;
520 }
521
522 /* Should we send using dce_pmtu? */
523 if (ipst->ips_ipv6_icmp_return_pmtu)
524 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
525
526 (void) ip_output_simple(mp, &ixas);
527 ixa_cleanup(&ixas);
528
529 }
530
531 /*
532 * Verify the ICMP messages for either for ICMP error or redirect packet.
533 * The caller should have fully pulled up the message. If it's a redirect
534 * packet, only basic checks on IP header will be done; otherwise, verify
535 * the packet by looking at the included ULP header.
536 *
537 * Called before icmp_inbound_error_fanout_v6 is called.
538 */
539 static boolean_t
icmp_inbound_verify_v6(mblk_t * mp,icmp6_t * icmp6,ip_recv_attr_t * ira)540 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
541 {
542 ill_t *ill = ira->ira_ill;
543 uint16_t hdr_length;
544 uint8_t *nexthdrp;
545 uint8_t nexthdr;
546 ip_stack_t *ipst = ill->ill_ipst;
547 conn_t *connp;
548 ip6_t *ip6h; /* Inner header */
549
550 ip6h = (ip6_t *)&icmp6[1];
551 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
552 goto truncated;
553
554 if (icmp6->icmp6_type == ND_REDIRECT) {
555 hdr_length = sizeof (nd_redirect_t);
556 } else {
557 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
558 goto discard_pkt;
559 hdr_length = IPV6_HDR_LEN;
560 }
561
562 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
563 goto truncated;
564
565 /*
566 * Stop here for ICMP_REDIRECT.
567 */
568 if (icmp6->icmp6_type == ND_REDIRECT)
569 return (B_TRUE);
570
571 /*
572 * ICMP errors only.
573 */
574 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
575 goto discard_pkt;
576 nexthdr = *nexthdrp;
577
578 /* Try to pass the ICMP message to clients who need it */
579 switch (nexthdr) {
580 case IPPROTO_UDP:
581 /*
582 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
583 * transport header.
584 */
585 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
586 mp->b_wptr)
587 goto truncated;
588 break;
589 case IPPROTO_TCP: {
590 tcpha_t *tcpha;
591
592 /*
593 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
594 * transport header.
595 */
596 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
597 mp->b_wptr)
598 goto truncated;
599
600 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
601 /*
602 * With IPMP we need to match across group, which we do
603 * since we have the upper ill from ira_ill.
604 */
605 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
606 ill->ill_phyint->phyint_ifindex, ipst);
607 if (connp == NULL)
608 goto discard_pkt;
609
610 if ((connp->conn_verifyicmp != NULL) &&
611 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
612 CONN_DEC_REF(connp);
613 goto discard_pkt;
614 }
615 CONN_DEC_REF(connp);
616 break;
617 }
618 case IPPROTO_SCTP:
619 /*
620 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
621 * transport header.
622 */
623 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
624 mp->b_wptr)
625 goto truncated;
626 break;
627 case IPPROTO_ESP:
628 case IPPROTO_AH:
629 break;
630 case IPPROTO_ENCAP:
631 case IPPROTO_IPV6: {
632 /* Look for self-encapsulated packets that caused an error */
633 ip6_t *in_ip6h;
634
635 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
636 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
637 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
638 goto truncated;
639 break;
640 }
641 default:
642 break;
643 }
644
645 return (B_TRUE);
646
647 discard_pkt:
648 /* Bogus ICMP error. */
649 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
650 return (B_FALSE);
651
652 truncated:
653 /* We pulled up everthing already. Must be truncated */
654 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
655 return (B_FALSE);
656 }
657
658 /*
659 * Process received IPv6 ICMP Packet too big.
660 * The caller is responsible for validating the packet before passing it in
661 * and also to fanout the ICMP error to any matching transport conns. Assumes
662 * the message has been fully pulled up.
663 *
664 * Before getting here, the caller has called icmp_inbound_verify_v6()
665 * that should have verified with ULP to prevent undoing the changes we're
666 * going to make to DCE. For example, TCP might have verified that the packet
667 * which generated error is in the send window.
668 *
669 * In some cases modified this MTU in the ICMP header packet; the caller
670 * should pass to the matching ULP after this returns.
671 */
672 static void
icmp_inbound_too_big_v6(icmp6_t * icmp6,ip_recv_attr_t * ira)673 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
674 {
675 uint32_t mtu;
676 dce_t *dce;
677 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */
678 ip_stack_t *ipst = ill->ill_ipst;
679 int old_max_frag;
680 in6_addr_t final_dst;
681 ip6_t *ip6h; /* Inner IP header */
682
683 /* Caller has already pulled up everything. */
684 ip6h = (ip6_t *)&icmp6[1];
685 final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
686
687 mtu = ntohl(icmp6->icmp6_mtu);
688 if (mtu < IPV6_MIN_MTU) {
689 /*
690 * RFC 8021 suggests to ignore messages where mtu is
691 * less than the IPv6 minimum.
692 */
693 ip1dbg(("Received mtu less than IPv6 "
694 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
695 DTRACE_PROBE1(icmp6__too__small__mtu, uint32_t, mtu);
696 return;
697 }
698
699 /*
700 * For link local destinations matching simply on address is not
701 * sufficient. Same link local addresses for different ILL's is
702 * possible.
703 */
704 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
705 dce = dce_lookup_and_add_v6(&final_dst,
706 ill->ill_phyint->phyint_ifindex, ipst);
707 } else {
708 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
709 }
710 if (dce == NULL) {
711 /* Couldn't add a unique one - ENOMEM */
712 if (ip_debug > 2) {
713 /* ip1dbg */
714 pr_addr_dbg("icmp_inbound_too_big_v6:"
715 "no dce for dst %s\n", AF_INET6,
716 &final_dst);
717 }
718 return;
719 }
720
721 mutex_enter(&dce->dce_lock);
722 if (dce->dce_flags & DCEF_PMTU)
723 old_max_frag = dce->dce_pmtu;
724 else if (IN6_IS_ADDR_MULTICAST(&final_dst))
725 old_max_frag = ill->ill_mc_mtu;
726 else
727 old_max_frag = ill->ill_mtu;
728
729 ip1dbg(("Received mtu from router: %d\n", mtu));
730 DTRACE_PROBE1(icmp6__received__mtu, uint32_t, mtu);
731 dce->dce_pmtu = MIN(old_max_frag, mtu);
732 icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
733
734 /* We now have a PMTU for sure */
735 dce->dce_flags |= DCEF_PMTU;
736 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
737
738 mutex_exit(&dce->dce_lock);
739 /*
740 * After dropping the lock the new value is visible to everyone.
741 * Then we bump the generation number so any cached values reinspect
742 * the dce_t.
743 */
744 dce_increment_generation(dce);
745 dce_refrele(dce);
746 }
747
748 /*
749 * Fanout received ICMPv6 error packets to the transports.
750 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
751 *
752 * The caller must have called icmp_inbound_verify_v6.
753 */
754 void
icmp_inbound_error_fanout_v6(mblk_t * mp,icmp6_t * icmp6,ip_recv_attr_t * ira)755 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
756 {
757 uint16_t *up; /* Pointer to ports in ULP header */
758 uint32_t ports; /* reversed ports for fanout */
759 ip6_t rip6h; /* With reversed addresses */
760 ip6_t *ip6h; /* Inner IP header */
761 uint16_t hdr_length; /* Inner IP header length */
762 uint8_t *nexthdrp;
763 uint8_t nexthdr;
764 tcpha_t *tcpha;
765 conn_t *connp;
766 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */
767 ip_stack_t *ipst = ill->ill_ipst;
768 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
769
770 /* Caller has already pulled up everything. */
771 ip6h = (ip6_t *)&icmp6[1];
772 ASSERT(mp->b_cont == NULL);
773 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
774
775 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
776 goto drop_pkt;
777 nexthdr = *nexthdrp;
778 ira->ira_protocol = nexthdr;
779
780 /*
781 * We need a separate IP header with the source and destination
782 * addresses reversed to do fanout/classification because the ip6h in
783 * the ICMPv6 error is in the form we sent it out.
784 */
785 rip6h.ip6_src = ip6h->ip6_dst;
786 rip6h.ip6_dst = ip6h->ip6_src;
787 rip6h.ip6_nxt = nexthdr;
788
789 /* Try to pass the ICMP message to clients who need it */
790 switch (nexthdr) {
791 case IPPROTO_UDP: {
792 /* Attempt to find a client stream based on port. */
793 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
794
795 /* Note that we send error to all matches. */
796 ira->ira_flags |= IRAF_ICMP_ERROR;
797 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
798 ira->ira_flags &= ~IRAF_ICMP_ERROR;
799 return;
800 }
801 case IPPROTO_TCP: {
802 /*
803 * Attempt to find a client stream based on port.
804 * Note that we do a reverse lookup since the header is
805 * in the form we sent it out.
806 */
807 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
808 /*
809 * With IPMP we need to match across group, which we do
810 * since we have the upper ill from ira_ill.
811 */
812 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
813 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
814 if (connp == NULL) {
815 goto drop_pkt;
816 }
817
818 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
819 (ira->ira_flags & IRAF_IPSEC_SECURE)) {
820 mp = ipsec_check_inbound_policy(mp, connp,
821 NULL, ip6h, ira);
822 if (mp == NULL) {
823 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
824 /* Note that mp is NULL */
825 ip_drop_input("ipIfStatsInDiscards", mp, ill);
826 CONN_DEC_REF(connp);
827 return;
828 }
829 }
830
831 ira->ira_flags |= IRAF_ICMP_ERROR;
832 if (IPCL_IS_TCP(connp)) {
833 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
834 connp->conn_recvicmp, connp, ira, SQ_FILL,
835 SQTAG_TCP6_INPUT_ICMP_ERR);
836 } else {
837 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
838 ill_t *rill = ira->ira_rill;
839
840 ira->ira_ill = ira->ira_rill = NULL;
841 (connp->conn_recv)(connp, mp, NULL, ira);
842 CONN_DEC_REF(connp);
843 ira->ira_ill = ill;
844 ira->ira_rill = rill;
845 }
846 ira->ira_flags &= ~IRAF_ICMP_ERROR;
847 return;
848
849 }
850 case IPPROTO_SCTP:
851 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
852 /* Find a SCTP client stream for this packet. */
853 ((uint16_t *)&ports)[0] = up[1];
854 ((uint16_t *)&ports)[1] = up[0];
855
856 ira->ira_flags |= IRAF_ICMP_ERROR;
857 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
858 ira->ira_flags &= ~IRAF_ICMP_ERROR;
859 return;
860
861 case IPPROTO_ESP:
862 case IPPROTO_AH:
863 if (!ipsec_loaded(ipss)) {
864 ip_proto_not_sup(mp, ira);
865 return;
866 }
867
868 if (nexthdr == IPPROTO_ESP)
869 mp = ipsecesp_icmp_error(mp, ira);
870 else
871 mp = ipsecah_icmp_error(mp, ira);
872 if (mp == NULL)
873 return;
874
875 /* Just in case ipsec didn't preserve the NULL b_cont */
876 if (mp->b_cont != NULL) {
877 if (!pullupmsg(mp, -1))
878 goto drop_pkt;
879 }
880
881 /*
882 * If succesful, the mp has been modified to not include
883 * the ESP/AH header so we can fanout to the ULP's icmp
884 * error handler.
885 */
886 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
887 goto drop_pkt;
888
889 ip6h = (ip6_t *)mp->b_rptr;
890 /* Don't call hdr_length_v6() unless you have to. */
891 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
892 hdr_length = ip_hdr_length_v6(mp, ip6h);
893 else
894 hdr_length = IPV6_HDR_LEN;
895
896 /* Verify the modified message before any further processes. */
897 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
898 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
899 freemsg(mp);
900 return;
901 }
902
903 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
904 return;
905
906 case IPPROTO_IPV6: {
907 /* Look for self-encapsulated packets that caused an error */
908 ip6_t *in_ip6h;
909
910 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
911
912 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
913 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
914 /*
915 * Self-encapsulated case. As in the ipv4 case,
916 * we need to strip the 2nd IP header. Since mp
917 * is already pulled-up, we can simply bcopy
918 * the 3rd header + data over the 2nd header.
919 */
920 uint16_t unused_len;
921
922 /*
923 * Make sure we don't do recursion more than once.
924 */
925 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
926 &unused_len, &nexthdrp) ||
927 *nexthdrp == IPPROTO_IPV6) {
928 goto drop_pkt;
929 }
930
931 /*
932 * Copy the 3rd header + remaining data on top
933 * of the 2nd header.
934 */
935 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
936
937 /*
938 * Subtract length of the 2nd header.
939 */
940 mp->b_wptr -= hdr_length;
941
942 ip6h = (ip6_t *)mp->b_rptr;
943 /* Don't call hdr_length_v6() unless you have to. */
944 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
945 hdr_length = ip_hdr_length_v6(mp, ip6h);
946 else
947 hdr_length = IPV6_HDR_LEN;
948
949 /*
950 * Verify the modified message before any further
951 * processes.
952 */
953 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
954 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
955 freemsg(mp);
956 return;
957 }
958
959 /*
960 * Now recurse, and see what I _really_ should be
961 * doing here.
962 */
963 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
964 return;
965 }
966 }
967 /* FALLTHROUGH */
968 case IPPROTO_ENCAP:
969 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
970 &rip6h.ip6_dst, ipst)) != NULL) {
971 ira->ira_flags |= IRAF_ICMP_ERROR;
972 connp->conn_recvicmp(connp, mp, NULL, ira);
973 CONN_DEC_REF(connp);
974 ira->ira_flags &= ~IRAF_ICMP_ERROR;
975 return;
976 }
977 /*
978 * No IP tunnel is interested, fallthrough and see
979 * if a raw socket will want it.
980 */
981 /* FALLTHROUGH */
982 default:
983 ira->ira_flags |= IRAF_ICMP_ERROR;
984 ASSERT(ira->ira_protocol == nexthdr);
985 ip_fanout_proto_v6(mp, &rip6h, ira);
986 ira->ira_flags &= ~IRAF_ICMP_ERROR;
987 return;
988 }
989 /* NOTREACHED */
990 drop_pkt:
991 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
992 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
993 freemsg(mp);
994 }
995
996 /*
997 * Process received IPv6 ICMP Redirect messages.
998 * Assumes the caller has verified that the headers are in the pulled up mblk.
999 * Consumes mp.
1000 */
1001 /* ARGSUSED */
1002 static void
icmp_redirect_v6(mblk_t * mp,ip6_t * ip6h,nd_redirect_t * rd,ip_recv_attr_t * ira)1003 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1004 ip_recv_attr_t *ira)
1005 {
1006 ire_t *ire, *nire;
1007 ire_t *prev_ire = NULL;
1008 ire_t *redir_ire;
1009 in6_addr_t *src, *dst, *gateway;
1010 nd_opt_hdr_t *opt;
1011 nce_t *nce;
1012 int ncec_flags = 0;
1013 int err = 0;
1014 boolean_t redirect_to_router = B_FALSE;
1015 int len;
1016 int optlen;
1017 ill_t *ill = ira->ira_rill;
1018 ill_t *rill = ira->ira_rill;
1019 ip_stack_t *ipst = ill->ill_ipst;
1020
1021 /*
1022 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1023 * and make it be the IPMP upper so avoid being confused by a packet
1024 * addressed to a unicast address on a different ill.
1025 */
1026 if (IS_UNDER_IPMP(rill)) {
1027 rill = ipmp_ill_hold_ipmp_ill(rill);
1028 if (rill == NULL) {
1029 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1030 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1031 mp, ill);
1032 freemsg(mp);
1033 return;
1034 }
1035 ASSERT(rill != ira->ira_rill);
1036 }
1037
1038 len = mp->b_wptr - (uchar_t *)rd;
1039 src = &ip6h->ip6_src;
1040 dst = &rd->nd_rd_dst;
1041 gateway = &rd->nd_rd_target;
1042
1043 /* Verify if it is a valid redirect */
1044 if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1045 (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1046 (rd->nd_rd_code != 0) ||
1047 (len < sizeof (nd_redirect_t)) ||
1048 (IN6_IS_ADDR_V4MAPPED(dst)) ||
1049 (IN6_IS_ADDR_MULTICAST(dst))) {
1050 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1051 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1052 goto fail_redirect;
1053 }
1054
1055 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1056 IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1057 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1058 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1059 mp, ill);
1060 goto fail_redirect;
1061 }
1062
1063 optlen = len - sizeof (nd_redirect_t);
1064 if (optlen != 0) {
1065 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1066 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1067 ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1068 mp, ill);
1069 goto fail_redirect;
1070 }
1071 }
1072
1073 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1074 redirect_to_router = B_TRUE;
1075 ncec_flags |= NCE_F_ISROUTER;
1076 } else {
1077 gateway = dst; /* Add nce for dst */
1078 }
1079
1080
1081 /*
1082 * Verify that the IP source address of the redirect is
1083 * the same as the current first-hop router for the specified
1084 * ICMP destination address.
1085 * Also, Make sure we had a route for the dest in question and
1086 * that route was pointing to the old gateway (the source of the
1087 * redirect packet.)
1088 * We do longest match and then compare ire_gateway_addr_v6 below.
1089 */
1090 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1091 ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1092
1093 /*
1094 * Check that
1095 * the redirect was not from ourselves
1096 * old gateway is still directly reachable
1097 */
1098 if (prev_ire == NULL ||
1099 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1100 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1101 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1102 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1103 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1104 goto fail_redirect;
1105 }
1106
1107 ASSERT(prev_ire->ire_ill != NULL);
1108 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1109 ncec_flags |= NCE_F_NONUD;
1110
1111 opt = (nd_opt_hdr_t *)&rd[1];
1112 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1113 if (opt != NULL) {
1114 err = nce_lookup_then_add_v6(rill,
1115 (uchar_t *)&opt[1], /* Link layer address */
1116 rill->ill_phys_addr_length,
1117 gateway, ncec_flags, ND_STALE, &nce);
1118 switch (err) {
1119 case 0:
1120 nce_refrele(nce);
1121 break;
1122 case EEXIST:
1123 /*
1124 * Check to see if link layer address has changed and
1125 * process the ncec_state accordingly.
1126 */
1127 nce_process(nce->nce_common,
1128 (uchar_t *)&opt[1], 0, B_FALSE);
1129 nce_refrele(nce);
1130 break;
1131 default:
1132 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1133 err));
1134 goto fail_redirect;
1135 }
1136 }
1137 if (redirect_to_router) {
1138 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1139
1140 /*
1141 * Create a Route Association. This will allow us to remember
1142 * a router told us to use the particular gateway.
1143 */
1144 ire = ire_create_v6(
1145 dst,
1146 &ipv6_all_ones, /* mask */
1147 gateway, /* gateway addr */
1148 IRE_HOST,
1149 prev_ire->ire_ill,
1150 ALL_ZONES,
1151 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1152 NULL,
1153 ipst);
1154 } else {
1155 ipif_t *ipif;
1156 in6_addr_t gw;
1157
1158 /*
1159 * Just create an on link entry, i.e. interface route.
1160 * The gateway field is our link-local on the ill.
1161 */
1162 mutex_enter(&rill->ill_lock);
1163 for (ipif = rill->ill_ipif; ipif != NULL;
1164 ipif = ipif->ipif_next) {
1165 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1166 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1167 break;
1168 }
1169 if (ipif == NULL) {
1170 /* We have no link-local address! */
1171 mutex_exit(&rill->ill_lock);
1172 goto fail_redirect;
1173 }
1174 gw = ipif->ipif_v6lcl_addr;
1175 mutex_exit(&rill->ill_lock);
1176
1177 ire = ire_create_v6(
1178 dst, /* gateway == dst */
1179 &ipv6_all_ones, /* mask */
1180 &gw, /* gateway addr */
1181 rill->ill_net_type, /* IF_[NO]RESOLVER */
1182 prev_ire->ire_ill,
1183 ALL_ZONES,
1184 (RTF_DYNAMIC | RTF_HOST),
1185 NULL,
1186 ipst);
1187 }
1188
1189 if (ire == NULL)
1190 goto fail_redirect;
1191
1192 nire = ire_add(ire);
1193 /* Check if it was a duplicate entry */
1194 if (nire != NULL && nire != ire) {
1195 ASSERT(nire->ire_identical_ref > 1);
1196 ire_delete(nire);
1197 ire_refrele(nire);
1198 nire = NULL;
1199 }
1200 ire = nire;
1201 if (ire != NULL) {
1202 ire_refrele(ire); /* Held in ire_add */
1203
1204 /* tell routing sockets that we received a redirect */
1205 ip_rts_change_v6(RTM_REDIRECT,
1206 &rd->nd_rd_dst,
1207 &rd->nd_rd_target,
1208 &ipv6_all_ones, 0, src,
1209 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1210 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1211
1212 /*
1213 * Delete any existing IRE_HOST type ires for this destination.
1214 * This together with the added IRE has the effect of
1215 * modifying an existing redirect.
1216 */
1217 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1218 prev_ire->ire_ill, ALL_ZONES, NULL,
1219 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1220 NULL);
1221
1222 if (redir_ire != NULL) {
1223 if (redir_ire->ire_flags & RTF_DYNAMIC)
1224 ire_delete(redir_ire);
1225 ire_refrele(redir_ire);
1226 }
1227 }
1228
1229 ire_refrele(prev_ire);
1230 prev_ire = NULL;
1231
1232 fail_redirect:
1233 if (prev_ire != NULL)
1234 ire_refrele(prev_ire);
1235 freemsg(mp);
1236 if (rill != ira->ira_rill)
1237 ill_refrele(rill);
1238 }
1239
1240 /*
1241 * Build and ship an IPv6 ICMP message using the packet data in mp,
1242 * and the ICMP header pointed to by "stuff". (May be called as
1243 * writer.)
1244 * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1245 * verify that an icmp error packet can be sent.
1246 *
1247 * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1248 * source address (see above function).
1249 */
1250 static void
icmp_pkt_v6(mblk_t * mp,void * stuff,size_t len,const in6_addr_t * v6src_ptr,ip_recv_attr_t * ira)1251 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1252 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1253 {
1254 ip6_t *ip6h;
1255 in6_addr_t v6dst;
1256 size_t len_needed;
1257 size_t msg_len;
1258 mblk_t *mp1;
1259 icmp6_t *icmp6;
1260 in6_addr_t v6src;
1261 ill_t *ill = ira->ira_ill;
1262 ip_stack_t *ipst = ill->ill_ipst;
1263 ip_xmit_attr_t ixas;
1264
1265 ip6h = (ip6_t *)mp->b_rptr;
1266
1267 bzero(&ixas, sizeof (ixas));
1268 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1269 ixas.ixa_zoneid = ira->ira_zoneid;
1270 ixas.ixa_ifindex = 0;
1271 ixas.ixa_ipst = ipst;
1272 ixas.ixa_cred = kcred;
1273 ixas.ixa_cpid = NOPID;
1274 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */
1275 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1276
1277 /*
1278 * If the source of the original packet was link-local, then
1279 * make sure we send on the same ill (group) as we received it on.
1280 */
1281 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1282 ixas.ixa_flags |= IXAF_SCOPEID_SET;
1283 if (IS_UNDER_IPMP(ill))
1284 ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1285 else
1286 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1287 }
1288
1289 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1290 /*
1291 * Apply IPsec based on how IPsec was applied to
1292 * the packet that had the error.
1293 *
1294 * If it was an outbound packet that caused the ICMP
1295 * error, then the caller will have setup the IRA
1296 * appropriately.
1297 */
1298 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1299 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1300 /* Note: mp already consumed and ip_drop_packet done */
1301 return;
1302 }
1303 } else {
1304 /*
1305 * This is in clear. The icmp message we are building
1306 * here should go out in clear, independent of our policy.
1307 */
1308 ixas.ixa_flags |= IXAF_NO_IPSEC;
1309 }
1310
1311 /*
1312 * If the caller specified the source we use that.
1313 * Otherwise, if the packet was for one of our unicast addresses, make
1314 * sure we respond with that as the source. Otherwise
1315 * have ip_output_simple pick the source address.
1316 */
1317 if (v6src_ptr != NULL) {
1318 v6src = *v6src_ptr;
1319 } else {
1320 ire_t *ire;
1321 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1322
1323 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1324 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1325 match_flags |= MATCH_IRE_ILL;
1326
1327 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1328 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1329 match_flags, 0, ipst, NULL);
1330 if (ire != NULL) {
1331 v6src = ip6h->ip6_dst;
1332 ire_refrele(ire);
1333 } else {
1334 v6src = ipv6_all_zeros;
1335 ixas.ixa_flags |= IXAF_SET_SOURCE;
1336 }
1337 }
1338 v6dst = ip6h->ip6_src;
1339 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1340 msg_len = msgdsize(mp);
1341 if (msg_len > len_needed) {
1342 if (!adjmsg(mp, len_needed - msg_len)) {
1343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1344 freemsg(mp);
1345 return;
1346 }
1347 msg_len = len_needed;
1348 }
1349 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1350 if (mp1 == NULL) {
1351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1352 freemsg(mp);
1353 return;
1354 }
1355 mp1->b_cont = mp;
1356 mp = mp1;
1357
1358 /*
1359 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1360 * node generates be accepted in peace by all on-host destinations.
1361 * If we do NOT assume that all on-host destinations trust
1362 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1363 * (Look for IXAF_TRUSTED_ICMP).
1364 */
1365 ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1366
1367 ip6h = (ip6_t *)mp->b_rptr;
1368 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1369
1370 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1371 ip6h->ip6_nxt = IPPROTO_ICMPV6;
1372 ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1373 ip6h->ip6_dst = v6dst;
1374 ip6h->ip6_src = v6src;
1375 msg_len += IPV6_HDR_LEN + len;
1376 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1377 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1378 msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1379 }
1380 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1381 icmp6 = (icmp6_t *)&ip6h[1];
1382 bcopy(stuff, (char *)icmp6, len);
1383 /*
1384 * Prepare for checksum by putting icmp length in the icmp
1385 * checksum field. The checksum is calculated in ip_output_wire_v6.
1386 */
1387 icmp6->icmp6_cksum = ip6h->ip6_plen;
1388 if (icmp6->icmp6_type == ND_REDIRECT) {
1389 ip6h->ip6_hops = IPV6_MAX_HOPS;
1390 }
1391
1392 (void) ip_output_simple(mp, &ixas);
1393 ixa_cleanup(&ixas);
1394 }
1395
1396 /*
1397 * Update the output mib when ICMPv6 packets are sent.
1398 */
1399 void
icmp_update_out_mib_v6(ill_t * ill,icmp6_t * icmp6)1400 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1401 {
1402 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1403
1404 switch (icmp6->icmp6_type) {
1405 case ICMP6_DST_UNREACH:
1406 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1407 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1408 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1409 break;
1410
1411 case ICMP6_TIME_EXCEEDED:
1412 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1413 break;
1414
1415 case ICMP6_PARAM_PROB:
1416 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1417 break;
1418
1419 case ICMP6_PACKET_TOO_BIG:
1420 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1421 break;
1422
1423 case ICMP6_ECHO_REQUEST:
1424 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1425 break;
1426
1427 case ICMP6_ECHO_REPLY:
1428 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1429 break;
1430
1431 case ND_ROUTER_SOLICIT:
1432 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1433 break;
1434
1435 case ND_ROUTER_ADVERT:
1436 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1437 break;
1438
1439 case ND_NEIGHBOR_SOLICIT:
1440 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1441 break;
1442
1443 case ND_NEIGHBOR_ADVERT:
1444 BUMP_MIB(ill->ill_icmp6_mib,
1445 ipv6IfIcmpOutNeighborAdvertisements);
1446 break;
1447
1448 case ND_REDIRECT:
1449 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1450 break;
1451
1452 case MLD_LISTENER_QUERY:
1453 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1454 break;
1455
1456 case MLD_LISTENER_REPORT:
1457 case MLD_V2_LISTENER_REPORT:
1458 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1459 break;
1460
1461 case MLD_LISTENER_REDUCTION:
1462 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1463 break;
1464 }
1465 }
1466
1467 /*
1468 * Check if it is ok to send an ICMPv6 error packet in
1469 * response to the IP packet in mp.
1470 * Free the message and return null if no
1471 * ICMP error packet should be sent.
1472 */
1473 static mblk_t *
icmp_pkt_err_ok_v6(mblk_t * mp,boolean_t mcast_ok,ip_recv_attr_t * ira)1474 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1475 {
1476 ill_t *ill = ira->ira_ill;
1477 ip_stack_t *ipst = ill->ill_ipst;
1478 boolean_t llbcast;
1479 ip6_t *ip6h;
1480
1481 if (!mp)
1482 return (NULL);
1483
1484 /* We view multicast and broadcast as the same.. */
1485 llbcast = (ira->ira_flags &
1486 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1487 ip6h = (ip6_t *)mp->b_rptr;
1488
1489 /* Check if source address uniquely identifies the host */
1490
1491 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1492 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1493 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1494 freemsg(mp);
1495 return (NULL);
1496 }
1497
1498 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1499 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1500 icmp6_t *icmp6;
1501
1502 if (mp->b_wptr - mp->b_rptr < len_needed) {
1503 if (!pullupmsg(mp, len_needed)) {
1504 BUMP_MIB(ill->ill_icmp6_mib,
1505 ipv6IfIcmpInErrors);
1506 freemsg(mp);
1507 return (NULL);
1508 }
1509 ip6h = (ip6_t *)mp->b_rptr;
1510 }
1511 icmp6 = (icmp6_t *)&ip6h[1];
1512 /* Explicitly do not generate errors in response to redirects */
1513 if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1514 icmp6->icmp6_type == ND_REDIRECT) {
1515 freemsg(mp);
1516 return (NULL);
1517 }
1518 }
1519 /*
1520 * Check that the destination is not multicast and that the packet
1521 * was not sent on link layer broadcast or multicast. (Exception
1522 * is Packet too big message as per the draft - when mcast_ok is set.)
1523 */
1524 if (!mcast_ok &&
1525 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1526 freemsg(mp);
1527 return (NULL);
1528 }
1529 /*
1530 * If this is a labeled system, then check to see if we're allowed to
1531 * send a response to this particular sender. If not, then just drop.
1532 */
1533 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1534 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1535 freemsg(mp);
1536 return (NULL);
1537 }
1538
1539 if (icmp_err_rate_limit(ipst)) {
1540 /*
1541 * Only send ICMP error packets every so often.
1542 * This should be done on a per port/source basis,
1543 * but for now this will suffice.
1544 */
1545 freemsg(mp);
1546 return (NULL);
1547 }
1548 return (mp);
1549 }
1550
1551 /*
1552 * Called when a packet was sent out the same link that it arrived on.
1553 * Check if it is ok to send a redirect and then send it.
1554 */
1555 void
ip_send_potential_redirect_v6(mblk_t * mp,ip6_t * ip6h,ire_t * ire,ip_recv_attr_t * ira)1556 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1557 ip_recv_attr_t *ira)
1558 {
1559 ill_t *ill = ira->ira_ill;
1560 ip_stack_t *ipst = ill->ill_ipst;
1561 in6_addr_t *v6targ;
1562 ire_t *src_ire_v6 = NULL;
1563 mblk_t *mp1;
1564 ire_t *nhop_ire = NULL;
1565
1566 /*
1567 * Don't send a redirect when forwarding a source
1568 * routed packet.
1569 */
1570 if (ip_source_routed_v6(ip6h, mp, ipst))
1571 return;
1572
1573 if (ire->ire_type & IRE_ONLINK) {
1574 /* Target is directly connected */
1575 v6targ = &ip6h->ip6_dst;
1576 } else {
1577 /* Determine the most specific IRE used to send the packets */
1578 nhop_ire = ire_nexthop(ire);
1579 if (nhop_ire == NULL)
1580 return;
1581
1582 /*
1583 * We won't send redirects to a router
1584 * that doesn't have a link local
1585 * address, but will forward.
1586 */
1587 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1588 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1589 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1590 ire_refrele(nhop_ire);
1591 return;
1592 }
1593 v6targ = &nhop_ire->ire_addr_v6;
1594 }
1595 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1596 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1597 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1598
1599 if (src_ire_v6 == NULL) {
1600 if (nhop_ire != NULL)
1601 ire_refrele(nhop_ire);
1602 return;
1603 }
1604
1605 /*
1606 * The source is directly connected.
1607 */
1608 mp1 = copymsg(mp);
1609 if (mp1 != NULL)
1610 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1611
1612 if (nhop_ire != NULL)
1613 ire_refrele(nhop_ire);
1614 ire_refrele(src_ire_v6);
1615 }
1616
1617 /*
1618 * Generate an ICMPv6 redirect message.
1619 * Include target link layer address option if it exits.
1620 * Always include redirect header.
1621 */
1622 static void
icmp_send_redirect_v6(mblk_t * mp,in6_addr_t * targetp,in6_addr_t * dest,ip_recv_attr_t * ira)1623 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1624 ip_recv_attr_t *ira)
1625 {
1626 nd_redirect_t *rd;
1627 nd_opt_rd_hdr_t *rdh;
1628 uchar_t *buf;
1629 ncec_t *ncec = NULL;
1630 nd_opt_hdr_t *opt;
1631 int len;
1632 int ll_opt_len = 0;
1633 int max_redir_hdr_data_len;
1634 int pkt_len;
1635 in6_addr_t *srcp;
1636 ill_t *ill;
1637 boolean_t need_refrele;
1638 ip_stack_t *ipst = ira->ira_ill->ill_ipst;
1639
1640 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1641 if (mp == NULL)
1642 return;
1643
1644 if (IS_UNDER_IPMP(ira->ira_ill)) {
1645 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1646 if (ill == NULL) {
1647 ill = ira->ira_ill;
1648 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1649 ip_drop_output("no IPMP ill for sending redirect",
1650 mp, ill);
1651 freemsg(mp);
1652 return;
1653 }
1654 need_refrele = B_TRUE;
1655 } else {
1656 ill = ira->ira_ill;
1657 need_refrele = B_FALSE;
1658 }
1659
1660 ncec = ncec_lookup_illgrp_v6(ill, targetp);
1661 if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1662 ncec->ncec_lladdr != NULL) {
1663 ll_opt_len = (sizeof (nd_opt_hdr_t) +
1664 ill->ill_phys_addr_length + 7)/8 * 8;
1665 }
1666 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1667 ASSERT(len % 4 == 0);
1668 buf = kmem_alloc(len, KM_NOSLEEP);
1669 if (buf == NULL) {
1670 if (ncec != NULL)
1671 ncec_refrele(ncec);
1672 if (need_refrele)
1673 ill_refrele(ill);
1674 freemsg(mp);
1675 return;
1676 }
1677
1678 rd = (nd_redirect_t *)buf;
1679 rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1680 rd->nd_rd_code = 0;
1681 rd->nd_rd_reserved = 0;
1682 rd->nd_rd_target = *targetp;
1683 rd->nd_rd_dst = *dest;
1684
1685 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1686 if (ncec != NULL && ll_opt_len != 0) {
1687 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1688 opt->nd_opt_len = ll_opt_len/8;
1689 bcopy((char *)ncec->ncec_lladdr, &opt[1],
1690 ill->ill_phys_addr_length);
1691 }
1692 if (ncec != NULL)
1693 ncec_refrele(ncec);
1694 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1695 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1696 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1697 max_redir_hdr_data_len =
1698 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1699 pkt_len = msgdsize(mp);
1700 /* Make sure mp is 8 byte aligned */
1701 if (pkt_len > max_redir_hdr_data_len) {
1702 rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1703 sizeof (nd_opt_rd_hdr_t))/8;
1704 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1705 } else {
1706 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1707 (void) adjmsg(mp, -(pkt_len % 8));
1708 }
1709 rdh->nd_opt_rh_reserved1 = 0;
1710 rdh->nd_opt_rh_reserved2 = 0;
1711 /* ipif_v6lcl_addr contains the link-local source address */
1712 srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1713
1714 /* Redirects sent by router, and router is global zone */
1715 ASSERT(ira->ira_zoneid == ALL_ZONES);
1716 ira->ira_zoneid = GLOBAL_ZONEID;
1717 icmp_pkt_v6(mp, buf, len, srcp, ira);
1718 kmem_free(buf, len);
1719 if (need_refrele)
1720 ill_refrele(ill);
1721 }
1722
1723
1724 /* Generate an ICMP time exceeded message. (May be called as writer.) */
1725 void
icmp_time_exceeded_v6(mblk_t * mp,uint8_t code,boolean_t mcast_ok,ip_recv_attr_t * ira)1726 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1727 ip_recv_attr_t *ira)
1728 {
1729 icmp6_t icmp6;
1730
1731 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1732 if (mp == NULL)
1733 return;
1734
1735 bzero(&icmp6, sizeof (icmp6_t));
1736 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1737 icmp6.icmp6_code = code;
1738 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1739 }
1740
1741 /*
1742 * Generate an ICMP unreachable message.
1743 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1744 * constructed by the caller.
1745 */
1746 void
icmp_unreachable_v6(mblk_t * mp,uint8_t code,boolean_t mcast_ok,ip_recv_attr_t * ira)1747 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1748 ip_recv_attr_t *ira)
1749 {
1750 icmp6_t icmp6;
1751
1752 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1753 if (mp == NULL)
1754 return;
1755
1756 bzero(&icmp6, sizeof (icmp6_t));
1757 icmp6.icmp6_type = ICMP6_DST_UNREACH;
1758 icmp6.icmp6_code = code;
1759 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1760 }
1761
1762 /*
1763 * Generate an ICMP pkt too big message.
1764 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1765 * constructed by the caller.
1766 */
1767 void
icmp_pkt2big_v6(mblk_t * mp,uint32_t mtu,boolean_t mcast_ok,ip_recv_attr_t * ira)1768 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1769 ip_recv_attr_t *ira)
1770 {
1771 icmp6_t icmp6;
1772
1773 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1774 if (mp == NULL)
1775 return;
1776
1777 bzero(&icmp6, sizeof (icmp6_t));
1778 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1779 icmp6.icmp6_code = 0;
1780 icmp6.icmp6_mtu = htonl(mtu);
1781
1782 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1783 }
1784
1785 /*
1786 * Generate an ICMP parameter problem message. (May be called as writer.)
1787 * 'offset' is the offset from the beginning of the packet in error.
1788 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1789 * constructed by the caller.
1790 */
1791 static void
icmp_param_problem_v6(mblk_t * mp,uint8_t code,uint32_t offset,boolean_t mcast_ok,ip_recv_attr_t * ira)1792 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1793 boolean_t mcast_ok, ip_recv_attr_t *ira)
1794 {
1795 icmp6_t icmp6;
1796
1797 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1798 if (mp == NULL)
1799 return;
1800
1801 bzero((char *)&icmp6, sizeof (icmp6_t));
1802 icmp6.icmp6_type = ICMP6_PARAM_PROB;
1803 icmp6.icmp6_code = code;
1804 icmp6.icmp6_pptr = htonl(offset);
1805 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1806 }
1807
1808 void
icmp_param_problem_nexthdr_v6(mblk_t * mp,boolean_t mcast_ok,ip_recv_attr_t * ira)1809 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1810 ip_recv_attr_t *ira)
1811 {
1812 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
1813 uint16_t hdr_length;
1814 uint8_t *nexthdrp;
1815 uint32_t offset;
1816 ill_t *ill = ira->ira_ill;
1817
1818 /* Determine the offset of the bad nexthdr value */
1819 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
1820 /* Malformed packet */
1821 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1822 ip_drop_input("ipIfStatsInDiscards", mp, ill);
1823 freemsg(mp);
1824 return;
1825 }
1826
1827 offset = nexthdrp - mp->b_rptr;
1828 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1829 mcast_ok, ira);
1830 }
1831
1832 /*
1833 * Verify whether or not the IP address is a valid local address.
1834 * Could be a unicast, including one for a down interface.
1835 * If allow_mcbc then a multicast or broadcast address is also
1836 * acceptable.
1837 *
1838 * In the case of a multicast address, however, the
1839 * upper protocol is expected to reset the src address
1840 * to zero when we return IPVL_MCAST so that
1841 * no packets are emitted with multicast address as
1842 * source address.
1843 * The addresses valid for bind are:
1844 * (1) - in6addr_any
1845 * (2) - IP address of an UP interface
1846 * (3) - IP address of a DOWN interface
1847 * (4) - a multicast address. In this case
1848 * the conn will only receive packets destined to
1849 * the specified multicast address. Note: the
1850 * application still has to issue an
1851 * IPV6_JOIN_GROUP socket option.
1852 *
1853 * In all the above cases, the bound address must be valid in the current zone.
1854 * When the address is loopback or multicast, there might be many matching IREs
1855 * so bind has to look up based on the zone.
1856 */
1857 ip_laddr_t
ip_laddr_verify_v6(const in6_addr_t * v6src,zoneid_t zoneid,ip_stack_t * ipst,boolean_t allow_mcbc,uint_t scopeid)1858 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1859 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1860 {
1861 ire_t *src_ire;
1862 uint_t match_flags;
1863 ill_t *ill = NULL;
1864
1865 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1866 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1867
1868 match_flags = MATCH_IRE_ZONEONLY;
1869 if (scopeid != 0) {
1870 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1871 if (ill == NULL)
1872 return (IPVL_BAD);
1873 match_flags |= MATCH_IRE_ILL;
1874 }
1875
1876 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1877 ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1878 if (ill != NULL)
1879 ill_refrele(ill);
1880
1881 /*
1882 * If an address other than in6addr_any is requested,
1883 * we verify that it is a valid address for bind
1884 * Note: Following code is in if-else-if form for
1885 * readability compared to a condition check.
1886 */
1887 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1888 /*
1889 * (2) Bind to address of local UP interface
1890 */
1891 ire_refrele(src_ire);
1892 return (IPVL_UNICAST_UP);
1893 } else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1894 /* (4) bind to multicast address. */
1895 if (src_ire != NULL)
1896 ire_refrele(src_ire);
1897
1898 /*
1899 * Note: caller should take IPV6_MULTICAST_IF
1900 * into account when selecting a real source address.
1901 */
1902 if (allow_mcbc)
1903 return (IPVL_MCAST);
1904 else
1905 return (IPVL_BAD);
1906 } else {
1907 ipif_t *ipif;
1908
1909 /*
1910 * (3) Bind to address of local DOWN interface?
1911 * (ipif_lookup_addr() looks up all interfaces
1912 * but we do not get here for UP interfaces
1913 * - case (2) above)
1914 */
1915 if (src_ire != NULL)
1916 ire_refrele(src_ire);
1917
1918 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1919 if (ipif == NULL)
1920 return (IPVL_BAD);
1921
1922 /* Not a useful source? */
1923 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1924 ipif_refrele(ipif);
1925 return (IPVL_BAD);
1926 }
1927 ipif_refrele(ipif);
1928 return (IPVL_UNICAST_DOWN);
1929 }
1930 }
1931
1932 /*
1933 * Verify that both the source and destination addresses are valid. If
1934 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1935 * i.e. have no route to it. Protocols like TCP want to verify destination
1936 * reachability, while tunnels do not.
1937 *
1938 * Determine the route, the interface, and (optionally) the source address
1939 * to use to reach a given destination.
1940 * Note that we allow connect to broadcast and multicast addresses when
1941 * IPDF_ALLOW_MCBC is set.
1942 * first_hop and dst_addr are normally the same, but if source routing
1943 * they will differ; in that case the first_hop is what we'll use for the
1944 * routing lookup but the dce and label checks will be done on dst_addr,
1945 *
1946 * If uinfo is set, then we fill in the best available information
1947 * we have for the destination. This is based on (in priority order) any
1948 * metrics and path MTU stored in a dce_t, route metrics, and finally the
1949 * ill_mtu/ill_mc_mtu.
1950 *
1951 * Tsol note: If we have a source route then dst_addr != firsthop. But we
1952 * always do the label check on dst_addr.
1953 *
1954 * Assumes that the caller has set ixa_scopeid for link-local communication.
1955 */
1956 int
ip_set_destination_v6(in6_addr_t * src_addrp,const in6_addr_t * dst_addr,const in6_addr_t * firsthop,ip_xmit_attr_t * ixa,iulp_t * uinfo,uint32_t flags,uint_t mac_mode)1957 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1958 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1959 uint32_t flags, uint_t mac_mode)
1960 {
1961 ire_t *ire;
1962 int error = 0;
1963 in6_addr_t setsrc; /* RTF_SETSRC */
1964 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */
1965 ip_stack_t *ipst = ixa->ixa_ipst;
1966 dce_t *dce;
1967 uint_t pmtu;
1968 uint_t ifindex;
1969 uint_t generation;
1970 nce_t *nce;
1971 ill_t *ill = NULL;
1972 boolean_t multirt = B_FALSE;
1973
1974 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1975
1976 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1977
1978 /*
1979 * We never send to zero; the ULPs map it to the loopback address.
	 * We can't allow it since we use zero to mean uninitialized in some
1981 * places.
1982 */
1983 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1984
1985 if (is_system_labeled()) {
1986 ts_label_t *tsl = NULL;
1987
1988 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
1989 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
1990 if (error != 0)
1991 return (error);
1992 if (tsl != NULL) {
1993 /* Update the label */
1994 ip_xmit_attr_replace_tsl(ixa, tsl);
1995 }
1996 }
1997
1998 setsrc = ipv6_all_zeros;
1999 /*
2000 * Select a route; For IPMP interfaces, we would only select
2001 * a "hidden" route (i.e., going through a specific under_ill)
2002 * if ixa_ifindex has been specified.
2003 */
2004 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
2005 &setsrc, &error, &multirt);
2006 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */
2007 if (error != 0)
2008 goto bad_addr;
2009
2010 /*
2011 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
2012 * If IPDF_VERIFY_DST is set, the destination must be reachable.
2013 * Otherwise the destination needn't be reachable.
2014 *
2015 * If we match on a reject or black hole, then we've got a
2016 * local failure. May as well fail out the connect() attempt,
2017 * since it's never going to succeed.
2018 */
2019 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2020 /*
2021 * If we're verifying destination reachability, we always want
2022 * to complain here.
2023 *
2024 * If we're not verifying destination reachability but the
2025 * destination has a route, we still want to fail on the
2026 * temporary address and broadcast address tests.
2027 *
2028 * In both cases do we let the code continue so some reasonable
2029 * information is returned to the caller. That enables the
		 * caller to use (and even cache) the IRE. conn_ip_output will
2031 * use the generation mismatch path to check for the unreachable
2032 * case thereby avoiding any specific check in the main path.
2033 */
2034 ASSERT(generation == IRE_GENERATION_VERIFY);
2035 if (flags & IPDF_VERIFY_DST) {
2036 /*
2037 * Set errno but continue to set up ixa_ire to be
2038 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2039 * That allows callers to use ip_output to get an
2040 * ICMP error back.
2041 */
2042 if (!(ire->ire_type & IRE_HOST))
2043 error = ENETUNREACH;
2044 else
2045 error = EHOSTUNREACH;
2046 }
2047 }
2048
2049 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2050 !(flags & IPDF_ALLOW_MCBC)) {
2051 ire_refrele(ire);
2052 ire = ire_reject(ipst, B_FALSE);
2053 generation = IRE_GENERATION_VERIFY;
2054 error = ENETUNREACH;
2055 }
2056
2057 /* Cache things */
2058 if (ixa->ixa_ire != NULL)
2059 ire_refrele_notr(ixa->ixa_ire);
2060 #ifdef DEBUG
2061 ire_refhold_notr(ire);
2062 ire_refrele(ire);
2063 #endif
2064 ixa->ixa_ire = ire;
2065 ixa->ixa_ire_generation = generation;
2066
2067 /*
2068 * Ensure that ixa_dce is always set any time that ixa_ire is set,
2069 * since some callers will send a packet to conn_ip_output() even if
2070 * there's an error.
2071 */
2072 ifindex = 0;
2073 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2074 /* If we are creating a DCE we'd better have an ifindex */
2075 if (ill != NULL)
2076 ifindex = ill->ill_phyint->phyint_ifindex;
2077 else
2078 flags &= ~IPDF_UNIQUE_DCE;
2079 }
2080
2081 if (flags & IPDF_UNIQUE_DCE) {
2082 /* Fallback to the default dce if allocation fails */
2083 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2084 if (dce != NULL) {
2085 generation = dce->dce_generation;
2086 } else {
2087 dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2088 &generation);
2089 }
2090 } else {
2091 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2092 }
2093 ASSERT(dce != NULL);
2094 if (ixa->ixa_dce != NULL)
2095 dce_refrele_notr(ixa->ixa_dce);
2096 #ifdef DEBUG
2097 dce_refhold_notr(dce);
2098 dce_refrele(dce);
2099 #endif
2100 ixa->ixa_dce = dce;
2101 ixa->ixa_dce_generation = generation;
2102
2103
2104 /*
2105 * For multicast with multirt we have a flag passed back from
2106 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2107 * possible multicast address.
2108 * We also need a flag for multicast since we can't check
2109 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2110 */
2111 if (multirt) {
2112 ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2113 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2114 } else {
2115 ixa->ixa_postfragfn = ire->ire_postfragfn;
2116 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2117 }
2118 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2119 /* Get an nce to cache. */
2120 nce = ire_to_nce(ire, 0, firsthop);
2121 if (nce == NULL) {
2122 /* Allocation failure? */
2123 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2124 } else {
2125 if (ixa->ixa_nce != NULL)
2126 nce_refrele(ixa->ixa_nce);
2127 ixa->ixa_nce = nce;
2128 }
2129 }
2130
2131 /*
2132 * If the source address is a loopback address, the
2133 * destination had best be local or multicast.
2134 * If we are sending to an IRE_LOCAL using a loopback source then
2135 * it had better be the same zoneid.
2136 */
2137 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2138 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
2139 ire = NULL; /* Stored in ixa_ire */
2140 error = EADDRNOTAVAIL;
2141 goto bad_addr;
2142 }
2143 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2144 ire = NULL; /* Stored in ixa_ire */
2145 error = EADDRNOTAVAIL;
2146 goto bad_addr;
2147 }
2148 }
2149
2150 /*
2151 * Does the caller want us to pick a source address?
2152 */
2153 if (flags & IPDF_SELECT_SRC) {
2154 in6_addr_t src_addr;
2155
2156 /*
2157 * We use ire_nexthop_ill to avoid the under ipmp
2158 * interface for source address selection. Note that for ipmp
2159 * probe packets, ixa_ifindex would have been specified, and
2160 * the ip_select_route() invocation would have picked an ire
2161 * with ire_ill pointing at an under interface.
2162 */
2163 ill = ire_nexthop_ill(ire);
2164
2165 /* If unreachable we have no ill but need some source */
2166 if (ill == NULL) {
2167 src_addr = ipv6_loopback;
2168 /* Make sure we look for a better source address */
2169 generation = SRC_GENERATION_VERIFY;
2170 } else {
2171 error = ip_select_source_v6(ill, &setsrc, dst_addr,
2172 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2173 &src_addr, &generation, NULL);
2174 if (error != 0) {
2175 ire = NULL; /* Stored in ixa_ire */
2176 goto bad_addr;
2177 }
2178 }
2179
2180 /*
2181 * We allow the source address to be down.
2182 * However, we check that we don't use the loopback address
2183 * as a source when sending out on the wire.
2184 */
2185 if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2186 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2187 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2188 ire = NULL; /* Stored in ixa_ire */
2189 error = EADDRNOTAVAIL;
2190 goto bad_addr;
2191 }
2192
2193 *src_addrp = src_addr;
2194 ixa->ixa_src_generation = generation;
2195 }
2196
2197 /*
2198 * Make sure we don't leave an unreachable ixa_nce in place
2199 * since ip_select_route is used when we unplumb i.e., remove
2200 * references on ixa_ire, ixa_nce, and ixa_dce.
2201 */
2202 nce = ixa->ixa_nce;
2203 if (nce != NULL && nce->nce_is_condemned) {
2204 nce_refrele(nce);
2205 ixa->ixa_nce = NULL;
2206 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2207 }
2208
2209 /*
2210 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2211 * multicast. But pmtu discovery is only enabled for connected
2212 * sockets in general.
2213 */
2214
2215 /*
2216 * Set initial value for fragmentation limit. Either conn_ip_output
2217 * or ULP might update it when there are routing changes.
2218 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2219 */
2220 pmtu = ip_get_pmtu(ixa);
2221 ixa->ixa_fragsize = pmtu;
2222 /* Make sure ixa_fragsize and ixa_pmtu remain identical */
2223 if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2224 ixa->ixa_pmtu = pmtu;
2225
2226 /*
2227 * Extract information useful for some transports.
2228 * First we look for DCE metrics. Then we take what we have in
2229 * the metrics in the route, where the offlink is used if we have
2230 * one.
2231 */
2232 if (uinfo != NULL) {
2233 bzero(uinfo, sizeof (*uinfo));
2234
2235 if (dce->dce_flags & DCEF_UINFO)
2236 *uinfo = dce->dce_uinfo;
2237
2238 rts_merge_metrics(uinfo, &ire->ire_metrics);
2239
2240 /* Allow ire_metrics to decrease the path MTU from above */
2241 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2242 uinfo->iulp_mtu = pmtu;
2243
2244 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2245 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2246 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2247 }
2248
2249 if (ill != NULL)
2250 ill_refrele(ill);
2251
2252 return (error);
2253
2254 bad_addr:
2255 if (ire != NULL)
2256 ire_refrele(ire);
2257
2258 if (ill != NULL)
2259 ill_refrele(ill);
2260
2261 /*
2262 * Make sure we don't leave an unreachable ixa_nce in place
2263 * since ip_select_route is used when we unplumb i.e., remove
2264 * references on ixa_ire, ixa_nce, and ixa_dce.
2265 */
2266 nce = ixa->ixa_nce;
2267 if (nce != NULL && nce->nce_is_condemned) {
2268 nce_refrele(nce);
2269 ixa->ixa_nce = NULL;
2270 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2271 }
2272
2273 return (error);
2274 }
2275
2276 /*
2277 * Handle protocols with which IP is less intimate. There
2278 * can be more than one stream bound to a particular
2279 * protocol. When this is the case, normally each one gets a copy
2280 * of any incoming packets.
2281 *
2282 * Zones notes:
2283 * Packets will be distributed to conns in all zones. This is really only
2284 * useful for ICMPv6 as only applications in the global zone can create raw
2285 * sockets for other protocols.
2286 */
2287 void
ip_fanout_proto_v6(mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira)2288 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2289 {
2290 mblk_t *mp1;
2291 in6_addr_t laddr = ip6h->ip6_dst;
2292 conn_t *connp, *first_connp, *next_connp;
2293 connf_t *connfp;
2294 ill_t *ill = ira->ira_ill;
2295 ip_stack_t *ipst = ill->ill_ipst;
2296
2297 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2298 mutex_enter(&connfp->connf_lock);
2299 connp = connfp->connf_head;
2300 for (connp = connfp->connf_head; connp != NULL;
2301 connp = connp->conn_next) {
2302 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2303 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2304 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2305 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2306 break;
2307 }
2308
2309 if (connp == NULL) {
2310 /*
2311 * No one bound to this port. Is
2312 * there a client that wants all
2313 * unclaimed datagrams?
2314 */
2315 mutex_exit(&connfp->connf_lock);
2316 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2317 ICMP6_PARAMPROB_NEXTHEADER, ira);
2318 return;
2319 }
2320
2321 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2322
2323 CONN_INC_REF(connp);
2324 first_connp = connp;
2325
2326 /*
2327 * XXX: Fix the multiple protocol listeners case. We should not
2328 * be walking the conn->conn_next list here.
2329 */
2330 connp = connp->conn_next;
2331 for (;;) {
2332 while (connp != NULL) {
2333 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2334 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2335 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2336 tsol_receive_local(mp, &laddr, IPV6_VERSION,
2337 ira, connp)))
2338 break;
2339 connp = connp->conn_next;
2340 }
2341
2342 if (connp == NULL) {
2343 /* No more interested clients */
2344 connp = first_connp;
2345 break;
2346 }
2347 if (((mp1 = dupmsg(mp)) == NULL) &&
2348 ((mp1 = copymsg(mp)) == NULL)) {
2349 /* Memory allocation failed */
2350 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2351 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2352 connp = first_connp;
2353 break;
2354 }
2355
2356 CONN_INC_REF(connp);
2357 mutex_exit(&connfp->connf_lock);
2358
2359 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2360 ira);
2361
2362 mutex_enter(&connfp->connf_lock);
2363 /* Follow the next pointer before releasing the conn. */
2364 next_connp = connp->conn_next;
2365 CONN_DEC_REF(connp);
2366 connp = next_connp;
2367 }
2368
2369 /* Last one. Send it upstream. */
2370 mutex_exit(&connfp->connf_lock);
2371
2372 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2373
2374 CONN_DEC_REF(connp);
2375 }
2376
2377 /*
2378 * Called when it is conceptually a ULP that would sent the packet
2379 * e.g., port unreachable and nexthdr unknown. Check that the packet
2380 * would have passed the IPsec global policy before sending the error.
2381 *
2382 * Send an ICMP error after patching up the packet appropriately.
2383 * Uses ip_drop_input and bumps the appropriate MIB.
2384 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2385 */
2386 void
ip_fanout_send_icmp_v6(mblk_t * mp,uint_t icmp_type,uint8_t icmp_code,ip_recv_attr_t * ira)2387 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2388 ip_recv_attr_t *ira)
2389 {
2390 ip6_t *ip6h;
2391 boolean_t secure;
2392 ill_t *ill = ira->ira_ill;
2393 ip_stack_t *ipst = ill->ill_ipst;
2394 netstack_t *ns = ipst->ips_netstack;
2395 ipsec_stack_t *ipss = ns->netstack_ipsec;
2396
2397 secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2398
2399 /*
2400 * We are generating an icmp error for some inbound packet.
2401 * Called from all ip_fanout_(udp, tcp, proto) functions.
2402 * Before we generate an error, check with global policy
2403 * to see whether this is allowed to enter the system. As
2404 * there is no "conn", we are checking with global policy.
2405 */
2406 ip6h = (ip6_t *)mp->b_rptr;
2407 if (secure || ipss->ipsec_inbound_v6_policy_present) {
2408 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2409 if (mp == NULL)
2410 return;
2411 }
2412
2413 /* We never send errors for protocols that we do implement */
2414 if (ira->ira_protocol == IPPROTO_ICMPV6) {
2415 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2416 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2417 freemsg(mp);
2418 return;
2419 }
2420
2421 switch (icmp_type) {
2422 case ICMP6_DST_UNREACH:
2423 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2424
2425 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2426 ip_drop_input("ipIfStatsNoPorts", mp, ill);
2427
2428 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2429 break;
2430 case ICMP6_PARAM_PROB:
2431 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2432
2433 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2434 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2435
2436 /* Let the system determine the offset for this one */
2437 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2438 break;
2439 default:
2440 #ifdef DEBUG
2441 panic("ip_fanout_send_icmp_v6: wrong type");
2442 /*NOTREACHED*/
2443 #else
2444 freemsg(mp);
2445 break;
2446 #endif
2447 }
2448 }
2449
2450 /*
2451 * Fanout for UDP packets that are multicast or ICMP errors.
2452 * (Unicast fanout is handled in ip_input_v6.)
2453 *
2454 * If SO_REUSEADDR is set all multicast packets
2455 * will be delivered to all conns bound to the same port.
2456 *
2457 * Fanout for UDP packets.
2458 * The caller puts <fport, lport> in the ports parameter.
2459 * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2460 *
2461 * If SO_REUSEADDR is set all multicast and broadcast packets
2462 * will be delivered to all conns bound to the same port.
2463 *
2464 * Zones notes:
2465 * Earlier in ip_input on a system with multiple shared-IP zones we
2466 * duplicate the multicast and broadcast packets and send them up
2467 * with each explicit zoneid that exists on that ill.
2468 * This means that here we can match the zoneid with SO_ALLZONES being special.
2469 */
2470 void
ip_fanout_udp_multi_v6(mblk_t * mp,ip6_t * ip6h,uint16_t lport,uint16_t fport,ip_recv_attr_t * ira)2471 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2472 ip_recv_attr_t *ira)
2473 {
2474 in6_addr_t laddr;
2475 conn_t *connp;
2476 connf_t *connfp;
2477 in6_addr_t faddr;
2478 ill_t *ill = ira->ira_ill;
2479 ip_stack_t *ipst = ill->ill_ipst;
2480
2481 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2482
2483 laddr = ip6h->ip6_dst;
2484 faddr = ip6h->ip6_src;
2485
2486 /* Attempt to find a client stream based on destination port. */
2487 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2488 mutex_enter(&connfp->connf_lock);
2489 connp = connfp->connf_head;
2490 while (connp != NULL) {
2491 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2492 conn_wantpacket_v6(connp, ira, ip6h) &&
2493 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2494 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2495 break;
2496 connp = connp->conn_next;
2497 }
2498
2499 if (connp == NULL)
2500 goto notfound;
2501
2502 CONN_INC_REF(connp);
2503
2504 if (connp->conn_reuseaddr) {
2505 conn_t *first_connp = connp;
2506 conn_t *next_connp;
2507 mblk_t *mp1;
2508
2509 connp = connp->conn_next;
2510 for (;;) {
2511 while (connp != NULL) {
2512 if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2513 fport, faddr) &&
2514 conn_wantpacket_v6(connp, ira, ip6h) &&
2515 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2516 tsol_receive_local(mp, &laddr, IPV6_VERSION,
2517 ira, connp)))
2518 break;
2519 connp = connp->conn_next;
2520 }
2521 if (connp == NULL) {
2522 /* No more interested clients */
2523 connp = first_connp;
2524 break;
2525 }
2526 if (((mp1 = dupmsg(mp)) == NULL) &&
2527 ((mp1 = copymsg(mp)) == NULL)) {
2528 /* Memory allocation failed */
2529 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2530 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2531 connp = first_connp;
2532 break;
2533 }
2534
2535 CONN_INC_REF(connp);
2536 mutex_exit(&connfp->connf_lock);
2537
2538 IP6_STAT(ipst, ip6_udp_fanmb);
2539 ip_fanout_udp_conn(connp, mp1, NULL,
2540 (ip6_t *)mp1->b_rptr, ira);
2541
2542 mutex_enter(&connfp->connf_lock);
2543 /* Follow the next pointer before releasing the conn. */
2544 next_connp = connp->conn_next;
2545 IP6_STAT(ipst, ip6_udp_fanmb);
2546 CONN_DEC_REF(connp);
2547 connp = next_connp;
2548 }
2549 }
2550
2551 /* Last one. Send it upstream. */
2552 mutex_exit(&connfp->connf_lock);
2553
2554 IP6_STAT(ipst, ip6_udp_fanmb);
2555 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2556 CONN_DEC_REF(connp);
2557 return;
2558
2559 notfound:
2560 mutex_exit(&connfp->connf_lock);
2561 /*
2562 * No one bound to this port. Is
2563 * there a client that wants all
2564 * unclaimed datagrams?
2565 */
2566 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2567 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2568 ip_fanout_proto_v6(mp, ip6h, ira);
2569 } else {
2570 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2571 ICMP6_DST_UNREACH_NOPORT, ira);
2572 }
2573 }
2574
2575 /*
2576 * int ip_find_hdr_v6()
2577 *
2578 * This routine is used by the upper layer protocols, iptun, and IPsec:
2579 * - Set extension header pointers to appropriate locations
2580 * - Determine IPv6 header length and return it
2581 * - Return a pointer to the last nexthdr value
2582 *
2583 * The caller must initialize ipp_fields.
2584 * The upper layer protocols normally set label_separate which makes the
2585 * routine put the TX label in ipp_label_v6. If this is not set then
2586 * the hop-by-hop options including the label are placed in ipp_hopopts.
2587 *
2588 * NOTE: If multiple extension headers of the same type are present,
2589 * ip_find_hdr_v6() will set the respective extension header pointers
2590 * to the first one that it encounters in the IPv6 header. It also
2591 * skips fragment headers. This routine deals with malformed packets
2592 * of various sorts in which case the returned length is up to the
2593 * malformed part.
2594 */
2595 int
ip_find_hdr_v6(mblk_t * mp,ip6_t * ip6h,boolean_t label_separate,ip_pkt_t * ipp,uint8_t * nexthdrp)2596 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2597 uint8_t *nexthdrp)
2598 {
2599 uint_t length, ehdrlen;
2600 uint8_t nexthdr;
2601 uint8_t *whereptr, *endptr;
2602 ip6_dest_t *tmpdstopts;
2603 ip6_rthdr_t *tmprthdr;
2604 ip6_hbh_t *tmphopopts;
2605 ip6_frag_t *tmpfraghdr;
2606
2607 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2608 ipp->ipp_hoplimit = ip6h->ip6_hops;
2609 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2610 ipp->ipp_addr = ip6h->ip6_dst;
2611
2612 length = IPV6_HDR_LEN;
2613 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2614 endptr = mp->b_wptr;
2615
2616 nexthdr = ip6h->ip6_nxt;
2617 while (whereptr < endptr) {
2618 /* Is there enough left for len + nexthdr? */
2619 if (whereptr + MIN_EHDR_LEN > endptr)
2620 goto done;
2621
2622 switch (nexthdr) {
2623 case IPPROTO_HOPOPTS: {
2624 /* We check for any CIPSO */
2625 uchar_t *secopt;
2626 boolean_t hbh_needed;
2627 uchar_t *after_secopt;
2628
2629 tmphopopts = (ip6_hbh_t *)whereptr;
2630 ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2631 if ((uchar_t *)tmphopopts + ehdrlen > endptr)
2632 goto done;
2633 nexthdr = tmphopopts->ip6h_nxt;
2634
2635 if (!label_separate) {
2636 secopt = NULL;
2637 after_secopt = whereptr;
2638 } else {
2639 /*
2640 * We have dropped packets with bad options in
2641 * ip6_input. No need to check return value
2642 * here.
2643 */
2644 (void) tsol_find_secopt_v6(whereptr, ehdrlen,
2645 &secopt, &after_secopt, &hbh_needed);
2646 }
2647 if (secopt != NULL && after_secopt - whereptr > 0) {
2648 ipp->ipp_fields |= IPPF_LABEL_V6;
2649 ipp->ipp_label_v6 = secopt;
2650 ipp->ipp_label_len_v6 = after_secopt - whereptr;
2651 } else {
2652 ipp->ipp_label_len_v6 = 0;
2653 after_secopt = whereptr;
2654 hbh_needed = B_TRUE;
2655 }
2656 /* return only 1st hbh */
2657 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2658 ipp->ipp_fields |= IPPF_HOPOPTS;
2659 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2660 ipp->ipp_hopoptslen = ehdrlen -
2661 ipp->ipp_label_len_v6;
2662 }
2663 break;
2664 }
2665 case IPPROTO_DSTOPTS:
2666 tmpdstopts = (ip6_dest_t *)whereptr;
2667 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2668 if ((uchar_t *)tmpdstopts + ehdrlen > endptr)
2669 goto done;
2670 nexthdr = tmpdstopts->ip6d_nxt;
2671 /*
2672 * ipp_dstopts is set to the destination header after a
2673 * routing header.
2674 * Assume it is a post-rthdr destination header
2675 * and adjust when we find an rthdr.
2676 */
2677 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2678 ipp->ipp_fields |= IPPF_DSTOPTS;
2679 ipp->ipp_dstopts = tmpdstopts;
2680 ipp->ipp_dstoptslen = ehdrlen;
2681 }
2682 break;
2683 case IPPROTO_ROUTING:
2684 tmprthdr = (ip6_rthdr_t *)whereptr;
2685 ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2686 if ((uchar_t *)tmprthdr + ehdrlen > endptr)
2687 goto done;
2688 nexthdr = tmprthdr->ip6r_nxt;
2689 /* return only 1st rthdr */
2690 if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2691 ipp->ipp_fields |= IPPF_RTHDR;
2692 ipp->ipp_rthdr = tmprthdr;
2693 ipp->ipp_rthdrlen = ehdrlen;
2694 }
2695 /*
2696 * Make any destination header we've seen be a
2697 * pre-rthdr destination header.
2698 */
2699 if (ipp->ipp_fields & IPPF_DSTOPTS) {
2700 ipp->ipp_fields &= ~IPPF_DSTOPTS;
2701 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2702 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2703 ipp->ipp_dstopts = NULL;
2704 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2705 ipp->ipp_dstoptslen = 0;
2706 }
2707 break;
2708 case IPPROTO_FRAGMENT:
2709 tmpfraghdr = (ip6_frag_t *)whereptr;
2710 ehdrlen = sizeof (ip6_frag_t);
2711 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2712 goto done;
2713 nexthdr = tmpfraghdr->ip6f_nxt;
2714 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2715 ipp->ipp_fields |= IPPF_FRAGHDR;
2716 ipp->ipp_fraghdr = tmpfraghdr;
2717 ipp->ipp_fraghdrlen = ehdrlen;
2718 }
2719 break;
2720 case IPPROTO_NONE:
2721 default:
2722 goto done;
2723 }
2724 length += ehdrlen;
2725 whereptr += ehdrlen;
2726 }
2727 done:
2728 if (nexthdrp != NULL)
2729 *nexthdrp = nexthdr;
2730 return (length);
2731 }
2732
2733 /*
2734 * Return the length of the IPv6 related headers (including extension headers)
2735 * If the packet is malformed, this returns the simple IPv6 header length.
2736 */
2737 uint16_t
ip_hdr_length_v6(mblk_t * mp,ip6_t * ip6h)2738 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2739 {
2740 uint16_t hdr_len;
2741
2742 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, NULL))
2743 hdr_len = sizeof (*ip6h);
2744 return (hdr_len);
2745 }
2746
/*
 * Parse and process any hop-by-hop or destination options.
 *
 * mp		- the packet; consumed (freed or handed to the ICMP error
 *		  routines) whenever -1 is returned.
 * ip6h		- the IPv6 header; used to compute the offset reported in
 *		  ICMP parameter problem errors, and its ip6_src is swapped
 *		  when a home address option is processed.
 * optptr	- first option to process.
 * optlen	- number of option bytes at optptr.
 * hdr_type	- IPPROTO_HOPOPTS or IPPROTO_DSTOPTS; options found in the
 *		  wrong kind of header are treated as errors.
 * ira		- receive attributes of the packet.
 *
 * Assumes that q is an ill read queue so that ICMP errors for link-local
 * destinations are sent out the correct interface.
 * NOTE(review): this routine has no queue argument; the sentence above
 * looks like it predates the ira argument - confirm.
 *
 * Returns -1 if there was an error and mp has been consumed.
 * Returns 0 if no special action is needed.
 * Returns 1 if the packet contained a router alert option for this node
 * which is verified to be "interesting/known" for our implementation.
 *
 * XXX Note: In future as more hbh or dest options are defined,
 * it may be better to have different routines for hbh and dest
 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
 * may have same value in different namespaces. Or is it same namespace ??
 * Current code checks for each opt_type (other than pads) if it is in
 * the expected nexthdr (hbh or dest)
 */
int
ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
    uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
{
	uint8_t opt_type;
	uint_t optused = 0;
	int ret = 0;
	const char *errtype;
	ill_t *ill = ira->ira_ill;
	ip_stack_t *ipst = ill->ill_ipst;

	/* Each iteration consumes exactly one option (optused bytes). */
	while (optlen != 0) {
		opt_type = *optptr;
		if (opt_type == IP6OPT_PAD1) {
			/* Pad1 is a lone type byte with no length field. */
			optused = 1;
		} else {
			/* Everything else has a type byte and a length byte. */
			if (optlen < 2)
				goto bad_opt;
			errtype = "malformed";
			/*
			 * ip6opt_ls is defined outside this function;
			 * presumably the labeled-security option type - TODO
			 * confirm.  It is only length-checked and skipped.
			 */
			if (opt_type == ip6opt_ls) {
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
			} else switch (opt_type) {
			case IP6OPT_PADN:
				/*
				 * Note:We don't verify that (N-2) pad octets
				 * are zero as required by spec. Adhere to
				 * "be liberal in what you accept..." part of
				 * implementation philosophy (RFC791,RFC1122)
				 */
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
				break;

			case IP6OPT_JUMBO:
				/*
				 * Both paths end at opt_error, so the header
				 * type test currently has no effect.
				 */
				if (hdr_type != IPPROTO_HOPOPTS)
					goto opt_error;
				goto opt_error; /* XXX Not implemented! */

			case IP6OPT_ROUTER_ALERT: {
				struct ip6_opt_router *or;

				if (hdr_type != IPPROTO_HOPOPTS)
					goto opt_error;
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
				or = (struct ip6_opt_router *)optptr;
				/* Check total length and alignment */
				if (optused != sizeof (*or) ||
				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
					goto opt_error;
				/* Check value */
				switch (*((uint16_t *)or->ip6or_value)) {
				case IP6_ALERT_MLD:
				case IP6_ALERT_RSVP:
					/* A value we know: report it. */
					ret = 1;
				}
				break;
			}
			case IP6OPT_HOME_ADDRESS: {
				/*
				 * Minimal support for the home address option
				 * (which is required by all IPv6 nodes).
				 * Implement by just swapping the home address
				 * and source address.
				 * XXX Note: this has IPsec implications since
				 * AH needs to take this into account.
				 * Also, when IPsec is used we need to ensure
				 * that this is only processed once
				 * in the received packet (to avoid swapping
				 * back and forth).
				 * NOTE:This option processing is considered
				 * to be unsafe and prone to a denial of
				 * service attack.
				 * The current processing is not safe even with
				 * IPsec secured IP packets. Since the home
				 * address option processing requirement still
				 * is in the IETF draft and in the process of
				 * being redefined for its usage, it has been
				 * decided to turn off the option by default.
				 * If this section of code needs to be executed,
				 * ndd variable ip6_ignore_home_address_opt
				 * should be set to 0 at the user's own risk.
				 */
				struct ip6_opt_home_address *oh;
				in6_addr_t tmp;

				/* When ignored, handle it like an error. */
				if (ipst->ips_ipv6_ignore_home_address_opt)
					goto opt_error;

				if (hdr_type != IPPROTO_DSTOPTS)
					goto opt_error;
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;

				/*
				 * We did this dest. opt the first time
				 * around (i.e. before AH processing).
				 * If we've done AH... stop now.
				 */
				if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
				    ira->ira_ipsec_ah_sa != NULL)
					break;

				oh = (struct ip6_opt_home_address *)optptr;
				/* Check total length and alignment */
				if (optused < sizeof (*oh) ||
				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
					goto opt_error;
				/* Swap ip6_src and the home address */
				tmp = ip6h->ip6_src;
				/* XXX Note: only 8 byte alignment option */
				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
				*(in6_addr_t *)oh->ip6oh_addr = tmp;
				break;
			}

			case IP6OPT_TUNNEL_LIMIT:
				if (hdr_type != IPPROTO_DSTOPTS) {
					goto opt_error;
				}
				optused = 2 + optptr[1];
				if (optused > optlen) {
					goto bad_opt;
				}
				/* Type, length and one data byte: 3 in all. */
				if (optused != 3) {
					goto opt_error;
				}
				break;

			default:
				errtype = "unknown";
				/* FALLTHROUGH */
			opt_error:
				/*
				 * Reached for unknown options and, by goto,
				 * for known options that failed a check
				 * above.  What happens next is selected by
				 * IP6OPT_TYPE(opt_type).
				 */
				/* Determine which zone should send error */
				switch (IP6OPT_TYPE(opt_type)) {
				case IP6OPT_TYPE_SKIP:
					/* Step over it and keep parsing. */
					optused = 2 + optptr[1];
					if (optused > optlen)
						goto bad_opt;
					ip1dbg(("ip_process_options_v6: %s "
					    "opt 0x%x skipped\n",
					    errtype, opt_type));
					break;
				case IP6OPT_TYPE_DISCARD:
					/* Drop the packet without an error. */
					ip1dbg(("ip_process_options_v6: %s "
					    "opt 0x%x; packet dropped\n",
					    errtype, opt_type));
					BUMP_MIB(ill->ill_ip_mib,
					    ipIfStatsInHdrErrors);
					ip_drop_input("ipIfStatsInHdrErrors",
					    mp, ill);
					freemsg(mp);
					return (-1);
				case IP6OPT_TYPE_ICMP:
					/*
					 * Hand mp to the ICMP error routine,
					 * pointing at the offending option.
					 */
					BUMP_MIB(ill->ill_ip_mib,
					    ipIfStatsInHdrErrors);
					ip_drop_input("ipIfStatsInHdrErrors",
					    mp, ill);
					icmp_param_problem_v6(mp,
					    ICMP6_PARAMPROB_OPTION,
					    (uint32_t)(optptr -
					    (uint8_t *)ip6h),
					    B_FALSE, ira);
					return (-1);
				case IP6OPT_TYPE_FORCEICMP:
					/*
					 * As IP6OPT_TYPE_ICMP, except that
					 * B_TRUE rather than B_FALSE is passed
					 * to icmp_param_problem_v6().
					 */
					BUMP_MIB(ill->ill_ip_mib,
					    ipIfStatsInHdrErrors);
					ip_drop_input("ipIfStatsInHdrErrors",
					    mp, ill);
					icmp_param_problem_v6(mp,
					    ICMP6_PARAMPROB_OPTION,
					    (uint32_t)(optptr -
					    (uint8_t *)ip6h),
					    B_TRUE, ira);
					return (-1);
				default:
					ASSERT(0);
				}
			}
		}
		/* Advance past the option that was just handled. */
		optlen -= optused;
		optptr += optused;
	}
	return (ret);

bad_opt:
	/*
	 * The option does not fit in the remaining option bytes.  Hand mp to
	 * the ICMP error routine with the offset of the option from ip6h.
	 */
	/* Determine which zone should send error */
	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
	    (uint32_t)(optptr - (uint8_t *)ip6h),
	    B_FALSE, ira);
	return (-1);
}
2963
2964 /*
2965 * Process a routing header that is not yet empty.
2966 * Because of RFC 5095, we now reject all route headers.
2967 */
2968 void
ip_process_rthdr(mblk_t * mp,ip6_t * ip6h,ip6_rthdr_t * rth,ip_recv_attr_t * ira)2969 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
2970 ip_recv_attr_t *ira)
2971 {
2972 ill_t *ill = ira->ira_ill;
2973 ip_stack_t *ipst = ill->ill_ipst;
2974
2975 ASSERT(rth->ip6r_segleft != 0);
2976
2977 if (!ipst->ips_ipv6_forward_src_routed) {
2978 /* XXX Check for source routed out same interface? */
2979 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
2980 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
2981 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
2982 freemsg(mp);
2983 return;
2984 }
2985
2986 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2987 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
2988 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
2989 B_FALSE, ira);
2990 }
2991
2992 /*
2993 * Read side put procedure for IPv6 module.
2994 */
2995 int
ip_rput_v6(queue_t * q,mblk_t * mp)2996 ip_rput_v6(queue_t *q, mblk_t *mp)
2997 {
2998 ill_t *ill;
2999
3000 ill = (ill_t *)q->q_ptr;
3001 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3002 union DL_primitives *dl;
3003
3004 dl = (union DL_primitives *)mp->b_rptr;
3005 /*
3006 * Things are opening or closing - only accept DLPI
3007 * ack messages. If the stream is closing and ip_wsrv
3008 * has completed, ip_close is out of the qwait, but has
3009 * not yet completed qprocsoff. Don't proceed any further
3010 * because the ill has been cleaned up and things hanging
3011 * off the ill have been freed.
3012 */
3013 if ((mp->b_datap->db_type != M_PCPROTO) ||
3014 (dl->dl_primitive == DL_UNITDATA_IND)) {
3015 inet_freemsg(mp);
3016 return (0);
3017 }
3018 }
3019 if (DB_TYPE(mp) == M_DATA) {
3020 struct mac_header_info_s mhi;
3021
3022 ip_mdata_to_mhi(ill, mp, &mhi);
3023 ip_input_v6(ill, NULL, mp, &mhi);
3024 } else {
3025 ip_rput_notdata(ill, mp);
3026 }
3027 return (0);
3028 }
3029
3030 /*
3031 * Walk through the IPv6 packet in mp and see if there's an AH header
3032 * in it. See if the AH header needs to get done before other headers in
3033 * the packet. (Worker function for ipsec_early_ah_v6().)
3034 */
3035 #define IPSEC_HDR_DONT_PROCESS 0
3036 #define IPSEC_HDR_PROCESS 1
3037 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */
3038 static int
ipsec_needs_processing_v6(mblk_t * mp,uint8_t * nexthdr)3039 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3040 {
3041 uint_t length;
3042 uint_t ehdrlen;
3043 uint8_t *whereptr;
3044 uint8_t *endptr;
3045 uint8_t *nexthdrp;
3046 ip6_dest_t *desthdr;
3047 ip6_rthdr_t *rthdr;
3048 ip6_t *ip6h;
3049
3050 /*
3051 * For now just pullup everything. In general, the less pullups,
3052 * the better, but there's so much squirrelling through anyway,
3053 * it's just easier this way.
3054 */
3055 if (!pullupmsg(mp, -1)) {
3056 return (IPSEC_MEMORY_ERROR);
3057 }
3058
3059 ip6h = (ip6_t *)mp->b_rptr;
3060 length = IPV6_HDR_LEN;
3061 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3062 endptr = mp->b_wptr;
3063
3064 /*
3065 * We can't just use the argument nexthdr in the place
3066 * of nexthdrp becaue we don't dereference nexthdrp
3067 * till we confirm whether it is a valid address.
3068 */
3069 nexthdrp = &ip6h->ip6_nxt;
3070 while (whereptr < endptr) {
3071 /* Is there enough left for len + nexthdr? */
3072 if (whereptr + MIN_EHDR_LEN > endptr)
3073 return (IPSEC_MEMORY_ERROR);
3074
3075 switch (*nexthdrp) {
3076 case IPPROTO_HOPOPTS:
3077 case IPPROTO_DSTOPTS:
3078 /* Assumes the headers are identical for hbh and dst */
3079 desthdr = (ip6_dest_t *)whereptr;
3080 ehdrlen = 8 * (desthdr->ip6d_len + 1);
3081 if ((uchar_t *)desthdr + ehdrlen > endptr)
3082 return (IPSEC_MEMORY_ERROR);
3083 /*
3084 * Return DONT_PROCESS because the destination
3085 * options header may be for each hop in a
3086 * routing-header, and we only want AH if we're
3087 * finished with routing headers.
3088 */
3089 if (*nexthdrp == IPPROTO_DSTOPTS)
3090 return (IPSEC_HDR_DONT_PROCESS);
3091 nexthdrp = &desthdr->ip6d_nxt;
3092 break;
3093 case IPPROTO_ROUTING:
3094 rthdr = (ip6_rthdr_t *)whereptr;
3095
3096 /*
3097 * If there's more hops left on the routing header,
3098 * return now with DON'T PROCESS.
3099 */
3100 if (rthdr->ip6r_segleft > 0)
3101 return (IPSEC_HDR_DONT_PROCESS);
3102
3103 ehdrlen = 8 * (rthdr->ip6r_len + 1);
3104 if ((uchar_t *)rthdr + ehdrlen > endptr)
3105 return (IPSEC_MEMORY_ERROR);
3106 nexthdrp = &rthdr->ip6r_nxt;
3107 break;
3108 case IPPROTO_FRAGMENT:
3109 /* Wait for reassembly */
3110 return (IPSEC_HDR_DONT_PROCESS);
3111 case IPPROTO_AH:
3112 *nexthdr = IPPROTO_AH;
3113 return (IPSEC_HDR_PROCESS);
3114 case IPPROTO_NONE:
3115 /* No next header means we're finished */
3116 default:
3117 return (IPSEC_HDR_DONT_PROCESS);
3118 }
3119 length += ehdrlen;
3120 whereptr += ehdrlen;
3121 }
3122 /*
3123 * Malformed/truncated packet.
3124 */
3125 return (IPSEC_MEMORY_ERROR);
3126 }
3127
3128 /*
3129 * Path for AH if options are present.
3130 * Returns NULL if the mblk was consumed.
3131 *
3132 * Sometimes AH needs to be done before other IPv6 headers for security
3133 * reasons. This function (and its ipsec_needs_processing_v6() above)
3134 * indicates if that is so, and fans out to the appropriate IPsec protocol
3135 * for the datagram passed in.
3136 */
3137 mblk_t *
ipsec_early_ah_v6(mblk_t * mp,ip_recv_attr_t * ira)3138 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3139 {
3140 uint8_t nexthdr;
3141 ah_t *ah;
3142 ill_t *ill = ira->ira_ill;
3143 ip_stack_t *ipst = ill->ill_ipst;
3144 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
3145
3146 switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3147 case IPSEC_MEMORY_ERROR:
3148 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3149 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3150 freemsg(mp);
3151 return (NULL);
3152 case IPSEC_HDR_DONT_PROCESS:
3153 return (mp);
3154 }
3155
3156 /* Default means send it to AH! */
3157 ASSERT(nexthdr == IPPROTO_AH);
3158
3159 if (!ipsec_loaded(ipss)) {
3160 ip_proto_not_sup(mp, ira);
3161 return (NULL);
3162 }
3163
3164 mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3165 if (mp == NULL)
3166 return (NULL);
3167 ASSERT(ah != NULL);
3168 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3169 ASSERT(ira->ira_ipsec_ah_sa != NULL);
3170 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3171 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3172
3173 if (mp == NULL) {
3174 /*
3175 * Either it failed or is pending. In the former case
3176 * ipIfStatsInDiscards was increased.
3177 */
3178 return (NULL);
3179 }
3180
3181 /* we're done with IPsec processing, send it up */
3182 ip_input_post_ipsec(mp, ira);
3183 return (NULL);
3184 }
3185
/*
 * Reassemble fragment.
 * When it returns a completed message the first mblk will only contain
 * the headers prior to the fragment header, with the nexthdr value updated
 * to be the header after the fragment header.
 *
 * Arguments:
 *   mp		the received fragment.
 *   ip6h	the IPv6 base header of the fragment.
 *   fraghdr	the fragment extension header of the fragment.
 *   remlen	added to the fragment offset to compute where this
 *		fragment's data ends in the reassembled packet; presumably
 *		the number of payload bytes following the fragment header
 *		(confirm against the caller).
 *   ira	receive attributes; ira_pktlen, ira_ip_hdr_length and
 *		ira_protocol are rewritten when a complete packet is
 *		returned.
 *
 * Returns the complete packet (with the fragment header removed and
 * ip6_plen, the ECN bits and the mblk checksum fields updated), or NULL
 * if mp was queued for later reassembly, dropped, or handed to
 * icmp_param_problem_v6().
 *
 * A packet consisting of a single fragment (offset 0, no more-fragments
 * flag) skips the reassembly queue entirely.  Otherwise the per-ill
 * fragment hash bucket lock (ipfb_lock) is held while the reassembly
 * queue is searched and updated.
 */
mblk_t *
ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
    ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
{
	uint32_t ident = ntohl(fraghdr->ip6f_ident);
	uint16_t offset;
	boolean_t more_frags;
	uint8_t nexthdr = fraghdr->ip6f_nxt;
	in6_addr_t *v6dst_ptr;
	in6_addr_t *v6src_ptr;
	uint_t end;
	uint_t hdr_length;
	size_t count;
	ipf_t *ipf;
	ipf_t **ipfp;
	ipfb_t *ipfb;
	mblk_t *mp1;
	uint8_t ecn_info = 0;
	size_t msg_len;
	mblk_t *tail_mp;
	mblk_t *t_mp;
	boolean_t pruned = B_FALSE;
	uint32_t sum_val;
	uint16_t sum_flags;
	ill_t *ill = ira->ira_ill;
	ip_stack_t *ipst = ill->ill_ipst;
	uint_t prev_nexthdr_offset;
	uint8_t prev_nexthdr;
	uint8_t *ptr;
	uint32_t packet_size;

	/*
	 * We utilize hardware computed checksum info only for UDP since
	 * IP fragmentation is a normal occurrence for the protocol.  In
	 * addition, checksum offload support for IP fragments carrying
	 * UDP payload is commonly implemented across network adapters.
	 */
	ASSERT(ira->ira_rill != NULL);
	if (nexthdr == IPPROTO_UDP && dohwcksum &&
	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
		/* NOTE: this mp1 shadows the function-scope mp1. */
		mblk_t *mp1 = mp->b_cont;
		int32_t len;

		/* Record checksum information from the packet */
		sum_val = (uint32_t)DB_CKSUM16(mp);
		sum_flags = DB_CKSUMFLAGS(mp);

		/* fragmented payload offset from beginning of mblk */
		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);

		if ((sum_flags & HCK_PARTIALCKSUM) &&
		    (mp1 == NULL || mp1->b_cont == NULL) &&
		    offset >= DB_CKSUMSTART(mp) &&
		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
			uint32_t adj;
			/*
			 * Partial checksum has been calculated by hardware
			 * and attached to the packet; in addition, any
			 * prepended extraneous data is even byte aligned.
			 * If any such data exists, we adjust the checksum;
			 * this would also handle any postpended data.
			 */
			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
			    mp, mp1, len, adj);

			/* One's complement subtract extraneous checksum */
			if (adj >= sum_val)
				sum_val = ~(adj - sum_val) & 0xFFFF;
			else
				sum_val -= adj;
		}
	} else {
		sum_val = 0;
		sum_flags = 0;
	}

	/* Clear hardware checksumming flag */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Determine the offset (from the beginning of the IP header)
	 * of the nexthdr value which has IPPROTO_FRAGMENT.  We use
	 * this when removing the fragment header from the packet.
	 * This packet consists of the IPv6 header, a potential
	 * hop-by-hop options header, a potential pre-routing-header
	 * destination options header, and a potential routing header.
	 */
	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
	prev_nexthdr = ip6h->ip6_nxt;
	ptr = (uint8_t *)&ip6h[1];

	if (prev_nexthdr == IPPROTO_HOPOPTS) {
		ip6_hbh_t *hbh_hdr;
		uint_t hdr_len;

		hbh_hdr = (ip6_hbh_t *)ptr;
		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
		prev_nexthdr = hbh_hdr->ip6h_nxt;
		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
		    - (uint8_t *)ip6h;
		ptr += hdr_len;
	}
	if (prev_nexthdr == IPPROTO_DSTOPTS) {
		ip6_dest_t *dest_hdr;
		uint_t hdr_len;

		dest_hdr = (ip6_dest_t *)ptr;
		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
		prev_nexthdr = dest_hdr->ip6d_nxt;
		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
		    - (uint8_t *)ip6h;
		ptr += hdr_len;
	}
	if (prev_nexthdr == IPPROTO_ROUTING) {
		ip6_rthdr_t *rthdr;
		uint_t hdr_len;

		rthdr = (ip6_rthdr_t *)ptr;
		prev_nexthdr = rthdr->ip6r_nxt;
		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
		    - (uint8_t *)ip6h;
		hdr_len = 8 * (rthdr->ip6r_len + 1);
		ptr += hdr_len;
	}
	if (prev_nexthdr != IPPROTO_FRAGMENT) {
		/* Can't handle other headers before the fragment header */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
		freemsg(mp);
		return (NULL);
	}

	/*
	 * Note: Fragment offset in header is in 8-octet units.
	 * Clearing least significant 3 bits not only extracts
	 * it but also gets it in units of octets.
	 */
	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);

	/*
	 * Is the more frags flag on and the payload length not a multiple
	 * of eight?
	 */
	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((char *)&ip6h->ip6_plen -
		    (char *)ip6h), B_FALSE, ira);
		return (NULL);
	}

	v6src_ptr = &ip6h->ip6_src;
	v6dst_ptr = &ip6h->ip6_dst;
	end = remlen;

	/* Length of everything up to and including the fragment header. */
	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
	end += offset;

	/*
	 * Would fragment cause reassembled packet to have a payload length
	 * greater than IP_MAXPACKET - the max payload size?
	 */
	if (end > IP_MAXPACKET) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("Reassembled packet too large", mp, ill);
		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
		    (char *)ip6h), B_FALSE, ira);
		return (NULL);
	}

	/*
	 * This packet just has one fragment. Reassembly not
	 * needed.
	 */
	if (!more_frags && offset == 0) {
		goto reass_done;
	}

	/*
	 * Drop the fragment as early as possible, if
	 * we don't have resource(s) to re-assemble.
	 */
	if (ipst->ips_ip_reass_queue_bytes == 0) {
		freemsg(mp);
		return (NULL);
	}

	/* Record the ECN field info. */
	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
	/*
	 * If this is not the first fragment, dump the unfragmentable
	 * portion of the packet.
	 */
	if (offset)
		mp->b_rptr = (uchar_t *)&fraghdr[1];

	/*
	 * Fragmentation reassembly.  Each ILL has a hash table for
	 * queueing packets undergoing reassembly for all IPIFs
	 * associated with the ILL.  The hash is based on the packet
	 * IP ident field.  The ILL frag hash table was allocated
	 * as a timer block at the time the ILL was created.  Whenever
	 * there is anything on the reassembly queue, the timer will
	 * be running.
	 */
	/*
	 * Handle vnic loopback of fragments: an mblk whose data block has
	 * more than two references is not charged to msg_len.
	 */
	if (mp->b_datap->db_ref > 2)
		msg_len = 0;
	else
		msg_len = MBLKSIZE(mp);

	/* Find the last mblk of the fragment, summing sizes on the way. */
	tail_mp = mp;
	while (tail_mp->b_cont != NULL) {
		tail_mp = tail_mp->b_cont;
		if (tail_mp->b_datap->db_ref <= 2)
			msg_len += MBLKSIZE(tail_mp);
	}
	/*
	 * If the reassembly list for this ILL will get too big
	 * prune it.
	 */

	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
	    ipst->ips_ip_reass_queue_bytes) {
		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
		    uint_t, ill->ill_frag_count,
		    uint_t, ipst->ips_ip_reass_queue_bytes);
		ill_frag_prune(ill,
		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
		    (ipst->ips_ip_reass_queue_bytes - msg_len));
		pruned = B_TRUE;
	}

	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
	mutex_enter(&ipfb->ipfb_lock);

	ipfp = &ipfb->ipfb_ipf;
	/* Try to find an existing fragment queue for this packet. */
	for (;;) {
		ipf = ipfp[0];
		if (ipf) {
			/*
			 * It has to match on ident, source address, and
			 * dest address.
			 */
			if (ipf->ipf_ident == ident &&
			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {

				/*
				 * If we have received too many
				 * duplicate fragments for this packet
				 * free it.
				 */
				if (ipf->ipf_num_dups > ip_max_frag_dups) {
					ill_frag_free_pkts(ill, ipfb, ipf, 1);
					freemsg(mp);
					mutex_exit(&ipfb->ipfb_lock);
					return (NULL);
				}

				break;
			}
			ipfp = &ipf->ipf_hash_next;
			continue;
		}


		/*
		 * If we pruned the list, do we want to store this new
		 * fragment?. We apply an optimization here based on the
		 * fact that most fragments will be received in order.
		 * So if the offset of this incoming fragment is zero,
		 * it is the first fragment of a new packet. We will
		 * keep it.  Otherwise drop the fragment, as we have
		 * probably pruned the packet already (since the
		 * packet cannot be found).
		 */

		if (pruned && offset != 0) {
			mutex_exit(&ipfb->ipfb_lock);
			freemsg(mp);
			return (NULL);
		}

		/* New guy.  Allocate a frag message. */
		mp1 = allocb(sizeof (*ipf), BPRI_MED);
		if (!mp1) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
	/*
	 * Shared exit for every path that ends without a complete packet
	 * while still holding the bucket lock: release it and return NULL.
	 * Note that this label sits inside the allocation-failure block.
	 */
	partial_reass_done:
			mutex_exit(&ipfb->ipfb_lock);
			return (NULL);
		}

		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst))  {
			/*
			 * Too many fragmented packets in this hash bucket.
			 * Free the oldest.
			 */
			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
		}

		mp1->b_cont = mp;

		/* Initialize the fragment header. */
		ipf = (ipf_t *)mp1->b_rptr;
		ipf->ipf_mp = mp1;
		ipf->ipf_ptphn = ipfp;
		ipfp[0] = ipf;
		ipf->ipf_hash_next = NULL;
		ipf->ipf_ident = ident;
		ipf->ipf_v6src = *v6src_ptr;
		ipf->ipf_v6dst = *v6dst_ptr;
		/* Record reassembly start time. */
		ipf->ipf_timestamp = gethrestime_sec();
		/* Record ipf generation and account for frag header */
		ipf->ipf_gen = ill->ill_ipf_gen++;
		ipf->ipf_count = MBLKSIZE(mp1);
		ipf->ipf_protocol = nexthdr;
		ipf->ipf_nf_hdr_len = 0;
		ipf->ipf_prev_nexthdr_offset = 0;
		ipf->ipf_last_frag_seen = B_FALSE;
		ipf->ipf_ecn = ecn_info;
		ipf->ipf_num_dups = 0;
		ipfb->ipfb_frag_pkts++;
		ipf->ipf_checksum = 0;
		ipf->ipf_checksum_flags = 0;

		/* Store checksum value in fragment header */
		if (sum_flags != 0) {
			/* Fold the 32-bit sum down to 16 bits, twice. */
			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
			ipf->ipf_checksum = sum_val;
			ipf->ipf_checksum_flags = sum_flags;
		}

		/*
		 * We handle reassembly two ways.  In the easy case,
		 * where all the fragments show up in order, we do
		 * minimal bookkeeping, and just clip new pieces on
		 * the end.  If we ever see a hole, then we go off
		 * to ip_reassemble which has to mark the pieces and
		 * keep track of the number of holes, etc.  Obviously,
		 * the point of having both mechanisms is so we can
		 * handle the easy case as efficiently as possible.
		 */
		if (offset == 0) {
			/* Easy case, in-order reassembly so far. */
			/* Update the byte count */
			ipf->ipf_count += msg_len;
			ipf->ipf_tail_mp = tail_mp;
			/*
			 * Keep track of next expected offset in
			 * ipf_end.
			 */
			ipf->ipf_end = end;
			ipf->ipf_nf_hdr_len = hdr_length;
			ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
		} else {
			/* Hard case, hole at the beginning. */
			ipf->ipf_tail_mp = NULL;
			/*
			 * ipf_end == 0 means that we have given up
			 * on easy reassembly.
			 */
			ipf->ipf_end = 0;

			/* Forget checksum offload from now on */
			ipf->ipf_checksum_flags = 0;

			/*
			 * ipf_hole_cnt is set by ip_reassemble.
			 * ipf_count is updated by ip_reassemble.
			 * No need to check for return value here
			 * as we don't expect reassembly to complete or
			 * fail for the first fragment itself.
			 */
			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
			    msg_len);
		}
		/* Update per ipfb and ill byte counts */
		ipfb->ipfb_count += ipf->ipf_count;
		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
		/* If the frag timer wasn't already going, start it. */
		mutex_enter(&ill->ill_lock);
		ill_frag_timer_start(ill);
		mutex_exit(&ill->ill_lock);
		goto partial_reass_done;
	}

	/*
	 * If the packet's flag has changed (it could be coming up
	 * from an interface different than the previous, therefore
	 * possibly different checksum capability), then forget about
	 * any stored checksum states.  Otherwise add the value to
	 * the existing one stored in the fragment header.
	 */
	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
		sum_val += ipf->ipf_checksum;
		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
		ipf->ipf_checksum = sum_val;
	} else if (ipf->ipf_checksum_flags != 0) {
		/* Forget checksum offload from now on */
		ipf->ipf_checksum_flags = 0;
	}

	/*
	 * We have a new piece of a datagram which is already being
	 * reassembled.  Update the ECN info if all IP fragments
	 * are ECN capable.  If there is one which is not, clear
	 * all the info.  If there is at least one which has CE
	 * code point, IP needs to report that up to transport.
	 */
	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
		if (ecn_info == IPH_ECN_CE)
			ipf->ipf_ecn = IPH_ECN_CE;
	} else {
		ipf->ipf_ecn = IPH_ECN_NECT;
	}

	if (offset && ipf->ipf_end == offset) {
		/* The new fragment fits at the end */
		ipf->ipf_tail_mp->b_cont = mp;
		/* Update the byte count */
		ipf->ipf_count += msg_len;
		/* Update per ipfb and ill byte counts */
		ipfb->ipfb_count += msg_len;
		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
		atomic_add_32(&ill->ill_frag_count, msg_len);
		if (more_frags) {
			/* More to come. */
			ipf->ipf_end = end;
			ipf->ipf_tail_mp = tail_mp;
			goto partial_reass_done;
		}
		/* Last fragment arrived in order: fall through to finish. */
	} else {
		/*
		 * Go do the hard cases.
		 * Call ip_reassemble().
		 */
		int ret;

		if (offset == 0) {
			/*
			 * First fragment arriving late: record the
			 * unfragmentable header details unless they
			 * have already been recorded.
			 */
			if (ipf->ipf_prev_nexthdr_offset == 0) {
				ipf->ipf_nf_hdr_len = hdr_length;
				ipf->ipf_prev_nexthdr_offset =
				    prev_nexthdr_offset;
			}
		}
		/* Save current byte count */
		count = ipf->ipf_count;
		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);

		/* Count of bytes added and subtracted (freeb()ed) */
		count = ipf->ipf_count - count;
		if (count) {
			/* Update per ipfb and ill byte counts */
			ipfb->ipfb_count += count;
			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
			atomic_add_32(&ill->ill_frag_count, count);
		}
		if (ret == IP_REASS_PARTIAL) {
			goto partial_reass_done;
		} else if (ret == IP_REASS_FAILED) {
			/* Reassembly failed. Free up all resources */
			ill_frag_free_pkts(ill, ipfb, ipf, 1);
			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
				IP_REASS_SET_START(t_mp, 0);
				IP_REASS_SET_END(t_mp, 0);
			}
			freemsg(mp);
			goto partial_reass_done;
		}

		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
	}
	/*
	 * We have completed reassembly.  Unhook the frag header from
	 * the reassembly list.
	 *
	 * Grab the unfragmentable header length next header value out
	 * of the first fragment
	 */
	ASSERT(ipf->ipf_nf_hdr_len != 0);
	hdr_length = ipf->ipf_nf_hdr_len;

	/*
	 * Before we free the frag header, record the ECN info
	 * to report back to the transport.
	 */
	ecn_info = ipf->ipf_ecn;

	/*
	 * Store the nextheader field in the header preceding the fragment
	 * header
	 */
	nexthdr = ipf->ipf_protocol;
	prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
	ipfp = ipf->ipf_ptphn;

	/* We need to supply these to caller */
	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
		sum_val = ipf->ipf_checksum;
	else
		sum_val = 0;

	/* Unlink ipf from the bucket's list and undo its byte accounting. */
	mp1 = ipf->ipf_mp;
	count = ipf->ipf_count;
	ipf = ipf->ipf_hash_next;
	if (ipf)
		ipf->ipf_ptphn = ipfp;
	ipfp[0] = ipf;
	atomic_add_32(&ill->ill_frag_count, -count);
	ASSERT(ipfb->ipfb_count >= count);
	ipfb->ipfb_count -= count;
	ipfb->ipfb_frag_pkts--;
	mutex_exit(&ipfb->ipfb_lock);
	/* Ditch the frag header. */
	mp = mp1->b_cont;
	freeb(mp1);

	/*
	 * Make sure the packet is good by doing some sanity
	 * check. If bad we can silently drop the packet.
	 */
reass_done:
	if (hdr_length < sizeof (ip6_frag_t)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
		ip1dbg(("ip_input_fragment_v6: bad packet\n"));
		freemsg(mp);
		return (NULL);
	}

	/*
	 * Remove the fragment header from the initial header by
	 * splitting the mblk into the non-fragmentable header and
	 * everything after the fragment extension header.  This has the
	 * side effect of putting all the headers that need destination
	 * processing into the b_cont block-- on return this fact is
	 * used in order to avoid having to look at the extensions
	 * already processed.
	 *
	 * Note that this code assumes that the unfragmentable portion
	 * of the header is in the first mblk and increments
	 * the read pointer past it.  If this assumption is broken
	 * this code fails badly.
	 */
	if (mp->b_rptr + hdr_length != mp->b_wptr) {
		mblk_t *nmp;

		if (!(nmp = dupb(mp))) {
			ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return (NULL);
		}
		nmp->b_cont = mp->b_cont;
		mp->b_cont = nmp;
		nmp->b_rptr += hdr_length;
	}
	/* Trim the first mblk so that it ends just before the frag header. */
	mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);

	/* Overwrite the nexthdr byte that used to say IPPROTO_FRAGMENT. */
	ip6h = (ip6_t *)mp->b_rptr;
	((char *)ip6h)[prev_nexthdr_offset] = nexthdr;

	/* Restore original IP length in header. */
	packet_size = msgdsize(mp);
	ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
	/* Record the ECN info. */
	ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
	ip6h->ip6_vcf |= htonl(ecn_info << 20);

	/* Update the receive attributes */
	ira->ira_pktlen = packet_size;
	ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
	ira->ira_protocol = nexthdr;

	/* Reassembly is successful; set checksum information in packet */
	DB_CKSUM16(mp) = (uint16_t)sum_val;
	DB_CKSUMFLAGS(mp) = sum_flags;
	DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;

	return (mp);
}
3785
3786 /*
3787 * Given an mblk and a ptr, find the destination address in an IPv6 routing
3788 * header.
3789 */
3790 static in6_addr_t
pluck_out_dst(const mblk_t * mp,uint8_t * whereptr,in6_addr_t oldrv)3791 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3792 {
3793 ip6_rthdr0_t *rt0;
3794 int segleft, numaddr;
3795 in6_addr_t *ap, rv = oldrv;
3796
3797 rt0 = (ip6_rthdr0_t *)whereptr;
3798 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3799 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3800 uint8_t *, whereptr);
3801 return (rv);
3802 }
3803 segleft = rt0->ip6r0_segleft;
3804 numaddr = rt0->ip6r0_len / 2;
3805
3806 if ((rt0->ip6r0_len & 0x1) ||
3807 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3808 (segleft > rt0->ip6r0_len / 2)) {
3809 /*
3810 * Corrupt packet. Either the routing header length is odd
3811 * (can't happen) or mismatched compared to the packet, or the
3812 * number of addresses is. Return what we can. This will
3813 * only be a problem on forwarded packets that get squeezed
3814 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3815 */
3816 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3817 whereptr);
3818 return (rv);
3819 }
3820
3821 if (segleft != 0) {
3822 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3823 rv = ap[numaddr - 1];
3824 }
3825
3826 return (rv);
3827 }
3828
3829 /*
3830 * Walk through the options to see if there is a routing header.
3831 * If present get the destination which is the last address of
3832 * the option.
3833 * mp needs to be provided in cases when the extension headers might span
3834 * b_cont; mp is never modified by this function.
3835 */
3836 in6_addr_t
ip_get_dst_v6(ip6_t * ip6h,const mblk_t * mp,boolean_t * is_fragment)3837 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3838 {
3839 const mblk_t *current_mp = mp;
3840 uint8_t nexthdr;
3841 uint8_t *whereptr;
3842 int ehdrlen;
3843 in6_addr_t rv;
3844
3845 whereptr = (uint8_t *)ip6h;
3846 ehdrlen = sizeof (ip6_t);
3847
3848 /* We assume at least the IPv6 base header is within one mblk. */
3849 ASSERT(mp == NULL ||
3850 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3851
3852 rv = ip6h->ip6_dst;
3853 nexthdr = ip6h->ip6_nxt;
3854 if (is_fragment != NULL)
3855 *is_fragment = B_FALSE;
3856
3857 /*
3858 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3859 * no extension headers will be split across mblks.
3860 */
3861
3862 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3863 nexthdr == IPPROTO_ROUTING) {
3864 if (nexthdr == IPPROTO_ROUTING)
3865 rv = pluck_out_dst(current_mp, whereptr, rv);
3866
3867 /*
3868 * All IPv6 extension headers have the next-header in byte
3869 * 0, and the (length - 8) in 8-byte-words.
3870 */
3871 while (current_mp != NULL &&
3872 whereptr + ehdrlen >= current_mp->b_wptr) {
3873 ehdrlen -= (current_mp->b_wptr - whereptr);
3874 current_mp = current_mp->b_cont;
3875 if (current_mp == NULL) {
3876 /* Bad packet. Return what we can. */
3877 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3878 mp, mblk_t *, current_mp, ip6_t *, ip6h);
3879 goto done;
3880 }
3881 whereptr = current_mp->b_rptr;
3882 }
3883 whereptr += ehdrlen;
3884
3885 nexthdr = *whereptr;
3886 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3887 ehdrlen = (*(whereptr + 1) + 1) * 8;
3888 }
3889
3890 done:
3891 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3892 *is_fragment = B_TRUE;
3893 return (rv);
3894 }
3895
3896 /*
3897 * ip_source_routed_v6:
3898 * This function is called by redirect code (called from ip_input_v6) to
3899 * know whether this packet is source routed through this node i.e
3900 * whether this node (router) is part of the journey. This
3901 * function is called under two cases :
3902 *
3903 * case 1 : Routing header was processed by this node and
3904 * ip_process_rthdr replaced ip6_dst with the next hop
3905 * and we are forwarding the packet to the next hop.
3906 *
3907 * case 2 : Routing header was not processed by this node and we
3908 * are just forwarding the packet.
3909 *
3910 * For case (1) we don't want to send redirects. For case(2) we
3911 * want to send redirects.
3912 */
3913 static boolean_t
ip_source_routed_v6(ip6_t * ip6h,mblk_t * mp,ip_stack_t * ipst)3914 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
3915 {
3916 uint8_t nexthdr;
3917 in6_addr_t *addrptr;
3918 ip6_rthdr0_t *rthdr;
3919 uint8_t numaddr;
3920 ip6_hbh_t *hbhhdr;
3921 uint_t ehdrlen;
3922 uint8_t *byteptr;
3923
3924 ip2dbg(("ip_source_routed_v6\n"));
3925 nexthdr = ip6h->ip6_nxt;
3926 ehdrlen = IPV6_HDR_LEN;
3927
3928 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */
3929 while (nexthdr == IPPROTO_HOPOPTS ||
3930 nexthdr == IPPROTO_DSTOPTS) {
3931 byteptr = (uint8_t *)ip6h + ehdrlen;
3932 /*
3933 * Check if we have already processed
3934 * packets or we are just a forwarding
3935 * router which only pulled up msgs up
3936 * to IPV6HDR and one HBH ext header
3937 */
3938 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
3939 ip2dbg(("ip_source_routed_v6: Extension"
3940 " headers not processed\n"));
3941 return (B_FALSE);
3942 }
3943 hbhhdr = (ip6_hbh_t *)byteptr;
3944 nexthdr = hbhhdr->ip6h_nxt;
3945 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
3946 }
3947 switch (nexthdr) {
3948 case IPPROTO_ROUTING:
3949 byteptr = (uint8_t *)ip6h + ehdrlen;
3950 /*
3951 * If for some reason, we haven't pulled up
3952 * the routing hdr data mblk, then we must
3953 * not have processed it at all. So for sure
3954 * we are not part of the source routed journey.
3955 */
3956 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
3957 ip2dbg(("ip_source_routed_v6: Routing"
3958 " header not processed\n"));
3959 return (B_FALSE);
3960 }
3961 rthdr = (ip6_rthdr0_t *)byteptr;
3962 /*
3963 * Either we are an intermediate router or the
3964 * last hop before destination and we have
3965 * already processed the routing header.
3966 * If segment_left is greater than or equal to zero,
3967 * then we must be the (numaddr - segleft) entry
3968 * of the routing header. Although ip6r0_segleft
3969 * is a unit8_t variable, we still check for zero
3970 * or greater value, if in case the data type
3971 * is changed someday in future.
3972 */
3973 if (rthdr->ip6r0_segleft > 0 ||
3974 rthdr->ip6r0_segleft == 0) {
3975 numaddr = rthdr->ip6r0_len / 2;
3976 addrptr = (in6_addr_t *)((char *)rthdr +
3977 sizeof (*rthdr));
3978 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
3979 if (addrptr != NULL) {
3980 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
3981 return (B_TRUE);
3982 ip1dbg(("ip_source_routed_v6: Not local\n"));
3983 }
3984 }
3985 /* FALLTHROUGH */
3986 default:
3987 ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
3988 return (B_FALSE);
3989 }
3990 }
3991
3992 /*
3993 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation.
3994 * We have not optimized this in terms of number of mblks
3995 * allocated. For instance, for each fragment sent we always allocate a
3996 * mblk to hold the IPv6 header and fragment header.
3997 *
3998 * Assumes that all the extension headers are contained in the first mblk
3999 * and that the fragment header has has already been added by calling
4000 * ip_fraghdr_add_v6.
4001 */
4002 int
ip_fragment_v6(mblk_t * mp,nce_t * nce,iaflags_t ixaflags,uint_t pkt_len,uint32_t max_frag,uint32_t xmit_hint,zoneid_t szone,zoneid_t nolzid,pfirepostfrag_t postfragfn,uintptr_t * ixa_cookie)4003 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4004 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4005 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4006 {
4007 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
4008 ip6_t *fip6h;
4009 mblk_t *hmp;
4010 mblk_t *hmp0;
4011 mblk_t *dmp;
4012 ip6_frag_t *fraghdr;
4013 size_t unfragmentable_len;
4014 size_t mlen;
4015 size_t max_chunk;
4016 uint16_t off_flags;
4017 uint16_t offset = 0;
4018 ill_t *ill = nce->nce_ill;
4019 uint8_t nexthdr;
4020 uint8_t *ptr;
4021 ip_stack_t *ipst = ill->ill_ipst;
4022 uint_t priority = mp->b_band;
4023 int error = 0;
4024
4025 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4026 if (max_frag == 0) {
4027 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4028 ip_drop_output("FragFails: zero max_frag", mp, ill);
4029 freemsg(mp);
4030 return (EINVAL);
4031 }
4032
4033 /*
4034 * Caller should have added fraghdr_t to pkt_len, and also
4035 * updated ip6_plen.
4036 */
4037 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4038 ASSERT(msgdsize(mp) == pkt_len);
4039
4040 /*
4041 * Determine the length of the unfragmentable portion of this
4042 * datagram. This consists of the IPv6 header, a potential
4043 * hop-by-hop options header, a potential pre-routing-header
4044 * destination options header, and a potential routing header.
4045 */
4046 nexthdr = ip6h->ip6_nxt;
4047 ptr = (uint8_t *)&ip6h[1];
4048
4049 if (nexthdr == IPPROTO_HOPOPTS) {
4050 ip6_hbh_t *hbh_hdr;
4051 uint_t hdr_len;
4052
4053 hbh_hdr = (ip6_hbh_t *)ptr;
4054 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4055 nexthdr = hbh_hdr->ip6h_nxt;
4056 ptr += hdr_len;
4057 }
4058 if (nexthdr == IPPROTO_DSTOPTS) {
4059 ip6_dest_t *dest_hdr;
4060 uint_t hdr_len;
4061
4062 dest_hdr = (ip6_dest_t *)ptr;
4063 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4064 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4065 nexthdr = dest_hdr->ip6d_nxt;
4066 ptr += hdr_len;
4067 }
4068 }
4069 if (nexthdr == IPPROTO_ROUTING) {
4070 ip6_rthdr_t *rthdr;
4071 uint_t hdr_len;
4072
4073 rthdr = (ip6_rthdr_t *)ptr;
4074 nexthdr = rthdr->ip6r_nxt;
4075 hdr_len = 8 * (rthdr->ip6r_len + 1);
4076 ptr += hdr_len;
4077 }
4078 if (nexthdr != IPPROTO_FRAGMENT) {
4079 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4080 ip_drop_output("FragFails: bad nexthdr", mp, ill);
4081 freemsg(mp);
4082 return (EINVAL);
4083 }
4084 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4085 unfragmentable_len += sizeof (ip6_frag_t);
4086
4087 max_chunk = (max_frag - unfragmentable_len) & ~7;
4088
4089 /*
4090 * Allocate an mblk with enough room for the link-layer
4091 * header and the unfragmentable part of the datagram, which includes
4092 * the fragment header. This (or a copy) will be used as the
4093 * first mblk for each fragment we send.
4094 */
4095 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4096 if (hmp == NULL) {
4097 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4098 ip_drop_output("FragFails: no hmp", mp, ill);
4099 freemsg(mp);
4100 return (ENOBUFS);
4101 }
4102 hmp->b_rptr += ipst->ips_ip_wroff_extra;
4103 hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4104
4105 fip6h = (ip6_t *)hmp->b_rptr;
4106 bcopy(ip6h, fip6h, unfragmentable_len);
4107
4108 /*
4109 * pkt_len is set to the total length of the fragmentable data in this
4110 * datagram. For each fragment sent, we will decrement pkt_len
4111 * by the amount of fragmentable data sent in that fragment
4112 * until len reaches zero.
4113 */
4114 pkt_len -= unfragmentable_len;
4115
4116 /*
4117 * Move read ptr past unfragmentable portion, we don't want this part
4118 * of the data in our fragments.
4119 */
4120 mp->b_rptr += unfragmentable_len;
4121 if (mp->b_rptr == mp->b_wptr) {
4122 mblk_t *mp1 = mp->b_cont;
4123 freeb(mp);
4124 mp = mp1;
4125 }
4126
4127 while (pkt_len != 0) {
4128 mlen = MIN(pkt_len, max_chunk);
4129 pkt_len -= mlen;
4130 if (pkt_len != 0) {
4131 /* Not last */
4132 hmp0 = copyb(hmp);
4133 if (hmp0 == NULL) {
4134 BUMP_MIB(ill->ill_ip_mib,
4135 ipIfStatsOutFragFails);
4136 ip_drop_output("FragFails: copyb failed",
4137 mp, ill);
4138 freeb(hmp);
4139 freemsg(mp);
4140 ip1dbg(("ip_fragment_v6: copyb failed\n"));
4141 return (ENOBUFS);
4142 }
4143 off_flags = IP6F_MORE_FRAG;
4144 } else {
4145 /* Last fragment */
4146 hmp0 = hmp;
4147 hmp = NULL;
4148 off_flags = 0;
4149 }
4150 fip6h = (ip6_t *)(hmp0->b_rptr);
4151 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4152 sizeof (ip6_frag_t));
4153
4154 fip6h->ip6_plen = htons((uint16_t)(mlen +
4155 unfragmentable_len - IPV6_HDR_LEN));
4156 /*
4157 * Note: Optimization alert.
4158 * In IPv6 (and IPv4) protocol header, Fragment Offset
4159 * ("offset") is 13 bits wide and in 8-octet units.
4160 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4161 * it occupies the most significant 13 bits.
4162 * (least significant 13 bits in IPv4).
4163 * We do not do any shifts here. Not shifting is same effect
4164 * as taking offset value in octet units, dividing by 8 and
4165 * then shifting 3 bits left to line it up in place in proper
4166 * place protocol header.
4167 */
4168 fraghdr->ip6f_offlg = htons(offset) | off_flags;
4169
4170 if (!(dmp = ip_carve_mp(&mp, mlen))) {
4171 /* mp has already been freed by ip_carve_mp() */
4172 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4173 ip_drop_output("FragFails: could not carve mp",
4174 hmp0, ill);
4175 if (hmp != NULL)
4176 freeb(hmp);
4177 freeb(hmp0);
4178 ip1dbg(("ip_carve_mp: failed\n"));
4179 return (ENOBUFS);
4180 }
4181 hmp0->b_cont = dmp;
4182 /* Get the priority marking, if any */
4183 hmp0->b_band = priority;
4184
4185 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4186
4187 error = postfragfn(hmp0, nce, ixaflags,
4188 mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4189 ixa_cookie);
4190 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4191 /* No point in sending the other fragments */
4192 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4193 ip_drop_output("FragFails: postfragfn failed",
4194 hmp, ill);
4195 freeb(hmp);
4196 freemsg(mp);
4197 return (error);
4198 }
4199 /* No need to redo state machine in loop */
4200 ixaflags &= ~IXAF_REACH_CONF;
4201
4202 offset += mlen;
4203 }
4204 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4205 return (error);
4206 }
4207
4208 /*
4209 * Add a fragment header to an IPv6 packet.
4210 * Assumes that all the extension headers are contained in the first mblk.
4211 *
4212 * The fragment header is inserted after an hop-by-hop options header
4213 * and after [an optional destinations header followed by] a routing header.
4214 */
4215 mblk_t *
ip_fraghdr_add_v6(mblk_t * mp,uint32_t ident,ip_xmit_attr_t * ixa)4216 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4217 {
4218 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
4219 ip6_t *fip6h;
4220 mblk_t *hmp;
4221 ip6_frag_t *fraghdr;
4222 size_t unfragmentable_len;
4223 uint8_t nexthdr;
4224 uint_t prev_nexthdr_offset;
4225 uint8_t *ptr;
4226 uint_t priority = mp->b_band;
4227 ip_stack_t *ipst = ixa->ixa_ipst;
4228
4229 /*
4230 * Determine the length of the unfragmentable portion of this
4231 * datagram. This consists of the IPv6 header, a potential
4232 * hop-by-hop options header, a potential pre-routing-header
4233 * destination options header, and a potential routing header.
4234 */
4235 nexthdr = ip6h->ip6_nxt;
4236 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4237 ptr = (uint8_t *)&ip6h[1];
4238
4239 if (nexthdr == IPPROTO_HOPOPTS) {
4240 ip6_hbh_t *hbh_hdr;
4241 uint_t hdr_len;
4242
4243 hbh_hdr = (ip6_hbh_t *)ptr;
4244 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4245 nexthdr = hbh_hdr->ip6h_nxt;
4246 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4247 - (uint8_t *)ip6h;
4248 ptr += hdr_len;
4249 }
4250 if (nexthdr == IPPROTO_DSTOPTS) {
4251 ip6_dest_t *dest_hdr;
4252 uint_t hdr_len;
4253
4254 dest_hdr = (ip6_dest_t *)ptr;
4255 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4256 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4257 nexthdr = dest_hdr->ip6d_nxt;
4258 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4259 - (uint8_t *)ip6h;
4260 ptr += hdr_len;
4261 }
4262 }
4263 if (nexthdr == IPPROTO_ROUTING) {
4264 ip6_rthdr_t *rthdr;
4265 uint_t hdr_len;
4266
4267 rthdr = (ip6_rthdr_t *)ptr;
4268 nexthdr = rthdr->ip6r_nxt;
4269 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4270 - (uint8_t *)ip6h;
4271 hdr_len = 8 * (rthdr->ip6r_len + 1);
4272 ptr += hdr_len;
4273 }
4274 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4275
4276 /*
4277 * Allocate an mblk with enough room for the link-layer
4278 * header, the unfragmentable part of the datagram, and the
4279 * fragment header.
4280 */
4281 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4282 ipst->ips_ip_wroff_extra, mp);
4283 if (hmp == NULL) {
4284 ill_t *ill = ixa->ixa_nce->nce_ill;
4285
4286 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4287 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4288 freemsg(mp);
4289 return (NULL);
4290 }
4291 hmp->b_rptr += ipst->ips_ip_wroff_extra;
4292 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4293
4294 fip6h = (ip6_t *)hmp->b_rptr;
4295 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4296
4297 bcopy(ip6h, fip6h, unfragmentable_len);
4298 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4299 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4300
4301 fraghdr->ip6f_nxt = nexthdr;
4302 fraghdr->ip6f_reserved = 0;
4303 fraghdr->ip6f_offlg = 0;
4304 fraghdr->ip6f_ident = htonl(ident);
4305
4306 /* Get the priority marking, if any */
4307 hmp->b_band = priority;
4308
4309 /*
4310 * Move read ptr past unfragmentable portion, we don't want this part
4311 * of the data in our fragments.
4312 */
4313 mp->b_rptr += unfragmentable_len;
4314 hmp->b_cont = mp;
4315 return (hmp);
4316 }
4317
4318 /*
4319 * Determine if the ill and multicast aspects of that packets
4320 * "matches" the conn.
4321 */
4322 boolean_t
conn_wantpacket_v6(conn_t * connp,ip_recv_attr_t * ira,ip6_t * ip6h)4323 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4324 {
4325 ill_t *ill = ira->ira_rill;
4326 zoneid_t zoneid = ira->ira_zoneid;
4327 uint_t in_ifindex;
4328 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst;
4329 in6_addr_t *v6src_ptr = &ip6h->ip6_src;
4330
4331 /*
4332 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4333 * scopeid. This is used to limit
4334 * unicast and multicast reception to conn_incoming_ifindex.
4335 * conn_wantpacket_v6 is called both for unicast and
4336 * multicast packets.
4337 */
4338 in_ifindex = connp->conn_incoming_ifindex;
4339
4340 /* mpathd can bind to the under IPMP interface, which we allow */
4341 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4342 if (!IS_UNDER_IPMP(ill))
4343 return (B_FALSE);
4344
4345 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4346 return (B_FALSE);
4347 }
4348
4349 if (!IPCL_ZONE_MATCH(connp, zoneid))
4350 return (B_FALSE);
4351
4352 if (!(ira->ira_flags & IRAF_MULTICAST))
4353 return (B_TRUE);
4354
4355 if (connp->conn_multi_router)
4356 return (B_TRUE);
4357
4358 if (ira->ira_protocol == IPPROTO_RSVP)
4359 return (B_TRUE);
4360
4361 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4362 ira->ira_ill));
4363 }
4364
4365 /*
4366 * pr_addr_dbg function provides the needed buffer space to call
4367 * inet_ntop() function's 3rd argument. This function should be
4368 * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4369 * stack buffer space in it's own stack frame. This function uses
4370 * a buffer from it's own stack and prints the information.
4371 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4372 *
4373 * Note: This function can call inet_ntop() once.
4374 */
4375 void
pr_addr_dbg(char * fmt1,int af,const void * addr)4376 pr_addr_dbg(char *fmt1, int af, const void *addr)
4377 {
4378 char buf[INET6_ADDRSTRLEN];
4379
4380 if (fmt1 == NULL) {
4381 ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4382 return;
4383 }
4384
4385 /*
4386 * This does not compare debug level and just prints
4387 * out. Thus it is the responsibility of the caller
4388 * to check the appropriate debug-level before calling
4389 * this function.
4390 */
4391 if (ip_debug > 0) {
4392 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4393 }
4394
4395
4396 }
4397
4398
4399 /*
4400 * Return the length in bytes of the IPv6 headers (base header
4401 * extension headers) that will be needed based on the
4402 * ip_pkt_t structure passed by the caller.
4403 *
4404 * The returned length does not include the length of the upper level
4405 * protocol (ULP) header.
4406 */
4407 int
ip_total_hdrs_len_v6(const ip_pkt_t * ipp)4408 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4409 {
4410 int len;
4411
4412 len = IPV6_HDR_LEN;
4413
4414 /*
4415 * If there's a security label here, then we ignore any hop-by-hop
4416 * options the user may try to set.
4417 */
4418 if (ipp->ipp_fields & IPPF_LABEL_V6) {
4419 uint_t hopoptslen;
4420 /*
4421 * Note that ipp_label_len_v6 is just the option - not
4422 * the hopopts extension header. It also needs to be padded
4423 * to a multiple of 8 bytes.
4424 */
4425 ASSERT(ipp->ipp_label_len_v6 != 0);
4426 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4427 hopoptslen = (hopoptslen + 7)/8 * 8;
4428 len += hopoptslen;
4429 } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4430 ASSERT(ipp->ipp_hopoptslen != 0);
4431 len += ipp->ipp_hopoptslen;
4432 }
4433
4434 /*
4435 * En-route destination options
4436 * Only do them if there's a routing header as well
4437 */
4438 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4439 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4440 ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4441 len += ipp->ipp_rthdrdstoptslen;
4442 }
4443 if (ipp->ipp_fields & IPPF_RTHDR) {
4444 ASSERT(ipp->ipp_rthdrlen != 0);
4445 len += ipp->ipp_rthdrlen;
4446 }
4447 if (ipp->ipp_fields & IPPF_DSTOPTS) {
4448 ASSERT(ipp->ipp_dstoptslen != 0);
4449 len += ipp->ipp_dstoptslen;
4450 }
4451 return (len);
4452 }
4453
4454 /*
4455 * All-purpose routine to build a header chain of an IPv6 header
4456 * followed by any required extension headers and a proto header.
4457 *
4458 * The caller has to set the source and destination address as well as
4459 * ip6_plen. The caller has to massage any routing header and compensate
4460 * for the ULP pseudo-header checksum due to the source route.
4461 *
4462 * The extension headers will all be fully filled in.
4463 */
4464 void
ip_build_hdrs_v6(uchar_t * buf,uint_t buf_len,const ip_pkt_t * ipp,uint8_t protocol,uint32_t flowinfo)4465 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4466 uint8_t protocol, uint32_t flowinfo)
4467 {
4468 uint8_t *nxthdr_ptr;
4469 uint8_t *cp;
4470 ip6_t *ip6h = (ip6_t *)buf;
4471
4472 /* Initialize IPv6 header */
4473 ip6h->ip6_vcf =
4474 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4475 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4476
4477 if (ipp->ipp_fields & IPPF_TCLASS) {
4478 /* Overrides the class part of flowinfo */
4479 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4480 ipp->ipp_tclass);
4481 }
4482
4483 if (ipp->ipp_fields & IPPF_HOPLIMIT)
4484 ip6h->ip6_hops = ipp->ipp_hoplimit;
4485 else
4486 ip6h->ip6_hops = ipp->ipp_unicast_hops;
4487
4488 if ((ipp->ipp_fields & IPPF_ADDR) &&
4489 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4490 ip6h->ip6_src = ipp->ipp_addr;
4491
4492 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4493 cp = (uint8_t *)&ip6h[1];
4494 /*
4495 * Here's where we have to start stringing together
4496 * any extension headers in the right order:
4497 * Hop-by-hop, destination, routing, and final destination opts.
4498 */
4499 /*
4500 * If there's a security label here, then we ignore any hop-by-hop
4501 * options the user may try to set.
4502 */
4503 if (ipp->ipp_fields & IPPF_LABEL_V6) {
4504 /*
4505 * Hop-by-hop options with the label.
4506 * Note that ipp_label_v6 is just the option - not
4507 * the hopopts extension header. It also needs to be padded
4508 * to a multiple of 8 bytes.
4509 */
4510 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4511 uint_t hopoptslen;
4512 uint_t padlen;
4513
4514 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4515 hopoptslen = (padlen + 7)/8 * 8;
4516 padlen = hopoptslen - padlen;
4517
4518 *nxthdr_ptr = IPPROTO_HOPOPTS;
4519 nxthdr_ptr = &hbh->ip6h_nxt;
4520 hbh->ip6h_len = hopoptslen/8 - 1;
4521 cp += sizeof (ip6_hbh_t);
4522 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4523 cp += ipp->ipp_label_len_v6;
4524
4525 ASSERT(padlen <= 7);
4526 switch (padlen) {
4527 case 0:
4528 break;
4529 case 1:
4530 cp[0] = IP6OPT_PAD1;
4531 break;
4532 default:
4533 cp[0] = IP6OPT_PADN;
4534 cp[1] = padlen - 2;
4535 bzero(&cp[2], padlen - 2);
4536 break;
4537 }
4538 cp += padlen;
4539 } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4540 /* Hop-by-hop options */
4541 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4542
4543 *nxthdr_ptr = IPPROTO_HOPOPTS;
4544 nxthdr_ptr = &hbh->ip6h_nxt;
4545
4546 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4547 cp += ipp->ipp_hopoptslen;
4548 }
4549 /*
4550 * En-route destination options
4551 * Only do them if there's a routing header as well
4552 */
4553 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4554 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4555 ip6_dest_t *dst = (ip6_dest_t *)cp;
4556
4557 *nxthdr_ptr = IPPROTO_DSTOPTS;
4558 nxthdr_ptr = &dst->ip6d_nxt;
4559
4560 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4561 cp += ipp->ipp_rthdrdstoptslen;
4562 }
4563 /*
4564 * Routing header next
4565 */
4566 if (ipp->ipp_fields & IPPF_RTHDR) {
4567 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4568
4569 *nxthdr_ptr = IPPROTO_ROUTING;
4570 nxthdr_ptr = &rt->ip6r_nxt;
4571
4572 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4573 cp += ipp->ipp_rthdrlen;
4574 }
4575 /*
4576 * Do ultimate destination options
4577 */
4578 if (ipp->ipp_fields & IPPF_DSTOPTS) {
4579 ip6_dest_t *dest = (ip6_dest_t *)cp;
4580
4581 *nxthdr_ptr = IPPROTO_DSTOPTS;
4582 nxthdr_ptr = &dest->ip6d_nxt;
4583
4584 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4585 cp += ipp->ipp_dstoptslen;
4586 }
4587 /*
4588 * Now set the last header pointer to the proto passed in
4589 */
4590 *nxthdr_ptr = protocol;
4591 ASSERT((int)(cp - buf) == buf_len);
4592 }
4593
4594 /*
4595 * Return a pointer to the routing header extension header
4596 * in the IPv6 header(s) chain passed in.
4597 * If none found, return NULL
4598 * Assumes that all extension headers are in same mblk as the v6 header
4599 */
4600 ip6_rthdr_t *
ip_find_rthdr_v6(ip6_t * ip6h,uint8_t * endptr)4601 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4602 {
4603 ip6_dest_t *desthdr;
4604 ip6_frag_t *fraghdr;
4605 uint_t hdrlen;
4606 uint8_t nexthdr;
4607 uint8_t *ptr = (uint8_t *)&ip6h[1];
4608
4609 if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4610 return ((ip6_rthdr_t *)ptr);
4611
4612 /*
4613 * The routing header will precede all extension headers
4614 * other than the hop-by-hop and destination options
4615 * extension headers, so if we see anything other than those,
4616 * we're done and didn't find it.
4617 * We could see a destination options header alone but no
4618 * routing header, in which case we'll return NULL as soon as
4619 * we see anything after that.
4620 * Hop-by-hop and destination option headers are identical,
4621 * so we can use either one we want as a template.
4622 */
4623 nexthdr = ip6h->ip6_nxt;
4624 while (ptr < endptr) {
4625 /* Is there enough left for len + nexthdr? */
4626 if (ptr + MIN_EHDR_LEN > endptr)
4627 return (NULL);
4628
4629 switch (nexthdr) {
4630 case IPPROTO_HOPOPTS:
4631 case IPPROTO_DSTOPTS:
4632 /* Assumes the headers are identical for hbh and dst */
4633 desthdr = (ip6_dest_t *)ptr;
4634 hdrlen = 8 * (desthdr->ip6d_len + 1);
4635 nexthdr = desthdr->ip6d_nxt;
4636 break;
4637
4638 case IPPROTO_ROUTING:
4639 return ((ip6_rthdr_t *)ptr);
4640
4641 case IPPROTO_FRAGMENT:
4642 fraghdr = (ip6_frag_t *)ptr;
4643 hdrlen = sizeof (ip6_frag_t);
4644 nexthdr = fraghdr->ip6f_nxt;
4645 break;
4646
4647 default:
4648 return (NULL);
4649 }
4650 ptr += hdrlen;
4651 }
4652 return (NULL);
4653 }
4654
4655 /*
4656 * Called for source-routed packets originating on this node.
4657 * Manipulates the original routing header by moving every entry up
4658 * one slot, placing the first entry in the v6 header's v6_dst field,
4659 * and placing the ultimate destination in the routing header's last
4660 * slot.
4661 *
4662 * Returns the checksum diference between the ultimate destination
4663 * (last hop in the routing header when the packet is sent) and
4664 * the first hop (ip6_dst when the packet is sent)
4665 */
4666 /* ARGSUSED2 */
4667 uint32_t
ip_massage_options_v6(ip6_t * ip6h,ip6_rthdr_t * rth,netstack_t * ns)4668 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4669 {
4670 uint_t numaddr;
4671 uint_t i;
4672 in6_addr_t *addrptr;
4673 in6_addr_t tmp;
4674 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth;
4675 uint32_t cksm;
4676 uint32_t addrsum = 0;
4677 uint16_t *ptr;
4678
4679 /*
4680 * Perform any processing needed for source routing.
4681 * We know that all extension headers will be in the same mblk
4682 * as the IPv6 header.
4683 */
4684
4685 /*
4686 * If no segments left in header, or the header length field is zero,
4687 * don't move hop addresses around;
4688 * Checksum difference is zero.
4689 */
4690 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4691 return (0);
4692
4693 ptr = (uint16_t *)&ip6h->ip6_dst;
4694 cksm = 0;
4695 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4696 cksm += ptr[i];
4697 }
4698 cksm = (cksm & 0xFFFF) + (cksm >> 16);
4699
4700 /*
4701 * Here's where the fun begins - we have to
4702 * move all addresses up one spot, take the
4703 * first hop and make it our first ip6_dst,
4704 * and place the ultimate destination in the
4705 * newly-opened last slot.
4706 */
4707 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4708 numaddr = rthdr->ip6r0_len / 2;
4709 tmp = *addrptr;
4710 for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4711 *addrptr = addrptr[1];
4712 }
4713 *addrptr = ip6h->ip6_dst;
4714 ip6h->ip6_dst = tmp;
4715
4716 /*
4717 * From the checksummed ultimate destination subtract the checksummed
4718 * current ip6_dst (the first hop address). Return that number.
4719 * (In the v4 case, the second part of this is done in each routine
4720 * that calls ip_massage_options(). We do it all in this one place
4721 * for v6).
4722 */
4723 ptr = (uint16_t *)&ip6h->ip6_dst;
4724 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4725 addrsum += ptr[i];
4726 }
4727 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4728 if ((int)cksm < 0)
4729 cksm--;
4730 cksm = (cksm & 0xFFFF) + (cksm >> 16);
4731
4732 return (cksm);
4733 }
4734
4735 void
ip6_kstat_init(netstackid_t stackid,ip6_stat_t * ip6_statisticsp)4736 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4737 {
4738 kstat_t *ksp;
4739
4740 ip6_stat_t template = {
4741 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 },
4742 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 },
4743 { "ip6_recv_pullup", KSTAT_DATA_UINT64 },
4744 { "ip6_db_ref", KSTAT_DATA_UINT64 },
4745 { "ip6_notaligned", KSTAT_DATA_UINT64 },
4746 { "ip6_multimblk", KSTAT_DATA_UINT64 },
4747 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 },
4748 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 },
4749 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
4750 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 },
4751 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
4752 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
4753 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
4754 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
4755 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
4756 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
4757 };
4758 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4759 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4760 KSTAT_FLAG_VIRTUAL, stackid);
4761
4762 if (ksp == NULL)
4763 return (NULL);
4764
4765 bcopy(&template, ip6_statisticsp, sizeof (template));
4766 ksp->ks_data = (void *)ip6_statisticsp;
4767 ksp->ks_private = (void *)(uintptr_t)stackid;
4768
4769 kstat_install(ksp);
4770 return (ksp);
4771 }
4772
4773 void
ip6_kstat_fini(netstackid_t stackid,kstat_t * ksp)4774 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4775 {
4776 if (ksp != NULL) {
4777 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4778 kstat_delete_netstack(ksp, stackid);
4779 }
4780 }
4781
4782 /*
4783 * The following two functions set and get the value for the
4784 * IPV6_SRC_PREFERENCES socket option.
4785 */
4786 int
ip6_set_src_preferences(ip_xmit_attr_t * ixa,uint32_t prefs)4787 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4788 {
4789 /*
4790 * We only support preferences that are covered by
4791 * IPV6_PREFER_SRC_MASK.
4792 */
4793 if (prefs & ~IPV6_PREFER_SRC_MASK)
4794 return (EINVAL);
4795
4796 /*
4797 * Look for conflicting preferences or default preferences. If
4798 * both bits of a related pair are clear, the application wants the
4799 * system's default value for that pair. Both bits in a pair can't
4800 * be set.
4801 */
4802 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4803 prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4804 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4805 IPV6_PREFER_SRC_MIPMASK) {
4806 return (EINVAL);
4807 }
4808 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4809 prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4810 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4811 IPV6_PREFER_SRC_TMPMASK) {
4812 return (EINVAL);
4813 }
4814 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4815 prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4816 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4817 IPV6_PREFER_SRC_CGAMASK) {
4818 return (EINVAL);
4819 }
4820
4821 ixa->ixa_src_preferences = prefs;
4822 return (0);
4823 }
4824
4825 size_t
ip6_get_src_preferences(ip_xmit_attr_t * ixa,uint32_t * val)4826 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4827 {
4828 *val = ixa->ixa_src_preferences;
4829 return (sizeof (ixa->ixa_src_preferences));
4830 }
4831
4832 /*
4833 * Get the size of the IP options (including the IP headers size)
4834 * without including the AH header's size. If till_ah is B_FALSE,
4835 * and if AH header is present, dest options beyond AH header will
4836 * also be included in the returned size.
4837 */
4838 int
ipsec_ah_get_hdr_size_v6(mblk_t * mp,boolean_t till_ah)4839 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4840 {
4841 ip6_t *ip6h;
4842 uint8_t nexthdr;
4843 uint8_t *whereptr;
4844 ip6_hbh_t *hbhhdr;
4845 ip6_dest_t *dsthdr;
4846 ip6_rthdr_t *rthdr;
4847 int ehdrlen;
4848 int size;
4849 ah_t *ah;
4850
4851 ip6h = (ip6_t *)mp->b_rptr;
4852 size = IPV6_HDR_LEN;
4853 nexthdr = ip6h->ip6_nxt;
4854 whereptr = (uint8_t *)&ip6h[1];
4855 for (;;) {
4856 /* Assume IP has already stripped it */
4857 ASSERT(nexthdr != IPPROTO_FRAGMENT);
4858 switch (nexthdr) {
4859 case IPPROTO_HOPOPTS:
4860 hbhhdr = (ip6_hbh_t *)whereptr;
4861 nexthdr = hbhhdr->ip6h_nxt;
4862 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4863 break;
4864 case IPPROTO_DSTOPTS:
4865 dsthdr = (ip6_dest_t *)whereptr;
4866 nexthdr = dsthdr->ip6d_nxt;
4867 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4868 break;
4869 case IPPROTO_ROUTING:
4870 rthdr = (ip6_rthdr_t *)whereptr;
4871 nexthdr = rthdr->ip6r_nxt;
4872 ehdrlen = 8 * (rthdr->ip6r_len + 1);
4873 break;
4874 default :
4875 if (till_ah) {
4876 ASSERT(nexthdr == IPPROTO_AH);
4877 return (size);
4878 }
4879 /*
4880 * If we don't have a AH header to traverse,
4881 * return now. This happens normally for
4882 * outbound datagrams where we have not inserted
4883 * the AH header.
4884 */
4885 if (nexthdr != IPPROTO_AH) {
4886 return (size);
4887 }
4888
4889 /*
4890 * We don't include the AH header's size
4891 * to be symmetrical with other cases where
4892 * we either don't have a AH header (outbound)
4893 * or peek into the AH header yet (inbound and
4894 * not pulled up yet).
4895 */
4896 ah = (ah_t *)whereptr;
4897 nexthdr = ah->ah_nexthdr;
4898 ehdrlen = (ah->ah_length << 2) + 8;
4899
4900 if (nexthdr == IPPROTO_DSTOPTS) {
4901 if (whereptr + ehdrlen >= mp->b_wptr) {
4902 /*
4903 * The destination options header
4904 * is not part of the first mblk.
4905 */
4906 whereptr = mp->b_cont->b_rptr;
4907 } else {
4908 whereptr += ehdrlen;
4909 }
4910
4911 dsthdr = (ip6_dest_t *)whereptr;
4912 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4913 size += ehdrlen;
4914 }
4915 return (size);
4916 }
4917 whereptr += ehdrlen;
4918 size += ehdrlen;
4919 }
4920 }
4921
4922 /*
4923 * Utility routine that checks if `v6srcp' is a valid address on underlying
4924 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif
4925 * associated with `v6srcp' on success. NOTE: if this is not called from
4926 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
4927 * group during or after this lookup.
4928 */
4929 boolean_t
ipif_lookup_testaddr_v6(ill_t * ill,const in6_addr_t * v6srcp,ipif_t ** ipifp)4930 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
4931 {
4932 ipif_t *ipif;
4933
4934
4935 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
4936 if (ipif != NULL) {
4937 if (ipifp != NULL)
4938 *ipifp = ipif;
4939 else
4940 ipif_refrele(ipif);
4941 return (B_TRUE);
4942 }
4943
4944 if (ip_debug > 2) {
4945 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
4946 "src %s\n", AF_INET6, v6srcp);
4947 }
4948 return (B_FALSE);
4949 }
4950