1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
24 */
25
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/dlpi.h>
29 #include <sys/stropts.h>
30 #include <sys/sysmacros.h>
31 #include <sys/strsun.h>
32 #include <sys/strlog.h>
33 #include <sys/strsubr.h>
34 #define _SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/sdt.h>
41 #include <sys/kobj.h>
42 #include <sys/zone.h>
43 #include <sys/neti.h>
44 #include <sys/hook.h>
45
46 #include <sys/kmem.h>
47 #include <sys/systm.h>
48 #include <sys/param.h>
49 #include <sys/socket.h>
50 #include <sys/vtrace.h>
51 #include <sys/isa_defs.h>
52 #include <sys/atomic.h>
53 #include <sys/policy.h>
54 #include <sys/mac.h>
55 #include <net/if.h>
56 #include <net/if_types.h>
57 #include <net/route.h>
58 #include <net/if_dl.h>
59 #include <sys/sockio.h>
60 #include <netinet/in.h>
61 #include <netinet/ip6.h>
62 #include <netinet/icmp6.h>
63 #include <netinet/sctp.h>
64
65 #include <inet/common.h>
66 #include <inet/mi.h>
67 #include <inet/optcom.h>
68 #include <inet/mib2.h>
69 #include <inet/nd.h>
70 #include <inet/arp.h>
71
72 #include <inet/ip.h>
73 #include <inet/ip_impl.h>
74 #include <inet/ip6.h>
75 #include <inet/ip6_asp.h>
76 #include <inet/tcp.h>
77 #include <inet/tcp_impl.h>
78 #include <inet/udp_impl.h>
79 #include <inet/ipp_common.h>
80
81 #include <inet/ip_multi.h>
82 #include <inet/ip_if.h>
83 #include <inet/ip_ire.h>
84 #include <inet/ip_rts.h>
85 #include <inet/ip_ndp.h>
86 #include <net/pfkeyv2.h>
87 #include <inet/sadb.h>
88 #include <inet/ipsec_impl.h>
89 #include <inet/iptun/iptun_impl.h>
90 #include <inet/sctp_ip.h>
91 #include <sys/pattr.h>
92 #include <inet/ipclassifier.h>
93 #include <inet/ipsecah.h>
94 #include <inet/rawip_impl.h>
95 #include <inet/rts_impl.h>
96 #include <sys/squeue_impl.h>
97 #include <sys/squeue.h>
98
99 #include <sys/tsol/label.h>
100 #include <sys/tsol/tnet.h>
101
102 /* Temporary; for CR 6451644 work-around */
103 #include <sys/ethernet.h>
104
/*
 * Naming conventions:
 *	These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */
121
122 /*
123 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
124 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
125 * from IANA. This mechanism will remain in effect until an official
126 * number is obtained.
127 */
128 uchar_t ip6opt_ls;
129
130 const in6_addr_t ipv6_all_ones =
131 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
132 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
133
134 #ifdef _BIG_ENDIAN
135 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
136 #else /* _BIG_ENDIAN */
137 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
138 #endif /* _BIG_ENDIAN */
139
140 #ifdef _BIG_ENDIAN
141 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
142 #else /* _BIG_ENDIAN */
143 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
144 #endif /* _BIG_ENDIAN */
145
146 #ifdef _BIG_ENDIAN
147 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
148 #else /* _BIG_ENDIAN */
149 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
150 #endif /* _BIG_ENDIAN */
151
152 #ifdef _BIG_ENDIAN
153 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
154 #else /* _BIG_ENDIAN */
155 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
156 #endif /* _BIG_ENDIAN */
157
158 #ifdef _BIG_ENDIAN
159 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
160 #else /* _BIG_ENDIAN */
161 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
162 #endif /* _BIG_ENDIAN */
163
164 #ifdef _BIG_ENDIAN
165 const in6_addr_t ipv6_solicited_node_mcast =
166 { 0xff020000U, 0, 0x00000001U, 0xff000000U };
167 #else /* _BIG_ENDIAN */
168 const in6_addr_t ipv6_solicited_node_mcast =
169 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
170 #endif /* _BIG_ENDIAN */
171
172 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
173 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
174 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
175 ip_recv_attr_t *);
176 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
177 ip_recv_attr_t *);
178 static void icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
179 in6_addr_t *, ip_recv_attr_t *);
180 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
181 ip_recv_attr_t *);
182 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
183
184 /*
185 * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
186 * If the ICMP message is consumed by IP, i.e., it should not be delivered
187 * to any IPPROTO_ICMP raw sockets, then it returns NULL.
188 * Likewise, if the ICMP error is misformed (too short, etc), then it
189 * returns NULL. The caller uses this to determine whether or not to send
190 * to raw sockets.
191 *
192 * All error messages are passed to the matching transport stream.
193 *
194 * See comment for icmp_inbound_v4() on how IPsec is handled.
195 */
196 mblk_t *
icmp_inbound_v6(mblk_t * mp,ip_recv_attr_t * ira)197 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
198 {
199 icmp6_t *icmp6;
200 ip6_t *ip6h; /* Outer header */
201 int ip_hdr_length; /* Outer header length */
202 boolean_t interested;
203 ill_t *ill = ira->ira_ill;
204 ip_stack_t *ipst = ill->ill_ipst;
205 mblk_t *mp_ret = NULL;
206
207 ip6h = (ip6_t *)mp->b_rptr;
208
209 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
210
211 /* Check for Martian packets */
212 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
213 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
214 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
215 freemsg(mp);
216 return (NULL);
217 }
218
219 /* Make sure ira_l2src is set for ndp_input */
220 if (!(ira->ira_flags & IRAF_L2SRC_SET))
221 ip_setl2src(mp, ira, ira->ira_rill);
222
223 ip_hdr_length = ira->ira_ip_hdr_length;
224 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
225 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
226 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
227 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
228 freemsg(mp);
229 return (NULL);
230 }
231 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
232 if (ip6h == NULL) {
233 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
234 freemsg(mp);
235 return (NULL);
236 }
237 }
238
239 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
240 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
241 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
242 icmp6->icmp6_code));
243
244 /*
245 * We will set "interested" to "true" if we should pass a copy to
246 * the transport i.e., if it is an error message.
247 */
248 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
249
250 switch (icmp6->icmp6_type) {
251 case ICMP6_DST_UNREACH:
252 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
253 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
254 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
255 break;
256
257 case ICMP6_TIME_EXCEEDED:
258 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
259 break;
260
261 case ICMP6_PARAM_PROB:
262 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
263 break;
264
265 case ICMP6_PACKET_TOO_BIG:
266 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
267 break;
268
269 case ICMP6_ECHO_REQUEST:
270 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
271 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
272 !ipst->ips_ipv6_resp_echo_mcast)
273 break;
274
275 /*
276 * We must have exclusive use of the mblk to convert it to
277 * a response.
278 * If not, we copy it.
279 */
280 if (mp->b_datap->db_ref > 1) {
281 mblk_t *mp1;
282
283 mp1 = copymsg(mp);
284 if (mp1 == NULL) {
285 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
286 ip_drop_input("ipIfStatsInDiscards - copymsg",
287 mp, ill);
288 freemsg(mp);
289 return (NULL);
290 }
291 freemsg(mp);
292 mp = mp1;
293 ip6h = (ip6_t *)mp->b_rptr;
294 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
295 }
296
297 icmp6->icmp6_type = ICMP6_ECHO_REPLY;
298 icmp_send_reply_v6(mp, ip6h, icmp6, ira);
299 return (NULL);
300
301 case ICMP6_ECHO_REPLY:
302 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
303 break;
304
305 case ND_ROUTER_SOLICIT:
306 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
307 break;
308
309 case ND_ROUTER_ADVERT:
310 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
311 break;
312
313 case ND_NEIGHBOR_SOLICIT:
314 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
315 ndp_input(mp, ira);
316 return (NULL);
317
318 case ND_NEIGHBOR_ADVERT:
319 BUMP_MIB(ill->ill_icmp6_mib,
320 ipv6IfIcmpInNeighborAdvertisements);
321 ndp_input(mp, ira);
322 return (NULL);
323
324 case ND_REDIRECT:
325 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
326
327 if (ipst->ips_ipv6_ignore_redirect)
328 break;
329
330 /* We now allow a RAW socket to receive this. */
331 interested = B_TRUE;
332 break;
333
334 /*
335 * The next three icmp messages will be handled by MLD.
336 * Pass all valid MLD packets up to any process(es)
337 * listening on a raw ICMP socket.
338 */
339 case MLD_LISTENER_QUERY:
340 case MLD_LISTENER_REPORT:
341 case MLD_LISTENER_REDUCTION:
342 mp = mld_input(mp, ira);
343 return (mp);
344 default:
345 break;
346 }
347 /*
348 * See if there is an ICMP client to avoid an extra copymsg/freemsg
349 * if there isn't one.
350 */
351 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
352 /* If there is an ICMP client and we want one too, copy it. */
353
354 if (!interested) {
355 /* Caller will deliver to RAW sockets */
356 return (mp);
357 }
358 mp_ret = copymsg(mp);
359 if (mp_ret == NULL) {
360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
361 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
362 }
363 } else if (!interested) {
364 /* Neither we nor raw sockets are interested. Drop packet now */
365 freemsg(mp);
366 return (NULL);
367 }
368
369 /*
370 * ICMP error or redirect packet. Make sure we have enough of
371 * the header and that db_ref == 1 since we might end up modifying
372 * the packet.
373 */
374 if (mp->b_cont != NULL) {
375 if (ip_pullup(mp, -1, ira) == NULL) {
376 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
377 ip_drop_input("ipIfStatsInDiscards - ip_pullup",
378 mp, ill);
379 freemsg(mp);
380 return (mp_ret);
381 }
382 }
383
384 if (mp->b_datap->db_ref > 1) {
385 mblk_t *mp1;
386
387 mp1 = copymsg(mp);
388 if (mp1 == NULL) {
389 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
390 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
391 freemsg(mp);
392 return (mp_ret);
393 }
394 freemsg(mp);
395 mp = mp1;
396 }
397
398 /*
399 * In case mp has changed, verify the message before any further
400 * processes.
401 */
402 ip6h = (ip6_t *)mp->b_rptr;
403 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
404 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
405 freemsg(mp);
406 return (mp_ret);
407 }
408
409 switch (icmp6->icmp6_type) {
410 case ND_REDIRECT:
411 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
412 break;
413 case ICMP6_PACKET_TOO_BIG:
414 /* Update DCE and adjust MTU is icmp header if needed */
415 icmp_inbound_too_big_v6(icmp6, ira);
416 /* FALLTHRU */
417 default:
418 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
419 break;
420 }
421
422 return (mp_ret);
423 }
424
425 /*
426 * Send an ICMP echo reply.
427 * The caller has already updated the payload part of the packet.
428 * We handle the ICMP checksum, IP source address selection and feed
429 * the packet into ip_output_simple.
430 */
431 static void
icmp_send_reply_v6(mblk_t * mp,ip6_t * ip6h,icmp6_t * icmp6,ip_recv_attr_t * ira)432 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
433 ip_recv_attr_t *ira)
434 {
435 uint_t ip_hdr_length = ira->ira_ip_hdr_length;
436 ill_t *ill = ira->ira_ill;
437 ip_stack_t *ipst = ill->ill_ipst;
438 ip_xmit_attr_t ixas;
439 in6_addr_t origsrc;
440
441 /*
442 * Remove any extension headers (do not reverse a source route)
443 * and clear the flow id (keep traffic class for now).
444 */
445 if (ip_hdr_length != IPV6_HDR_LEN) {
446 int i;
447
448 for (i = 0; i < IPV6_HDR_LEN; i++) {
449 mp->b_rptr[ip_hdr_length - i - 1] =
450 mp->b_rptr[IPV6_HDR_LEN - i - 1];
451 }
452 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
453 ip6h = (ip6_t *)mp->b_rptr;
454 ip6h->ip6_nxt = IPPROTO_ICMPV6;
455 i = ntohs(ip6h->ip6_plen);
456 i -= (ip_hdr_length - IPV6_HDR_LEN);
457 ip6h->ip6_plen = htons(i);
458 ip_hdr_length = IPV6_HDR_LEN;
459 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
460 }
461 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
462
463 /* Reverse the source and destination addresses. */
464 origsrc = ip6h->ip6_src;
465 ip6h->ip6_src = ip6h->ip6_dst;
466 ip6h->ip6_dst = origsrc;
467
468 /* set the hop limit */
469 ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
470
471 /*
472 * Prepare for checksum by putting icmp length in the icmp
473 * checksum field. The checksum is calculated in ip_output
474 */
475 icmp6->icmp6_cksum = ip6h->ip6_plen;
476
477 bzero(&ixas, sizeof (ixas));
478 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
479 ixas.ixa_zoneid = ira->ira_zoneid;
480 ixas.ixa_cred = kcred;
481 ixas.ixa_cpid = NOPID;
482 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */
483 ixas.ixa_ifindex = 0;
484 ixas.ixa_ipst = ipst;
485 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
486
487 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
488 /*
489 * This packet should go out the same way as it
490 * came in i.e in clear, independent of the IPsec
491 * policy for transmitting packets.
492 */
493 ixas.ixa_flags |= IXAF_NO_IPSEC;
494 } else {
495 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
496 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
497 /* Note: mp already consumed and ip_drop_packet done */
498 return;
499 }
500 }
501
502 /* Was the destination (now source) link-local? Send out same group */
503 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
504 ixas.ixa_flags |= IXAF_SCOPEID_SET;
505 if (IS_UNDER_IPMP(ill))
506 ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
507 else
508 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
509 }
510
511 if (ira->ira_flags & IRAF_MULTIBROADCAST) {
512 /*
513 * Not one or our addresses (IRE_LOCALs), thus we let
514 * ip_output_simple pick the source.
515 */
516 ip6h->ip6_src = ipv6_all_zeros;
517 ixas.ixa_flags |= IXAF_SET_SOURCE;
518 }
519
520 /* Should we send using dce_pmtu? */
521 if (ipst->ips_ipv6_icmp_return_pmtu)
522 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
523
524 (void) ip_output_simple(mp, &ixas);
525 ixa_cleanup(&ixas);
526
527 }
528
529 /*
530 * Verify the ICMP messages for either for ICMP error or redirect packet.
531 * The caller should have fully pulled up the message. If it's a redirect
532 * packet, only basic checks on IP header will be done; otherwise, verify
533 * the packet by looking at the included ULP header.
534 *
535 * Called before icmp_inbound_error_fanout_v6 is called.
536 */
537 static boolean_t
icmp_inbound_verify_v6(mblk_t * mp,icmp6_t * icmp6,ip_recv_attr_t * ira)538 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
539 {
540 ill_t *ill = ira->ira_ill;
541 uint16_t hdr_length;
542 uint8_t *nexthdrp;
543 uint8_t nexthdr;
544 ip_stack_t *ipst = ill->ill_ipst;
545 conn_t *connp;
546 ip6_t *ip6h; /* Inner header */
547
548 ip6h = (ip6_t *)&icmp6[1];
549 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
550 goto truncated;
551
552 if (icmp6->icmp6_type == ND_REDIRECT) {
553 hdr_length = sizeof (nd_redirect_t);
554 } else {
555 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
556 goto discard_pkt;
557 hdr_length = IPV6_HDR_LEN;
558 }
559
560 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
561 goto truncated;
562
563 /*
564 * Stop here for ICMP_REDIRECT.
565 */
566 if (icmp6->icmp6_type == ND_REDIRECT)
567 return (B_TRUE);
568
569 /*
570 * ICMP errors only.
571 */
572 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
573 goto discard_pkt;
574 nexthdr = *nexthdrp;
575
576 /* Try to pass the ICMP message to clients who need it */
577 switch (nexthdr) {
578 case IPPROTO_UDP:
579 /*
580 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
581 * transport header.
582 */
583 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
584 mp->b_wptr)
585 goto truncated;
586 break;
587 case IPPROTO_TCP: {
588 tcpha_t *tcpha;
589
590 /*
591 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
592 * transport header.
593 */
594 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
595 mp->b_wptr)
596 goto truncated;
597
598 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
599 /*
600 * With IPMP we need to match across group, which we do
601 * since we have the upper ill from ira_ill.
602 */
603 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
604 ill->ill_phyint->phyint_ifindex, ipst);
605 if (connp == NULL)
606 goto discard_pkt;
607
608 if ((connp->conn_verifyicmp != NULL) &&
609 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
610 CONN_DEC_REF(connp);
611 goto discard_pkt;
612 }
613 CONN_DEC_REF(connp);
614 break;
615 }
616 case IPPROTO_SCTP:
617 /*
618 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
619 * transport header.
620 */
621 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
622 mp->b_wptr)
623 goto truncated;
624 break;
625 case IPPROTO_ESP:
626 case IPPROTO_AH:
627 break;
628 case IPPROTO_ENCAP:
629 case IPPROTO_IPV6: {
630 /* Look for self-encapsulated packets that caused an error */
631 ip6_t *in_ip6h;
632
633 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
634 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
635 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
636 goto truncated;
637 break;
638 }
639 default:
640 break;
641 }
642
643 return (B_TRUE);
644
645 discard_pkt:
646 /* Bogus ICMP error. */
647 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
648 return (B_FALSE);
649
650 truncated:
651 /* We pulled up everthing already. Must be truncated */
652 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
653 return (B_FALSE);
654 }
655
656 /*
657 * Process received IPv6 ICMP Packet too big.
658 * The caller is responsible for validating the packet before passing it in
659 * and also to fanout the ICMP error to any matching transport conns. Assumes
660 * the message has been fully pulled up.
661 *
662 * Before getting here, the caller has called icmp_inbound_verify_v6()
663 * that should have verified with ULP to prevent undoing the changes we're
664 * going to make to DCE. For example, TCP might have verified that the packet
665 * which generated error is in the send window.
666 *
667 * In some cases modified this MTU in the ICMP header packet; the caller
668 * should pass to the matching ULP after this returns.
669 */
670 static void
icmp_inbound_too_big_v6(icmp6_t * icmp6,ip_recv_attr_t * ira)671 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
672 {
673 uint32_t mtu;
674 dce_t *dce;
675 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */
676 ip_stack_t *ipst = ill->ill_ipst;
677 int old_max_frag;
678 in6_addr_t final_dst;
679 ip6_t *ip6h; /* Inner IP header */
680
681 /* Caller has already pulled up everything. */
682 ip6h = (ip6_t *)&icmp6[1];
683 final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
684
685 /*
686 * For link local destinations matching simply on address is not
687 * sufficient. Same link local addresses for different ILL's is
688 * possible.
689 */
690 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
691 dce = dce_lookup_and_add_v6(&final_dst,
692 ill->ill_phyint->phyint_ifindex, ipst);
693 } else {
694 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
695 }
696 if (dce == NULL) {
697 /* Couldn't add a unique one - ENOMEM */
698 if (ip_debug > 2) {
699 /* ip1dbg */
700 pr_addr_dbg("icmp_inbound_too_big_v6:"
701 "no dce for dst %s\n", AF_INET6,
702 &final_dst);
703 }
704 return;
705 }
706
707 mtu = ntohl(icmp6->icmp6_mtu);
708
709 mutex_enter(&dce->dce_lock);
710 if (dce->dce_flags & DCEF_PMTU)
711 old_max_frag = dce->dce_pmtu;
712 else if (IN6_IS_ADDR_MULTICAST(&final_dst))
713 old_max_frag = ill->ill_mc_mtu;
714 else
715 old_max_frag = ill->ill_mtu;
716
717 if (mtu < IPV6_MIN_MTU) {
718 ip1dbg(("Received mtu less than IPv6 "
719 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
720 mtu = IPV6_MIN_MTU;
721 /*
722 * If an mtu less than IPv6 min mtu is received,
723 * we must include a fragment header in
724 * subsequent packets.
725 */
726 dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
727 } else {
728 dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
729 }
730 ip1dbg(("Received mtu from router: %d\n", mtu));
731 dce->dce_pmtu = MIN(old_max_frag, mtu);
732
733 /* Prepare to send the new max frag size for the ULP. */
734 if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) {
735 /*
736 * If we need a fragment header in every packet
737 * (above case or multirouting), make sure the
738 * ULP takes it into account when computing the
739 * payload size.
740 */
741 icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
742 } else {
743 icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
744 }
745 /* We now have a PMTU for sure */
746 dce->dce_flags |= DCEF_PMTU;
747 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
748 mutex_exit(&dce->dce_lock);
749 /*
750 * After dropping the lock the new value is visible to everyone.
751 * Then we bump the generation number so any cached values reinspect
752 * the dce_t.
753 */
754 dce_increment_generation(dce);
755 dce_refrele(dce);
756 }
757
758 /*
759 * Fanout received ICMPv6 error packets to the transports.
760 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
761 *
762 * The caller must have called icmp_inbound_verify_v6.
763 */
764 void
icmp_inbound_error_fanout_v6(mblk_t * mp,icmp6_t * icmp6,ip_recv_attr_t * ira)765 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
766 {
767 uint16_t *up; /* Pointer to ports in ULP header */
768 uint32_t ports; /* reversed ports for fanout */
769 ip6_t rip6h; /* With reversed addresses */
770 ip6_t *ip6h; /* Inner IP header */
771 uint16_t hdr_length; /* Inner IP header length */
772 uint8_t *nexthdrp;
773 uint8_t nexthdr;
774 tcpha_t *tcpha;
775 conn_t *connp;
776 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */
777 ip_stack_t *ipst = ill->ill_ipst;
778 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
779
780 /* Caller has already pulled up everything. */
781 ip6h = (ip6_t *)&icmp6[1];
782 ASSERT(mp->b_cont == NULL);
783 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
784
785 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
786 goto drop_pkt;
787 nexthdr = *nexthdrp;
788 ira->ira_protocol = nexthdr;
789
790 /*
791 * We need a separate IP header with the source and destination
792 * addresses reversed to do fanout/classification because the ip6h in
793 * the ICMPv6 error is in the form we sent it out.
794 */
795 rip6h.ip6_src = ip6h->ip6_dst;
796 rip6h.ip6_dst = ip6h->ip6_src;
797 rip6h.ip6_nxt = nexthdr;
798
799 /* Try to pass the ICMP message to clients who need it */
800 switch (nexthdr) {
801 case IPPROTO_UDP: {
802 /* Attempt to find a client stream based on port. */
803 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
804
805 /* Note that we send error to all matches. */
806 ira->ira_flags |= IRAF_ICMP_ERROR;
807 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
808 ira->ira_flags &= ~IRAF_ICMP_ERROR;
809 return;
810 }
811 case IPPROTO_TCP: {
812 /*
813 * Attempt to find a client stream based on port.
814 * Note that we do a reverse lookup since the header is
815 * in the form we sent it out.
816 */
817 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
818 /*
819 * With IPMP we need to match across group, which we do
820 * since we have the upper ill from ira_ill.
821 */
822 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
823 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
824 if (connp == NULL) {
825 goto drop_pkt;
826 }
827
828 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
829 (ira->ira_flags & IRAF_IPSEC_SECURE)) {
830 mp = ipsec_check_inbound_policy(mp, connp,
831 NULL, ip6h, ira);
832 if (mp == NULL) {
833 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
834 /* Note that mp is NULL */
835 ip_drop_input("ipIfStatsInDiscards", mp, ill);
836 CONN_DEC_REF(connp);
837 return;
838 }
839 }
840
841 ira->ira_flags |= IRAF_ICMP_ERROR;
842 if (IPCL_IS_TCP(connp)) {
843 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
844 connp->conn_recvicmp, connp, ira, SQ_FILL,
845 SQTAG_TCP6_INPUT_ICMP_ERR);
846 } else {
847 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
848 ill_t *rill = ira->ira_rill;
849
850 ira->ira_ill = ira->ira_rill = NULL;
851 (connp->conn_recv)(connp, mp, NULL, ira);
852 CONN_DEC_REF(connp);
853 ira->ira_ill = ill;
854 ira->ira_rill = rill;
855 }
856 ira->ira_flags &= ~IRAF_ICMP_ERROR;
857 return;
858
859 }
860 case IPPROTO_SCTP:
861 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
862 /* Find a SCTP client stream for this packet. */
863 ((uint16_t *)&ports)[0] = up[1];
864 ((uint16_t *)&ports)[1] = up[0];
865
866 ira->ira_flags |= IRAF_ICMP_ERROR;
867 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
868 ira->ira_flags &= ~IRAF_ICMP_ERROR;
869 return;
870
871 case IPPROTO_ESP:
872 case IPPROTO_AH:
873 if (!ipsec_loaded(ipss)) {
874 ip_proto_not_sup(mp, ira);
875 return;
876 }
877
878 if (nexthdr == IPPROTO_ESP)
879 mp = ipsecesp_icmp_error(mp, ira);
880 else
881 mp = ipsecah_icmp_error(mp, ira);
882 if (mp == NULL)
883 return;
884
885 /* Just in case ipsec didn't preserve the NULL b_cont */
886 if (mp->b_cont != NULL) {
887 if (!pullupmsg(mp, -1))
888 goto drop_pkt;
889 }
890
891 /*
892 * If succesful, the mp has been modified to not include
893 * the ESP/AH header so we can fanout to the ULP's icmp
894 * error handler.
895 */
896 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
897 goto drop_pkt;
898
899 ip6h = (ip6_t *)mp->b_rptr;
900 /* Don't call hdr_length_v6() unless you have to. */
901 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
902 hdr_length = ip_hdr_length_v6(mp, ip6h);
903 else
904 hdr_length = IPV6_HDR_LEN;
905
906 /* Verify the modified message before any further processes. */
907 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
908 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
909 freemsg(mp);
910 return;
911 }
912
913 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
914 return;
915
916 case IPPROTO_IPV6: {
917 /* Look for self-encapsulated packets that caused an error */
918 ip6_t *in_ip6h;
919
920 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
921
922 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
923 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
924 /*
925 * Self-encapsulated case. As in the ipv4 case,
926 * we need to strip the 2nd IP header. Since mp
927 * is already pulled-up, we can simply bcopy
928 * the 3rd header + data over the 2nd header.
929 */
930 uint16_t unused_len;
931
932 /*
933 * Make sure we don't do recursion more than once.
934 */
935 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
936 &unused_len, &nexthdrp) ||
937 *nexthdrp == IPPROTO_IPV6) {
938 goto drop_pkt;
939 }
940
941 /*
942 * Copy the 3rd header + remaining data on top
943 * of the 2nd header.
944 */
945 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
946
947 /*
948 * Subtract length of the 2nd header.
949 */
950 mp->b_wptr -= hdr_length;
951
952 ip6h = (ip6_t *)mp->b_rptr;
953 /* Don't call hdr_length_v6() unless you have to. */
954 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
955 hdr_length = ip_hdr_length_v6(mp, ip6h);
956 else
957 hdr_length = IPV6_HDR_LEN;
958
959 /*
960 * Verify the modified message before any further
961 * processes.
962 */
963 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
964 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
965 freemsg(mp);
966 return;
967 }
968
969 /*
970 * Now recurse, and see what I _really_ should be
971 * doing here.
972 */
973 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
974 return;
975 }
976 /* FALLTHRU */
977 }
978 case IPPROTO_ENCAP:
979 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
980 &rip6h.ip6_dst, ipst)) != NULL) {
981 ira->ira_flags |= IRAF_ICMP_ERROR;
982 connp->conn_recvicmp(connp, mp, NULL, ira);
983 CONN_DEC_REF(connp);
984 ira->ira_flags &= ~IRAF_ICMP_ERROR;
985 return;
986 }
987 /*
988 * No IP tunnel is interested, fallthrough and see
989 * if a raw socket will want it.
990 */
991 /* FALLTHRU */
992 default:
993 ira->ira_flags |= IRAF_ICMP_ERROR;
994 ASSERT(ira->ira_protocol == nexthdr);
995 ip_fanout_proto_v6(mp, &rip6h, ira);
996 ira->ira_flags &= ~IRAF_ICMP_ERROR;
997 return;
998 }
999 /* NOTREACHED */
1000 drop_pkt:
1001 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1002 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
1003 freemsg(mp);
1004 }
1005
1006 /*
1007 * Process received IPv6 ICMP Redirect messages.
1008 * Assumes the caller has verified that the headers are in the pulled up mblk.
1009 * Consumes mp.
1010 */
1011 /* ARGSUSED */
1012 static void
icmp_redirect_v6(mblk_t * mp,ip6_t * ip6h,nd_redirect_t * rd,ip_recv_attr_t * ira)1013 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1014 ip_recv_attr_t *ira)
1015 {
1016 ire_t *ire, *nire;
1017 ire_t *prev_ire = NULL;
1018 ire_t *redir_ire;
1019 in6_addr_t *src, *dst, *gateway;
1020 nd_opt_hdr_t *opt;
1021 nce_t *nce;
1022 int ncec_flags = 0;
1023 int err = 0;
1024 boolean_t redirect_to_router = B_FALSE;
1025 int len;
1026 int optlen;
1027 ill_t *ill = ira->ira_rill;
1028 ill_t *rill = ira->ira_rill;
1029 ip_stack_t *ipst = ill->ill_ipst;
1030
1031 /*
1032 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1033 * and make it be the IPMP upper so avoid being confused by a packet
1034 * addressed to a unicast address on a different ill.
1035 */
1036 if (IS_UNDER_IPMP(rill)) {
1037 rill = ipmp_ill_hold_ipmp_ill(rill);
1038 if (rill == NULL) {
1039 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1040 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1041 mp, ill);
1042 freemsg(mp);
1043 return;
1044 }
1045 ASSERT(rill != ira->ira_rill);
1046 }
1047
1048 len = mp->b_wptr - (uchar_t *)rd;
1049 src = &ip6h->ip6_src;
1050 dst = &rd->nd_rd_dst;
1051 gateway = &rd->nd_rd_target;
1052
1053 /* Verify if it is a valid redirect */
1054 if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1055 (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1056 (rd->nd_rd_code != 0) ||
1057 (len < sizeof (nd_redirect_t)) ||
1058 (IN6_IS_ADDR_V4MAPPED(dst)) ||
1059 (IN6_IS_ADDR_MULTICAST(dst))) {
1060 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1061 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1062 goto fail_redirect;
1063 }
1064
1065 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1066 IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1067 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1068 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1069 mp, ill);
1070 goto fail_redirect;
1071 }
1072
1073 optlen = len - sizeof (nd_redirect_t);
1074 if (optlen != 0) {
1075 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1076 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1077 ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1078 mp, ill);
1079 goto fail_redirect;
1080 }
1081 }
1082
1083 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1084 redirect_to_router = B_TRUE;
1085 ncec_flags |= NCE_F_ISROUTER;
1086 } else {
1087 gateway = dst; /* Add nce for dst */
1088 }
1089
1090
1091 /*
1092 * Verify that the IP source address of the redirect is
1093 * the same as the current first-hop router for the specified
1094 * ICMP destination address.
1095 * Also, Make sure we had a route for the dest in question and
1096 * that route was pointing to the old gateway (the source of the
1097 * redirect packet.)
1098 * We do longest match and then compare ire_gateway_addr_v6 below.
1099 */
1100 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1101 ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1102
1103 /*
1104 * Check that
1105 * the redirect was not from ourselves
1106 * old gateway is still directly reachable
1107 */
1108 if (prev_ire == NULL ||
1109 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1110 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1111 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1112 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1113 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1114 goto fail_redirect;
1115 }
1116
1117 ASSERT(prev_ire->ire_ill != NULL);
1118 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1119 ncec_flags |= NCE_F_NONUD;
1120
1121 opt = (nd_opt_hdr_t *)&rd[1];
1122 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1123 if (opt != NULL) {
1124 err = nce_lookup_then_add_v6(rill,
1125 (uchar_t *)&opt[1], /* Link layer address */
1126 rill->ill_phys_addr_length,
1127 gateway, ncec_flags, ND_STALE, &nce);
1128 switch (err) {
1129 case 0:
1130 nce_refrele(nce);
1131 break;
1132 case EEXIST:
1133 /*
1134 * Check to see if link layer address has changed and
1135 * process the ncec_state accordingly.
1136 */
1137 nce_process(nce->nce_common,
1138 (uchar_t *)&opt[1], 0, B_FALSE);
1139 nce_refrele(nce);
1140 break;
1141 default:
1142 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1143 err));
1144 goto fail_redirect;
1145 }
1146 }
1147 if (redirect_to_router) {
1148 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1149
1150 /*
1151 * Create a Route Association. This will allow us to remember
1152 * a router told us to use the particular gateway.
1153 */
1154 ire = ire_create_v6(
1155 dst,
1156 &ipv6_all_ones, /* mask */
1157 gateway, /* gateway addr */
1158 IRE_HOST,
1159 prev_ire->ire_ill,
1160 ALL_ZONES,
1161 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1162 NULL,
1163 ipst);
1164 } else {
1165 ipif_t *ipif;
1166 in6_addr_t gw;
1167
1168 /*
1169 * Just create an on link entry, i.e. interface route.
1170 * The gateway field is our link-local on the ill.
1171 */
1172 mutex_enter(&rill->ill_lock);
1173 for (ipif = rill->ill_ipif; ipif != NULL;
1174 ipif = ipif->ipif_next) {
1175 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1176 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1177 break;
1178 }
1179 if (ipif == NULL) {
1180 /* We have no link-local address! */
1181 mutex_exit(&rill->ill_lock);
1182 goto fail_redirect;
1183 }
1184 gw = ipif->ipif_v6lcl_addr;
1185 mutex_exit(&rill->ill_lock);
1186
1187 ire = ire_create_v6(
1188 dst, /* gateway == dst */
1189 &ipv6_all_ones, /* mask */
1190 &gw, /* gateway addr */
1191 rill->ill_net_type, /* IF_[NO]RESOLVER */
1192 prev_ire->ire_ill,
1193 ALL_ZONES,
1194 (RTF_DYNAMIC | RTF_HOST),
1195 NULL,
1196 ipst);
1197 }
1198
1199 if (ire == NULL)
1200 goto fail_redirect;
1201
1202 nire = ire_add(ire);
1203 /* Check if it was a duplicate entry */
1204 if (nire != NULL && nire != ire) {
1205 ASSERT(nire->ire_identical_ref > 1);
1206 ire_delete(nire);
1207 ire_refrele(nire);
1208 nire = NULL;
1209 }
1210 ire = nire;
1211 if (ire != NULL) {
1212 ire_refrele(ire); /* Held in ire_add */
1213
1214 /* tell routing sockets that we received a redirect */
1215 ip_rts_change_v6(RTM_REDIRECT,
1216 &rd->nd_rd_dst,
1217 &rd->nd_rd_target,
1218 &ipv6_all_ones, 0, src,
1219 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1220 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1221
1222 /*
1223 * Delete any existing IRE_HOST type ires for this destination.
1224 * This together with the added IRE has the effect of
1225 * modifying an existing redirect.
1226 */
1227 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1228 prev_ire->ire_ill, ALL_ZONES, NULL,
1229 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1230 NULL);
1231
1232 if (redir_ire != NULL) {
1233 if (redir_ire->ire_flags & RTF_DYNAMIC)
1234 ire_delete(redir_ire);
1235 ire_refrele(redir_ire);
1236 }
1237 }
1238
1239 ire_refrele(prev_ire);
1240 prev_ire = NULL;
1241
1242 fail_redirect:
1243 if (prev_ire != NULL)
1244 ire_refrele(prev_ire);
1245 freemsg(mp);
1246 if (rill != ira->ira_rill)
1247 ill_refrele(rill);
1248 }
1249
1250 /*
1251 * Build and ship an IPv6 ICMP message using the packet data in mp,
1252 * and the ICMP header pointed to by "stuff". (May be called as
1253 * writer.)
1254 * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1255 * verify that an icmp error packet can be sent.
1256 *
1257 * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1258 * source address (see above function).
1259 */
1260 static void
icmp_pkt_v6(mblk_t * mp,void * stuff,size_t len,const in6_addr_t * v6src_ptr,ip_recv_attr_t * ira)1261 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1262 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1263 {
1264 ip6_t *ip6h;
1265 in6_addr_t v6dst;
1266 size_t len_needed;
1267 size_t msg_len;
1268 mblk_t *mp1;
1269 icmp6_t *icmp6;
1270 in6_addr_t v6src;
1271 ill_t *ill = ira->ira_ill;
1272 ip_stack_t *ipst = ill->ill_ipst;
1273 ip_xmit_attr_t ixas;
1274
1275 ip6h = (ip6_t *)mp->b_rptr;
1276
1277 bzero(&ixas, sizeof (ixas));
1278 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1279 ixas.ixa_zoneid = ira->ira_zoneid;
1280 ixas.ixa_ifindex = 0;
1281 ixas.ixa_ipst = ipst;
1282 ixas.ixa_cred = kcred;
1283 ixas.ixa_cpid = NOPID;
1284 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */
1285 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1286
1287 /*
1288 * If the source of the original packet was link-local, then
1289 * make sure we send on the same ill (group) as we received it on.
1290 */
1291 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1292 ixas.ixa_flags |= IXAF_SCOPEID_SET;
1293 if (IS_UNDER_IPMP(ill))
1294 ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1295 else
1296 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1297 }
1298
1299 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1300 /*
1301 * Apply IPsec based on how IPsec was applied to
1302 * the packet that had the error.
1303 *
1304 * If it was an outbound packet that caused the ICMP
1305 * error, then the caller will have setup the IRA
1306 * appropriately.
1307 */
1308 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1309 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1310 /* Note: mp already consumed and ip_drop_packet done */
1311 return;
1312 }
1313 } else {
1314 /*
1315 * This is in clear. The icmp message we are building
1316 * here should go out in clear, independent of our policy.
1317 */
1318 ixas.ixa_flags |= IXAF_NO_IPSEC;
1319 }
1320
1321 /*
1322 * If the caller specified the source we use that.
1323 * Otherwise, if the packet was for one of our unicast addresses, make
1324 * sure we respond with that as the source. Otherwise
1325 * have ip_output_simple pick the source address.
1326 */
1327 if (v6src_ptr != NULL) {
1328 v6src = *v6src_ptr;
1329 } else {
1330 ire_t *ire;
1331 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1332
1333 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1334 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1335 match_flags |= MATCH_IRE_ILL;
1336
1337 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1338 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1339 match_flags, 0, ipst, NULL);
1340 if (ire != NULL) {
1341 v6src = ip6h->ip6_dst;
1342 ire_refrele(ire);
1343 } else {
1344 v6src = ipv6_all_zeros;
1345 ixas.ixa_flags |= IXAF_SET_SOURCE;
1346 }
1347 }
1348 v6dst = ip6h->ip6_src;
1349 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1350 msg_len = msgdsize(mp);
1351 if (msg_len > len_needed) {
1352 if (!adjmsg(mp, len_needed - msg_len)) {
1353 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1354 freemsg(mp);
1355 return;
1356 }
1357 msg_len = len_needed;
1358 }
1359 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1360 if (mp1 == NULL) {
1361 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1362 freemsg(mp);
1363 return;
1364 }
1365 mp1->b_cont = mp;
1366 mp = mp1;
1367
1368 /*
1369 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1370 * node generates be accepted in peace by all on-host destinations.
1371 * If we do NOT assume that all on-host destinations trust
1372 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1373 * (Look for IXAF_TRUSTED_ICMP).
1374 */
1375 ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1376
1377 ip6h = (ip6_t *)mp->b_rptr;
1378 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1379
1380 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1381 ip6h->ip6_nxt = IPPROTO_ICMPV6;
1382 ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1383 ip6h->ip6_dst = v6dst;
1384 ip6h->ip6_src = v6src;
1385 msg_len += IPV6_HDR_LEN + len;
1386 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1387 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1388 msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1389 }
1390 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1391 icmp6 = (icmp6_t *)&ip6h[1];
1392 bcopy(stuff, (char *)icmp6, len);
1393 /*
1394 * Prepare for checksum by putting icmp length in the icmp
1395 * checksum field. The checksum is calculated in ip_output_wire_v6.
1396 */
1397 icmp6->icmp6_cksum = ip6h->ip6_plen;
1398 if (icmp6->icmp6_type == ND_REDIRECT) {
1399 ip6h->ip6_hops = IPV6_MAX_HOPS;
1400 }
1401
1402 (void) ip_output_simple(mp, &ixas);
1403 ixa_cleanup(&ixas);
1404 }
1405
1406 /*
1407 * Update the output mib when ICMPv6 packets are sent.
1408 */
1409 void
icmp_update_out_mib_v6(ill_t * ill,icmp6_t * icmp6)1410 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1411 {
1412 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1413
1414 switch (icmp6->icmp6_type) {
1415 case ICMP6_DST_UNREACH:
1416 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1417 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1418 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1419 break;
1420
1421 case ICMP6_TIME_EXCEEDED:
1422 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1423 break;
1424
1425 case ICMP6_PARAM_PROB:
1426 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1427 break;
1428
1429 case ICMP6_PACKET_TOO_BIG:
1430 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1431 break;
1432
1433 case ICMP6_ECHO_REQUEST:
1434 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1435 break;
1436
1437 case ICMP6_ECHO_REPLY:
1438 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1439 break;
1440
1441 case ND_ROUTER_SOLICIT:
1442 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1443 break;
1444
1445 case ND_ROUTER_ADVERT:
1446 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1447 break;
1448
1449 case ND_NEIGHBOR_SOLICIT:
1450 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1451 break;
1452
1453 case ND_NEIGHBOR_ADVERT:
1454 BUMP_MIB(ill->ill_icmp6_mib,
1455 ipv6IfIcmpOutNeighborAdvertisements);
1456 break;
1457
1458 case ND_REDIRECT:
1459 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1460 break;
1461
1462 case MLD_LISTENER_QUERY:
1463 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1464 break;
1465
1466 case MLD_LISTENER_REPORT:
1467 case MLD_V2_LISTENER_REPORT:
1468 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1469 break;
1470
1471 case MLD_LISTENER_REDUCTION:
1472 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1473 break;
1474 }
1475 }
1476
1477 /*
1478 * Check if it is ok to send an ICMPv6 error packet in
1479 * response to the IP packet in mp.
1480 * Free the message and return null if no
1481 * ICMP error packet should be sent.
1482 */
1483 static mblk_t *
icmp_pkt_err_ok_v6(mblk_t * mp,boolean_t mcast_ok,ip_recv_attr_t * ira)1484 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1485 {
1486 ill_t *ill = ira->ira_ill;
1487 ip_stack_t *ipst = ill->ill_ipst;
1488 boolean_t llbcast;
1489 ip6_t *ip6h;
1490
1491 if (!mp)
1492 return (NULL);
1493
1494 /* We view multicast and broadcast as the same.. */
1495 llbcast = (ira->ira_flags &
1496 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1497 ip6h = (ip6_t *)mp->b_rptr;
1498
1499 /* Check if source address uniquely identifies the host */
1500
1501 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1502 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1503 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1504 freemsg(mp);
1505 return (NULL);
1506 }
1507
1508 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1509 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1510 icmp6_t *icmp6;
1511
1512 if (mp->b_wptr - mp->b_rptr < len_needed) {
1513 if (!pullupmsg(mp, len_needed)) {
1514 BUMP_MIB(ill->ill_icmp6_mib,
1515 ipv6IfIcmpInErrors);
1516 freemsg(mp);
1517 return (NULL);
1518 }
1519 ip6h = (ip6_t *)mp->b_rptr;
1520 }
1521 icmp6 = (icmp6_t *)&ip6h[1];
1522 /* Explicitly do not generate errors in response to redirects */
1523 if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1524 icmp6->icmp6_type == ND_REDIRECT) {
1525 freemsg(mp);
1526 return (NULL);
1527 }
1528 }
1529 /*
1530 * Check that the destination is not multicast and that the packet
1531 * was not sent on link layer broadcast or multicast. (Exception
1532 * is Packet too big message as per the draft - when mcast_ok is set.)
1533 */
1534 if (!mcast_ok &&
1535 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1536 freemsg(mp);
1537 return (NULL);
1538 }
1539 /*
1540 * If this is a labeled system, then check to see if we're allowed to
1541 * send a response to this particular sender. If not, then just drop.
1542 */
1543 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1544 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1545 freemsg(mp);
1546 return (NULL);
1547 }
1548
1549 if (icmp_err_rate_limit(ipst)) {
1550 /*
1551 * Only send ICMP error packets every so often.
1552 * This should be done on a per port/source basis,
1553 * but for now this will suffice.
1554 */
1555 freemsg(mp);
1556 return (NULL);
1557 }
1558 return (mp);
1559 }
1560
1561 /*
1562 * Called when a packet was sent out the same link that it arrived on.
1563 * Check if it is ok to send a redirect and then send it.
1564 */
1565 void
ip_send_potential_redirect_v6(mblk_t * mp,ip6_t * ip6h,ire_t * ire,ip_recv_attr_t * ira)1566 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1567 ip_recv_attr_t *ira)
1568 {
1569 ill_t *ill = ira->ira_ill;
1570 ip_stack_t *ipst = ill->ill_ipst;
1571 in6_addr_t *v6targ;
1572 ire_t *src_ire_v6 = NULL;
1573 mblk_t *mp1;
1574 ire_t *nhop_ire = NULL;
1575
1576 /*
1577 * Don't send a redirect when forwarding a source
1578 * routed packet.
1579 */
1580 if (ip_source_routed_v6(ip6h, mp, ipst))
1581 return;
1582
1583 if (ire->ire_type & IRE_ONLINK) {
1584 /* Target is directly connected */
1585 v6targ = &ip6h->ip6_dst;
1586 } else {
1587 /* Determine the most specific IRE used to send the packets */
1588 nhop_ire = ire_nexthop(ire);
1589 if (nhop_ire == NULL)
1590 return;
1591
1592 /*
1593 * We won't send redirects to a router
1594 * that doesn't have a link local
1595 * address, but will forward.
1596 */
1597 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1598 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1599 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1600 ire_refrele(nhop_ire);
1601 return;
1602 }
1603 v6targ = &nhop_ire->ire_addr_v6;
1604 }
1605 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1606 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1607 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1608
1609 if (src_ire_v6 == NULL) {
1610 if (nhop_ire != NULL)
1611 ire_refrele(nhop_ire);
1612 return;
1613 }
1614
1615 /*
1616 * The source is directly connected.
1617 */
1618 mp1 = copymsg(mp);
1619 if (mp1 != NULL)
1620 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1621
1622 if (nhop_ire != NULL)
1623 ire_refrele(nhop_ire);
1624 ire_refrele(src_ire_v6);
1625 }
1626
1627 /*
1628 * Generate an ICMPv6 redirect message.
1629 * Include target link layer address option if it exits.
1630 * Always include redirect header.
1631 */
1632 static void
icmp_send_redirect_v6(mblk_t * mp,in6_addr_t * targetp,in6_addr_t * dest,ip_recv_attr_t * ira)1633 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1634 ip_recv_attr_t *ira)
1635 {
1636 nd_redirect_t *rd;
1637 nd_opt_rd_hdr_t *rdh;
1638 uchar_t *buf;
1639 ncec_t *ncec = NULL;
1640 nd_opt_hdr_t *opt;
1641 int len;
1642 int ll_opt_len = 0;
1643 int max_redir_hdr_data_len;
1644 int pkt_len;
1645 in6_addr_t *srcp;
1646 ill_t *ill;
1647 boolean_t need_refrele;
1648 ip_stack_t *ipst = ira->ira_ill->ill_ipst;
1649
1650 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1651 if (mp == NULL)
1652 return;
1653
1654 if (IS_UNDER_IPMP(ira->ira_ill)) {
1655 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1656 if (ill == NULL) {
1657 ill = ira->ira_ill;
1658 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1659 ip_drop_output("no IPMP ill for sending redirect",
1660 mp, ill);
1661 freemsg(mp);
1662 return;
1663 }
1664 need_refrele = B_TRUE;
1665 } else {
1666 ill = ira->ira_ill;
1667 need_refrele = B_FALSE;
1668 }
1669
1670 ncec = ncec_lookup_illgrp_v6(ill, targetp);
1671 if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1672 ncec->ncec_lladdr != NULL) {
1673 ll_opt_len = (sizeof (nd_opt_hdr_t) +
1674 ill->ill_phys_addr_length + 7)/8 * 8;
1675 }
1676 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1677 ASSERT(len % 4 == 0);
1678 buf = kmem_alloc(len, KM_NOSLEEP);
1679 if (buf == NULL) {
1680 if (ncec != NULL)
1681 ncec_refrele(ncec);
1682 if (need_refrele)
1683 ill_refrele(ill);
1684 freemsg(mp);
1685 return;
1686 }
1687
1688 rd = (nd_redirect_t *)buf;
1689 rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1690 rd->nd_rd_code = 0;
1691 rd->nd_rd_reserved = 0;
1692 rd->nd_rd_target = *targetp;
1693 rd->nd_rd_dst = *dest;
1694
1695 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1696 if (ncec != NULL && ll_opt_len != 0) {
1697 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1698 opt->nd_opt_len = ll_opt_len/8;
1699 bcopy((char *)ncec->ncec_lladdr, &opt[1],
1700 ill->ill_phys_addr_length);
1701 }
1702 if (ncec != NULL)
1703 ncec_refrele(ncec);
1704 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1705 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1706 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1707 max_redir_hdr_data_len =
1708 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1709 pkt_len = msgdsize(mp);
1710 /* Make sure mp is 8 byte aligned */
1711 if (pkt_len > max_redir_hdr_data_len) {
1712 rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1713 sizeof (nd_opt_rd_hdr_t))/8;
1714 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1715 } else {
1716 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1717 (void) adjmsg(mp, -(pkt_len % 8));
1718 }
1719 rdh->nd_opt_rh_reserved1 = 0;
1720 rdh->nd_opt_rh_reserved2 = 0;
1721 /* ipif_v6lcl_addr contains the link-local source address */
1722 srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1723
1724 /* Redirects sent by router, and router is global zone */
1725 ASSERT(ira->ira_zoneid == ALL_ZONES);
1726 ira->ira_zoneid = GLOBAL_ZONEID;
1727 icmp_pkt_v6(mp, buf, len, srcp, ira);
1728 kmem_free(buf, len);
1729 if (need_refrele)
1730 ill_refrele(ill);
1731 }
1732
1733
1734 /* Generate an ICMP time exceeded message. (May be called as writer.) */
1735 void
icmp_time_exceeded_v6(mblk_t * mp,uint8_t code,boolean_t mcast_ok,ip_recv_attr_t * ira)1736 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1737 ip_recv_attr_t *ira)
1738 {
1739 icmp6_t icmp6;
1740
1741 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1742 if (mp == NULL)
1743 return;
1744
1745 bzero(&icmp6, sizeof (icmp6_t));
1746 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1747 icmp6.icmp6_code = code;
1748 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1749 }
1750
1751 /*
1752 * Generate an ICMP unreachable message.
1753 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1754 * constructed by the caller.
1755 */
1756 void
icmp_unreachable_v6(mblk_t * mp,uint8_t code,boolean_t mcast_ok,ip_recv_attr_t * ira)1757 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1758 ip_recv_attr_t *ira)
1759 {
1760 icmp6_t icmp6;
1761
1762 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1763 if (mp == NULL)
1764 return;
1765
1766 bzero(&icmp6, sizeof (icmp6_t));
1767 icmp6.icmp6_type = ICMP6_DST_UNREACH;
1768 icmp6.icmp6_code = code;
1769 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1770 }
1771
1772 /*
1773 * Generate an ICMP pkt too big message.
1774 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1775 * constructed by the caller.
1776 */
1777 void
icmp_pkt2big_v6(mblk_t * mp,uint32_t mtu,boolean_t mcast_ok,ip_recv_attr_t * ira)1778 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1779 ip_recv_attr_t *ira)
1780 {
1781 icmp6_t icmp6;
1782
1783 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1784 if (mp == NULL)
1785 return;
1786
1787 bzero(&icmp6, sizeof (icmp6_t));
1788 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1789 icmp6.icmp6_code = 0;
1790 icmp6.icmp6_mtu = htonl(mtu);
1791
1792 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1793 }
1794
1795 /*
1796 * Generate an ICMP parameter problem message. (May be called as writer.)
1797 * 'offset' is the offset from the beginning of the packet in error.
1798 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1799 * constructed by the caller.
1800 */
1801 static void
icmp_param_problem_v6(mblk_t * mp,uint8_t code,uint32_t offset,boolean_t mcast_ok,ip_recv_attr_t * ira)1802 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1803 boolean_t mcast_ok, ip_recv_attr_t *ira)
1804 {
1805 icmp6_t icmp6;
1806
1807 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1808 if (mp == NULL)
1809 return;
1810
1811 bzero((char *)&icmp6, sizeof (icmp6_t));
1812 icmp6.icmp6_type = ICMP6_PARAM_PROB;
1813 icmp6.icmp6_code = code;
1814 icmp6.icmp6_pptr = htonl(offset);
1815 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1816 }
1817
1818 void
icmp_param_problem_nexthdr_v6(mblk_t * mp,boolean_t mcast_ok,ip_recv_attr_t * ira)1819 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1820 ip_recv_attr_t *ira)
1821 {
1822 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
1823 uint16_t hdr_length;
1824 uint8_t *nexthdrp;
1825 uint32_t offset;
1826 ill_t *ill = ira->ira_ill;
1827
1828 /* Determine the offset of the bad nexthdr value */
1829 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
1830 /* Malformed packet */
1831 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1832 ip_drop_input("ipIfStatsInDiscards", mp, ill);
1833 freemsg(mp);
1834 return;
1835 }
1836
1837 offset = nexthdrp - mp->b_rptr;
1838 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1839 mcast_ok, ira);
1840 }
1841
1842 /*
1843 * Verify whether or not the IP address is a valid local address.
1844 * Could be a unicast, including one for a down interface.
1845 * If allow_mcbc then a multicast or broadcast address is also
1846 * acceptable.
1847 *
1848 * In the case of a multicast address, however, the
1849 * upper protocol is expected to reset the src address
1850 * to zero when we return IPVL_MCAST so that
1851 * no packets are emitted with multicast address as
1852 * source address.
1853 * The addresses valid for bind are:
1854 * (1) - in6addr_any
1855 * (2) - IP address of an UP interface
1856 * (3) - IP address of a DOWN interface
1857 * (4) - a multicast address. In this case
1858 * the conn will only receive packets destined to
1859 * the specified multicast address. Note: the
1860 * application still has to issue an
1861 * IPV6_JOIN_GROUP socket option.
1862 *
1863 * In all the above cases, the bound address must be valid in the current zone.
1864 * When the address is loopback or multicast, there might be many matching IREs
1865 * so bind has to look up based on the zone.
1866 */
1867 ip_laddr_t
ip_laddr_verify_v6(const in6_addr_t * v6src,zoneid_t zoneid,ip_stack_t * ipst,boolean_t allow_mcbc,uint_t scopeid)1868 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1869 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1870 {
1871 ire_t *src_ire;
1872 uint_t match_flags;
1873 ill_t *ill = NULL;
1874
1875 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1876 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1877
1878 match_flags = MATCH_IRE_ZONEONLY;
1879 if (scopeid != 0) {
1880 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1881 if (ill == NULL)
1882 return (IPVL_BAD);
1883 match_flags |= MATCH_IRE_ILL;
1884 }
1885
1886 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1887 ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1888 if (ill != NULL)
1889 ill_refrele(ill);
1890
1891 /*
1892 * If an address other than in6addr_any is requested,
1893 * we verify that it is a valid address for bind
1894 * Note: Following code is in if-else-if form for
1895 * readability compared to a condition check.
1896 */
1897 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1898 /*
1899 * (2) Bind to address of local UP interface
1900 */
1901 ire_refrele(src_ire);
1902 return (IPVL_UNICAST_UP);
1903 } else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1904 /* (4) bind to multicast address. */
1905 if (src_ire != NULL)
1906 ire_refrele(src_ire);
1907
1908 /*
1909 * Note: caller should take IPV6_MULTICAST_IF
1910 * into account when selecting a real source address.
1911 */
1912 if (allow_mcbc)
1913 return (IPVL_MCAST);
1914 else
1915 return (IPVL_BAD);
1916 } else {
1917 ipif_t *ipif;
1918
1919 /*
1920 * (3) Bind to address of local DOWN interface?
1921 * (ipif_lookup_addr() looks up all interfaces
1922 * but we do not get here for UP interfaces
1923 * - case (2) above)
1924 */
1925 if (src_ire != NULL)
1926 ire_refrele(src_ire);
1927
1928 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1929 if (ipif == NULL)
1930 return (IPVL_BAD);
1931
1932 /* Not a useful source? */
1933 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1934 ipif_refrele(ipif);
1935 return (IPVL_BAD);
1936 }
1937 ipif_refrele(ipif);
1938 return (IPVL_UNICAST_DOWN);
1939 }
1940 }
1941
1942 /*
1943 * Verify that both the source and destination addresses are valid. If
1944 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1945 * i.e. have no route to it. Protocols like TCP want to verify destination
1946 * reachability, while tunnels do not.
1947 *
1948 * Determine the route, the interface, and (optionally) the source address
1949 * to use to reach a given destination.
1950 * Note that we allow connect to broadcast and multicast addresses when
1951 * IPDF_ALLOW_MCBC is set.
1952 * first_hop and dst_addr are normally the same, but if source routing
1953 * they will differ; in that case the first_hop is what we'll use for the
1954 * routing lookup but the dce and label checks will be done on dst_addr,
1955 *
1956 * If uinfo is set, then we fill in the best available information
1957 * we have for the destination. This is based on (in priority order) any
1958 * metrics and path MTU stored in a dce_t, route metrics, and finally the
1959 * ill_mtu/ill_mc_mtu.
1960 *
1961 * Tsol note: If we have a source route then dst_addr != firsthop. But we
1962 * always do the label check on dst_addr.
1963 *
1964 * Assumes that the caller has set ixa_scopeid for link-local communication.
1965 */
1966 int
ip_set_destination_v6(in6_addr_t * src_addrp,const in6_addr_t * dst_addr,const in6_addr_t * firsthop,ip_xmit_attr_t * ixa,iulp_t * uinfo,uint32_t flags,uint_t mac_mode)1967 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1968 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1969 uint32_t flags, uint_t mac_mode)
1970 {
1971 ire_t *ire;
1972 int error = 0;
1973 in6_addr_t setsrc; /* RTF_SETSRC */
1974 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */
1975 ip_stack_t *ipst = ixa->ixa_ipst;
1976 dce_t *dce;
1977 uint_t pmtu;
1978 uint_t ifindex;
1979 uint_t generation;
1980 nce_t *nce;
1981 ill_t *ill = NULL;
1982 boolean_t multirt = B_FALSE;
1983
1984 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1985
1986 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1987
1988 /*
1989 * We never send to zero; the ULPs map it to the loopback address.
1990 * We can't allow it since we use zero to mean unitialized in some
1991 * places.
1992 */
1993 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1994
1995 if (is_system_labeled()) {
1996 ts_label_t *tsl = NULL;
1997
1998 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
1999 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
2000 if (error != 0)
2001 return (error);
2002 if (tsl != NULL) {
2003 /* Update the label */
2004 ip_xmit_attr_replace_tsl(ixa, tsl);
2005 }
2006 }
2007
2008 setsrc = ipv6_all_zeros;
2009 /*
2010 * Select a route; For IPMP interfaces, we would only select
2011 * a "hidden" route (i.e., going through a specific under_ill)
2012 * if ixa_ifindex has been specified.
2013 */
2014 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
2015 &setsrc, &error, &multirt);
2016 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */
2017 if (error != 0)
2018 goto bad_addr;
2019
2020 /*
2021 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
2022 * If IPDF_VERIFY_DST is set, the destination must be reachable.
2023 * Otherwise the destination needn't be reachable.
2024 *
2025 * If we match on a reject or black hole, then we've got a
2026 * local failure. May as well fail out the connect() attempt,
2027 * since it's never going to succeed.
2028 */
2029 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2030 /*
2031 * If we're verifying destination reachability, we always want
2032 * to complain here.
2033 *
2034 * If we're not verifying destination reachability but the
2035 * destination has a route, we still want to fail on the
2036 * temporary address and broadcast address tests.
2037 *
2038 * In both cases do we let the code continue so some reasonable
2039 * information is returned to the caller. That enables the
2040 * caller to use (and even cache) the IRE. conn_ip_ouput will
2041 * use the generation mismatch path to check for the unreachable
2042 * case thereby avoiding any specific check in the main path.
2043 */
2044 ASSERT(generation == IRE_GENERATION_VERIFY);
2045 if (flags & IPDF_VERIFY_DST) {
2046 /*
2047 * Set errno but continue to set up ixa_ire to be
2048 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2049 * That allows callers to use ip_output to get an
2050 * ICMP error back.
2051 */
2052 if (!(ire->ire_type & IRE_HOST))
2053 error = ENETUNREACH;
2054 else
2055 error = EHOSTUNREACH;
2056 }
2057 }
2058
2059 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2060 !(flags & IPDF_ALLOW_MCBC)) {
2061 ire_refrele(ire);
2062 ire = ire_reject(ipst, B_FALSE);
2063 generation = IRE_GENERATION_VERIFY;
2064 error = ENETUNREACH;
2065 }
2066
2067 /* Cache things */
2068 if (ixa->ixa_ire != NULL)
2069 ire_refrele_notr(ixa->ixa_ire);
2070 #ifdef DEBUG
2071 ire_refhold_notr(ire);
2072 ire_refrele(ire);
2073 #endif
2074 ixa->ixa_ire = ire;
2075 ixa->ixa_ire_generation = generation;
2076
2077 /*
2078 * Ensure that ixa_dce is always set any time that ixa_ire is set,
2079 * since some callers will send a packet to conn_ip_output() even if
2080 * there's an error.
2081 */
2082 ifindex = 0;
2083 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2084 /* If we are creating a DCE we'd better have an ifindex */
2085 if (ill != NULL)
2086 ifindex = ill->ill_phyint->phyint_ifindex;
2087 else
2088 flags &= ~IPDF_UNIQUE_DCE;
2089 }
2090
2091 if (flags & IPDF_UNIQUE_DCE) {
2092 /* Fallback to the default dce if allocation fails */
2093 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2094 if (dce != NULL) {
2095 generation = dce->dce_generation;
2096 } else {
2097 dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2098 &generation);
2099 }
2100 } else {
2101 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2102 }
2103 ASSERT(dce != NULL);
2104 if (ixa->ixa_dce != NULL)
2105 dce_refrele_notr(ixa->ixa_dce);
2106 #ifdef DEBUG
2107 dce_refhold_notr(dce);
2108 dce_refrele(dce);
2109 #endif
2110 ixa->ixa_dce = dce;
2111 ixa->ixa_dce_generation = generation;
2112
2113
2114 /*
2115 * For multicast with multirt we have a flag passed back from
2116 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2117 * possible multicast address.
2118 * We also need a flag for multicast since we can't check
2119 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2120 */
2121 if (multirt) {
2122 ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2123 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2124 } else {
2125 ixa->ixa_postfragfn = ire->ire_postfragfn;
2126 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2127 }
2128 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2129 /* Get an nce to cache. */
2130 nce = ire_to_nce(ire, NULL, firsthop);
2131 if (nce == NULL) {
2132 /* Allocation failure? */
2133 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2134 } else {
2135 if (ixa->ixa_nce != NULL)
2136 nce_refrele(ixa->ixa_nce);
2137 ixa->ixa_nce = nce;
2138 }
2139 }
2140
2141 /*
2142 * If the source address is a loopback address, the
2143 * destination had best be local or multicast.
2144 * If we are sending to an IRE_LOCAL using a loopback source then
2145 * it had better be the same zoneid.
2146 */
2147 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2148 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
2149 ire = NULL; /* Stored in ixa_ire */
2150 error = EADDRNOTAVAIL;
2151 goto bad_addr;
2152 }
2153 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2154 ire = NULL; /* Stored in ixa_ire */
2155 error = EADDRNOTAVAIL;
2156 goto bad_addr;
2157 }
2158 }
2159
2160 /*
2161 * Does the caller want us to pick a source address?
2162 */
2163 if (flags & IPDF_SELECT_SRC) {
2164 in6_addr_t src_addr;
2165
2166 /*
2167 * We use use ire_nexthop_ill to avoid the under ipmp
2168 * interface for source address selection. Note that for ipmp
2169 * probe packets, ixa_ifindex would have been specified, and
2170 * the ip_select_route() invocation would have picked an ire
2171 * will ire_ill pointing at an under interface.
2172 */
2173 ill = ire_nexthop_ill(ire);
2174
2175 /* If unreachable we have no ill but need some source */
2176 if (ill == NULL) {
2177 src_addr = ipv6_loopback;
2178 /* Make sure we look for a better source address */
2179 generation = SRC_GENERATION_VERIFY;
2180 } else {
2181 error = ip_select_source_v6(ill, &setsrc, dst_addr,
2182 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2183 &src_addr, &generation, NULL);
2184 if (error != 0) {
2185 ire = NULL; /* Stored in ixa_ire */
2186 goto bad_addr;
2187 }
2188 }
2189
2190 /*
2191 * We allow the source address to to down.
2192 * However, we check that we don't use the loopback address
2193 * as a source when sending out on the wire.
2194 */
2195 if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2196 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2197 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2198 ire = NULL; /* Stored in ixa_ire */
2199 error = EADDRNOTAVAIL;
2200 goto bad_addr;
2201 }
2202
2203 *src_addrp = src_addr;
2204 ixa->ixa_src_generation = generation;
2205 }
2206
2207 /*
2208 * Make sure we don't leave an unreachable ixa_nce in place
2209 * since ip_select_route is used when we unplumb i.e., remove
2210 * references on ixa_ire, ixa_nce, and ixa_dce.
2211 */
2212 nce = ixa->ixa_nce;
2213 if (nce != NULL && nce->nce_is_condemned) {
2214 nce_refrele(nce);
2215 ixa->ixa_nce = NULL;
2216 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2217 }
2218
2219 /*
2220 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2221 * multicast. But pmtu discovery is only enabled for connected
2222 * sockets in general.
2223 */
2224
2225 /*
2226 * Set initial value for fragmentation limit. Either conn_ip_output
2227 * or ULP might updates it when there are routing changes.
2228 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2229 */
2230 pmtu = ip_get_pmtu(ixa);
2231 ixa->ixa_fragsize = pmtu;
2232 /* Make sure ixa_fragsize and ixa_pmtu remain identical */
2233 if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2234 ixa->ixa_pmtu = pmtu;
2235
2236 /*
2237 * Extract information useful for some transports.
2238 * First we look for DCE metrics. Then we take what we have in
2239 * the metrics in the route, where the offlink is used if we have
2240 * one.
2241 */
2242 if (uinfo != NULL) {
2243 bzero(uinfo, sizeof (*uinfo));
2244
2245 if (dce->dce_flags & DCEF_UINFO)
2246 *uinfo = dce->dce_uinfo;
2247
2248 rts_merge_metrics(uinfo, &ire->ire_metrics);
2249
2250 /* Allow ire_metrics to decrease the path MTU from above */
2251 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2252 uinfo->iulp_mtu = pmtu;
2253
2254 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2255 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2256 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2257 }
2258
2259 if (ill != NULL)
2260 ill_refrele(ill);
2261
2262 return (error);
2263
2264 bad_addr:
2265 if (ire != NULL)
2266 ire_refrele(ire);
2267
2268 if (ill != NULL)
2269 ill_refrele(ill);
2270
2271 /*
2272 * Make sure we don't leave an unreachable ixa_nce in place
2273 * since ip_select_route is used when we unplumb i.e., remove
2274 * references on ixa_ire, ixa_nce, and ixa_dce.
2275 */
2276 nce = ixa->ixa_nce;
2277 if (nce != NULL && nce->nce_is_condemned) {
2278 nce_refrele(nce);
2279 ixa->ixa_nce = NULL;
2280 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2281 }
2282
2283 return (error);
2284 }
2285
2286 /*
2287 * Handle protocols with which IP is less intimate. There
2288 * can be more than one stream bound to a particular
2289 * protocol. When this is the case, normally each one gets a copy
2290 * of any incoming packets.
2291 *
2292 * Zones notes:
2293 * Packets will be distributed to conns in all zones. This is really only
2294 * useful for ICMPv6 as only applications in the global zone can create raw
2295 * sockets for other protocols.
2296 */
2297 void
ip_fanout_proto_v6(mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira)2298 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2299 {
2300 mblk_t *mp1;
2301 in6_addr_t laddr = ip6h->ip6_dst;
2302 conn_t *connp, *first_connp, *next_connp;
2303 connf_t *connfp;
2304 ill_t *ill = ira->ira_ill;
2305 ip_stack_t *ipst = ill->ill_ipst;
2306
2307 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2308 mutex_enter(&connfp->connf_lock);
2309 connp = connfp->connf_head;
2310 for (connp = connfp->connf_head; connp != NULL;
2311 connp = connp->conn_next) {
2312 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2313 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2314 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2315 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2316 break;
2317 }
2318
2319 if (connp == NULL) {
2320 /*
2321 * No one bound to this port. Is
2322 * there a client that wants all
2323 * unclaimed datagrams?
2324 */
2325 mutex_exit(&connfp->connf_lock);
2326 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2327 ICMP6_PARAMPROB_NEXTHEADER, ira);
2328 return;
2329 }
2330
2331 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2332
2333 CONN_INC_REF(connp);
2334 first_connp = connp;
2335
2336 /*
2337 * XXX: Fix the multiple protocol listeners case. We should not
2338 * be walking the conn->conn_next list here.
2339 */
2340 connp = connp->conn_next;
2341 for (;;) {
2342 while (connp != NULL) {
2343 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2344 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2345 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2346 tsol_receive_local(mp, &laddr, IPV6_VERSION,
2347 ira, connp)))
2348 break;
2349 connp = connp->conn_next;
2350 }
2351
2352 if (connp == NULL) {
2353 /* No more interested clients */
2354 connp = first_connp;
2355 break;
2356 }
2357 if (((mp1 = dupmsg(mp)) == NULL) &&
2358 ((mp1 = copymsg(mp)) == NULL)) {
2359 /* Memory allocation failed */
2360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2361 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2362 connp = first_connp;
2363 break;
2364 }
2365
2366 CONN_INC_REF(connp);
2367 mutex_exit(&connfp->connf_lock);
2368
2369 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2370 ira);
2371
2372 mutex_enter(&connfp->connf_lock);
2373 /* Follow the next pointer before releasing the conn. */
2374 next_connp = connp->conn_next;
2375 CONN_DEC_REF(connp);
2376 connp = next_connp;
2377 }
2378
2379 /* Last one. Send it upstream. */
2380 mutex_exit(&connfp->connf_lock);
2381
2382 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2383
2384 CONN_DEC_REF(connp);
2385 }
2386
2387 /*
2388 * Called when it is conceptually a ULP that would sent the packet
2389 * e.g., port unreachable and nexthdr unknown. Check that the packet
2390 * would have passed the IPsec global policy before sending the error.
2391 *
2392 * Send an ICMP error after patching up the packet appropriately.
2393 * Uses ip_drop_input and bumps the appropriate MIB.
2394 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2395 */
2396 void
ip_fanout_send_icmp_v6(mblk_t * mp,uint_t icmp_type,uint8_t icmp_code,ip_recv_attr_t * ira)2397 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2398 ip_recv_attr_t *ira)
2399 {
2400 ip6_t *ip6h;
2401 boolean_t secure;
2402 ill_t *ill = ira->ira_ill;
2403 ip_stack_t *ipst = ill->ill_ipst;
2404 netstack_t *ns = ipst->ips_netstack;
2405 ipsec_stack_t *ipss = ns->netstack_ipsec;
2406
2407 secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2408
2409 /*
2410 * We are generating an icmp error for some inbound packet.
2411 * Called from all ip_fanout_(udp, tcp, proto) functions.
2412 * Before we generate an error, check with global policy
2413 * to see whether this is allowed to enter the system. As
2414 * there is no "conn", we are checking with global policy.
2415 */
2416 ip6h = (ip6_t *)mp->b_rptr;
2417 if (secure || ipss->ipsec_inbound_v6_policy_present) {
2418 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2419 if (mp == NULL)
2420 return;
2421 }
2422
2423 /* We never send errors for protocols that we do implement */
2424 if (ira->ira_protocol == IPPROTO_ICMPV6) {
2425 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2426 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2427 freemsg(mp);
2428 return;
2429 }
2430
2431 switch (icmp_type) {
2432 case ICMP6_DST_UNREACH:
2433 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2434
2435 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2436 ip_drop_input("ipIfStatsNoPorts", mp, ill);
2437
2438 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2439 break;
2440 case ICMP6_PARAM_PROB:
2441 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2442
2443 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2444 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2445
2446 /* Let the system determine the offset for this one */
2447 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2448 break;
2449 default:
2450 #ifdef DEBUG
2451 panic("ip_fanout_send_icmp_v6: wrong type");
2452 /*NOTREACHED*/
2453 #else
2454 freemsg(mp);
2455 break;
2456 #endif
2457 }
2458 }
2459
2460 /*
2461 * Fanout for UDP packets that are multicast or ICMP errors.
2462 * (Unicast fanout is handled in ip_input_v6.)
2463 *
2464 * If SO_REUSEADDR is set all multicast packets
2465 * will be delivered to all conns bound to the same port.
2466 *
2467 * Fanout for UDP packets.
2468 * The caller puts <fport, lport> in the ports parameter.
2469 * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2470 *
2471 * If SO_REUSEADDR is set all multicast and broadcast packets
2472 * will be delivered to all conns bound to the same port.
2473 *
2474 * Zones notes:
2475 * Earlier in ip_input on a system with multiple shared-IP zones we
2476 * duplicate the multicast and broadcast packets and send them up
2477 * with each explicit zoneid that exists on that ill.
2478 * This means that here we can match the zoneid with SO_ALLZONES being special.
2479 */
2480 void
ip_fanout_udp_multi_v6(mblk_t * mp,ip6_t * ip6h,uint16_t lport,uint16_t fport,ip_recv_attr_t * ira)2481 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2482 ip_recv_attr_t *ira)
2483 {
2484 in6_addr_t laddr;
2485 conn_t *connp;
2486 connf_t *connfp;
2487 in6_addr_t faddr;
2488 ill_t *ill = ira->ira_ill;
2489 ip_stack_t *ipst = ill->ill_ipst;
2490
2491 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2492
2493 laddr = ip6h->ip6_dst;
2494 faddr = ip6h->ip6_src;
2495
2496 /* Attempt to find a client stream based on destination port. */
2497 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2498 mutex_enter(&connfp->connf_lock);
2499 connp = connfp->connf_head;
2500 while (connp != NULL) {
2501 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2502 conn_wantpacket_v6(connp, ira, ip6h) &&
2503 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2504 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2505 break;
2506 connp = connp->conn_next;
2507 }
2508
2509 if (connp == NULL)
2510 goto notfound;
2511
2512 CONN_INC_REF(connp);
2513
2514 if (connp->conn_reuseaddr) {
2515 conn_t *first_connp = connp;
2516 conn_t *next_connp;
2517 mblk_t *mp1;
2518
2519 connp = connp->conn_next;
2520 for (;;) {
2521 while (connp != NULL) {
2522 if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2523 fport, faddr) &&
2524 conn_wantpacket_v6(connp, ira, ip6h) &&
2525 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2526 tsol_receive_local(mp, &laddr, IPV6_VERSION,
2527 ira, connp)))
2528 break;
2529 connp = connp->conn_next;
2530 }
2531 if (connp == NULL) {
2532 /* No more interested clients */
2533 connp = first_connp;
2534 break;
2535 }
2536 if (((mp1 = dupmsg(mp)) == NULL) &&
2537 ((mp1 = copymsg(mp)) == NULL)) {
2538 /* Memory allocation failed */
2539 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2540 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2541 connp = first_connp;
2542 break;
2543 }
2544
2545 CONN_INC_REF(connp);
2546 mutex_exit(&connfp->connf_lock);
2547
2548 IP6_STAT(ipst, ip6_udp_fanmb);
2549 ip_fanout_udp_conn(connp, mp1, NULL,
2550 (ip6_t *)mp1->b_rptr, ira);
2551
2552 mutex_enter(&connfp->connf_lock);
2553 /* Follow the next pointer before releasing the conn. */
2554 next_connp = connp->conn_next;
2555 IP6_STAT(ipst, ip6_udp_fanmb);
2556 CONN_DEC_REF(connp);
2557 connp = next_connp;
2558 }
2559 }
2560
2561 /* Last one. Send it upstream. */
2562 mutex_exit(&connfp->connf_lock);
2563
2564 IP6_STAT(ipst, ip6_udp_fanmb);
2565 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2566 CONN_DEC_REF(connp);
2567 return;
2568
2569 notfound:
2570 mutex_exit(&connfp->connf_lock);
2571 /*
2572 * No one bound to this port. Is
2573 * there a client that wants all
2574 * unclaimed datagrams?
2575 */
2576 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2577 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2578 ip_fanout_proto_v6(mp, ip6h, ira);
2579 } else {
2580 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2581 ICMP6_DST_UNREACH_NOPORT, ira);
2582 }
2583 }
2584
2585 /*
2586 * int ip_find_hdr_v6()
2587 *
2588 * This routine is used by the upper layer protocols, iptun, and IPsec:
2589 * - Set extension header pointers to appropriate locations
2590 * - Determine IPv6 header length and return it
2591 * - Return a pointer to the last nexthdr value
2592 *
2593 * The caller must initialize ipp_fields.
2594 * The upper layer protocols normally set label_separate which makes the
2595 * routine put the TX label in ipp_label_v6. If this is not set then
2596 * the hop-by-hop options including the label are placed in ipp_hopopts.
2597 *
2598 * NOTE: If multiple extension headers of the same type are present,
2599 * ip_find_hdr_v6() will set the respective extension header pointers
2600 * to the first one that it encounters in the IPv6 header. It also
2601 * skips fragment headers. This routine deals with malformed packets
2602 * of various sorts in which case the returned length is up to the
2603 * malformed part.
2604 */
2605 int
ip_find_hdr_v6(mblk_t * mp,ip6_t * ip6h,boolean_t label_separate,ip_pkt_t * ipp,uint8_t * nexthdrp)2606 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2607 uint8_t *nexthdrp)
2608 {
2609 uint_t length, ehdrlen;
2610 uint8_t nexthdr;
2611 uint8_t *whereptr, *endptr;
2612 ip6_dest_t *tmpdstopts;
2613 ip6_rthdr_t *tmprthdr;
2614 ip6_hbh_t *tmphopopts;
2615 ip6_frag_t *tmpfraghdr;
2616
2617 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2618 ipp->ipp_hoplimit = ip6h->ip6_hops;
2619 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2620 ipp->ipp_addr = ip6h->ip6_dst;
2621
2622 length = IPV6_HDR_LEN;
2623 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2624 endptr = mp->b_wptr;
2625
2626 nexthdr = ip6h->ip6_nxt;
2627 while (whereptr < endptr) {
2628 /* Is there enough left for len + nexthdr? */
2629 if (whereptr + MIN_EHDR_LEN > endptr)
2630 goto done;
2631
2632 switch (nexthdr) {
2633 case IPPROTO_HOPOPTS: {
2634 /* We check for any CIPSO */
2635 uchar_t *secopt;
2636 boolean_t hbh_needed;
2637 uchar_t *after_secopt;
2638
2639 tmphopopts = (ip6_hbh_t *)whereptr;
2640 ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2641 if ((uchar_t *)tmphopopts + ehdrlen > endptr)
2642 goto done;
2643 nexthdr = tmphopopts->ip6h_nxt;
2644
2645 if (!label_separate) {
2646 secopt = NULL;
2647 after_secopt = whereptr;
2648 } else {
2649 /*
2650 * We have dropped packets with bad options in
2651 * ip6_input. No need to check return value
2652 * here.
2653 */
2654 (void) tsol_find_secopt_v6(whereptr, ehdrlen,
2655 &secopt, &after_secopt, &hbh_needed);
2656 }
2657 if (secopt != NULL && after_secopt - whereptr > 0) {
2658 ipp->ipp_fields |= IPPF_LABEL_V6;
2659 ipp->ipp_label_v6 = secopt;
2660 ipp->ipp_label_len_v6 = after_secopt - whereptr;
2661 } else {
2662 ipp->ipp_label_len_v6 = 0;
2663 after_secopt = whereptr;
2664 hbh_needed = B_TRUE;
2665 }
2666 /* return only 1st hbh */
2667 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2668 ipp->ipp_fields |= IPPF_HOPOPTS;
2669 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2670 ipp->ipp_hopoptslen = ehdrlen -
2671 ipp->ipp_label_len_v6;
2672 }
2673 break;
2674 }
2675 case IPPROTO_DSTOPTS:
2676 tmpdstopts = (ip6_dest_t *)whereptr;
2677 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2678 if ((uchar_t *)tmpdstopts + ehdrlen > endptr)
2679 goto done;
2680 nexthdr = tmpdstopts->ip6d_nxt;
2681 /*
2682 * ipp_dstopts is set to the destination header after a
2683 * routing header.
2684 * Assume it is a post-rthdr destination header
2685 * and adjust when we find an rthdr.
2686 */
2687 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2688 ipp->ipp_fields |= IPPF_DSTOPTS;
2689 ipp->ipp_dstopts = tmpdstopts;
2690 ipp->ipp_dstoptslen = ehdrlen;
2691 }
2692 break;
2693 case IPPROTO_ROUTING:
2694 tmprthdr = (ip6_rthdr_t *)whereptr;
2695 ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2696 if ((uchar_t *)tmprthdr + ehdrlen > endptr)
2697 goto done;
2698 nexthdr = tmprthdr->ip6r_nxt;
2699 /* return only 1st rthdr */
2700 if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2701 ipp->ipp_fields |= IPPF_RTHDR;
2702 ipp->ipp_rthdr = tmprthdr;
2703 ipp->ipp_rthdrlen = ehdrlen;
2704 }
2705 /*
2706 * Make any destination header we've seen be a
2707 * pre-rthdr destination header.
2708 */
2709 if (ipp->ipp_fields & IPPF_DSTOPTS) {
2710 ipp->ipp_fields &= ~IPPF_DSTOPTS;
2711 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2712 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2713 ipp->ipp_dstopts = NULL;
2714 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2715 ipp->ipp_dstoptslen = 0;
2716 }
2717 break;
2718 case IPPROTO_FRAGMENT:
2719 tmpfraghdr = (ip6_frag_t *)whereptr;
2720 ehdrlen = sizeof (ip6_frag_t);
2721 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2722 goto done;
2723 nexthdr = tmpfraghdr->ip6f_nxt;
2724 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2725 ipp->ipp_fields |= IPPF_FRAGHDR;
2726 ipp->ipp_fraghdr = tmpfraghdr;
2727 ipp->ipp_fraghdrlen = ehdrlen;
2728 }
2729 break;
2730 case IPPROTO_NONE:
2731 default:
2732 goto done;
2733 }
2734 length += ehdrlen;
2735 whereptr += ehdrlen;
2736 }
2737 done:
2738 if (nexthdrp != NULL)
2739 *nexthdrp = nexthdr;
2740 return (length);
2741 }
2742
2743 /*
2744 * Try to determine where and what are the IPv6 header length and
2745 * pointer to nexthdr value for the upper layer protocol (or an
2746 * unknown next hdr).
2747 *
2748 * Parameters returns a pointer to the nexthdr value;
2749 * Must handle malformed packets of various sorts.
2750 * Function returns failure for malformed cases.
2751 */
2752 boolean_t
ip_hdr_length_nexthdr_v6(mblk_t * mp,ip6_t * ip6h,uint16_t * hdr_length_ptr,uint8_t ** nexthdrpp)2753 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2754 uint8_t **nexthdrpp)
2755 {
2756 uint16_t length;
2757 uint_t ehdrlen;
2758 uint8_t *nexthdrp;
2759 uint8_t *whereptr;
2760 uint8_t *endptr;
2761 ip6_dest_t *desthdr;
2762 ip6_rthdr_t *rthdr;
2763 ip6_frag_t *fraghdr;
2764
2765 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2766 length = IPV6_HDR_LEN;
2767 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2768 endptr = mp->b_wptr;
2769
2770 nexthdrp = &ip6h->ip6_nxt;
2771 while (whereptr < endptr) {
2772 /* Is there enough left for len + nexthdr? */
2773 if (whereptr + MIN_EHDR_LEN > endptr)
2774 break;
2775
2776 switch (*nexthdrp) {
2777 case IPPROTO_HOPOPTS:
2778 case IPPROTO_DSTOPTS:
2779 /* Assumes the headers are identical for hbh and dst */
2780 desthdr = (ip6_dest_t *)whereptr;
2781 ehdrlen = 8 * (desthdr->ip6d_len + 1);
2782 if ((uchar_t *)desthdr + ehdrlen > endptr)
2783 return (B_FALSE);
2784 nexthdrp = &desthdr->ip6d_nxt;
2785 break;
2786 case IPPROTO_ROUTING:
2787 rthdr = (ip6_rthdr_t *)whereptr;
2788 ehdrlen = 8 * (rthdr->ip6r_len + 1);
2789 if ((uchar_t *)rthdr + ehdrlen > endptr)
2790 return (B_FALSE);
2791 nexthdrp = &rthdr->ip6r_nxt;
2792 break;
2793 case IPPROTO_FRAGMENT:
2794 fraghdr = (ip6_frag_t *)whereptr;
2795 ehdrlen = sizeof (ip6_frag_t);
2796 if ((uchar_t *)&fraghdr[1] > endptr)
2797 return (B_FALSE);
2798 nexthdrp = &fraghdr->ip6f_nxt;
2799 break;
2800 case IPPROTO_NONE:
2801 /* No next header means we're finished */
2802 default:
2803 *hdr_length_ptr = length;
2804 *nexthdrpp = nexthdrp;
2805 return (B_TRUE);
2806 }
2807 length += ehdrlen;
2808 whereptr += ehdrlen;
2809 *hdr_length_ptr = length;
2810 *nexthdrpp = nexthdrp;
2811 }
2812 switch (*nexthdrp) {
2813 case IPPROTO_HOPOPTS:
2814 case IPPROTO_DSTOPTS:
2815 case IPPROTO_ROUTING:
2816 case IPPROTO_FRAGMENT:
2817 /*
2818 * If any know extension headers are still to be processed,
2819 * the packet's malformed (or at least all the IP header(s) are
2820 * not in the same mblk - and that should never happen.
2821 */
2822 return (B_FALSE);
2823
2824 default:
2825 /*
2826 * If we get here, we know that all of the IP headers were in
2827 * the same mblk, even if the ULP header is in the next mblk.
2828 */
2829 *hdr_length_ptr = length;
2830 *nexthdrpp = nexthdrp;
2831 return (B_TRUE);
2832 }
2833 }
2834
2835 /*
2836 * Return the length of the IPv6 related headers (including extension headers)
2837 * Returns a length even if the packet is malformed.
2838 */
2839 int
ip_hdr_length_v6(mblk_t * mp,ip6_t * ip6h)2840 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2841 {
2842 uint16_t hdr_len;
2843 uint8_t *nexthdrp;
2844
2845 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2846 return (hdr_len);
2847 }
2848
2849 /*
2850 * Parse and process any hop-by-hop or destination options.
2851 *
2852 * Assumes that q is an ill read queue so that ICMP errors for link-local
2853 * destinations are sent out the correct interface.
2854 *
2855 * Returns -1 if there was an error and mp has been consumed.
2856 * Returns 0 if no special action is needed.
2857 * Returns 1 if the packet contained a router alert option for this node
2858 * which is verified to be "interesting/known" for our implementation.
2859 *
2860 * XXX Note: In future as more hbh or dest options are defined,
2861 * it may be better to have different routines for hbh and dest
2862 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2863 * may have same value in different namespaces. Or is it same namespace ??
2864 * Current code checks for each opt_type (other than pads) if it is in
2865 * the expected nexthdr (hbh or dest)
2866 */
2867 int
ip_process_options_v6(mblk_t * mp,ip6_t * ip6h,uint8_t * optptr,uint_t optlen,uint8_t hdr_type,ip_recv_attr_t * ira)2868 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2869 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2870 {
/*
 * optptr/optlen describe the options area to walk. hdr_type is compared
 * against IPPROTO_HOPOPTS and IPPROTO_DSTOPTS to decide whether an option
 * is allowed in the header it was found in. ip6h is used as the base for
 * the offset reported in ICMP errors and is modified in place by the home
 * address option. ret stays 0 unless an interesting router alert is seen.
 */
2871 uint8_t opt_type;
2872 uint_t optused;
2873 int ret = 0;
2874 const char *errtype;
2875 ill_t *ill = ira->ira_ill;
2876 ip_stack_t *ipst = ill->ill_ipst;
2877
/*
 * Walk the options one at a time; optused is the number of bytes the
 * current option occupies and is what the walk advances by.
 */
2878 while (optlen != 0) {
2879 opt_type = *optptr;
/* Pad1 is the only option handled as a single byte. */
2880 if (opt_type == IP6OPT_PAD1) {
2881 optused = 1;
2882 } else {
/* Every other option has a length byte at optptr[1]. */
2883 if (optlen < 2)
2884 goto bad_opt;
2885 errtype = "malformed";
/*
 * ip6opt_ls is defined outside this function (presumably the labeled
 * security option type -- confirm); only its length is validated here.
 */
2886 if (opt_type == ip6opt_ls) {
2887 optused = 2 + optptr[1];
2888 if (optused > optlen)
2889 goto bad_opt;
2890 } else switch (opt_type) {
2891 case IP6OPT_PADN:
2892 /*
2893 * Note:We don't verify that (N-2) pad octets
2894 * are zero as required by spec. Adhere to
2895 * "be liberal in what you accept..." part of
2896 * implementation philosophy (RFC791,RFC1122)
2897 */
2898 optused = 2 + optptr[1];
2899 if (optused > optlen)
2900 goto bad_opt;
2901 break;
2902
/* Jumbo payload is always rejected, whichever header it appears in. */
2903 case IP6OPT_JUMBO:
2904 if (hdr_type != IPPROTO_HOPOPTS)
2905 goto opt_error;
2906 goto opt_error; /* XXX Not implemented! */
2907
2908 case IP6OPT_ROUTER_ALERT: {
2909 struct ip6_opt_router *or;
2910
2911 if (hdr_type != IPPROTO_HOPOPTS)
2912 goto opt_error;
2913 optused = 2 + optptr[1];
2914 if (optused > optlen)
2915 goto bad_opt;
2916 or = (struct ip6_opt_router *)optptr;
2917 /* Check total length and alignment */
2918 if (optused != sizeof (*or) ||
2919 ((uintptr_t)or->ip6or_value & 0x1) != 0)
2920 goto opt_error;
2921 /* Check value */
/* Only MLD and RSVP alerts set ret; other values are accepted silently. */
2922 switch (*((uint16_t *)or->ip6or_value)) {
2923 case IP6_ALERT_MLD:
2924 case IP6_ALERT_RSVP:
2925 ret = 1;
2926 }
2927 break;
2928 }
2929 case IP6OPT_HOME_ADDRESS: {
2930 /*
2931 * Minimal support for the home address option
2932 * (which is required by all IPv6 nodes).
2933 * Implement by just swapping the home address
2934 * and source address.
2935 * XXX Note: this has IPsec implications since
2936 * AH needs to take this into account.
2937 * Also, when IPsec is used we need to ensure
2938 * that this is only processed once
2939 * in the received packet (to avoid swapping
2940 * back and forth).
2941 * NOTE:This option processing is considered
2942 * to be unsafe and prone to a denial of
2943 * service attack.
2944 * The current processing is not safe even with
2945 * IPsec secured IP packets. Since the home
2946 * address option processing requirement still
2947 * is in the IETF draft and in the process of
2948 * being redefined for its usage, it has been
2949 * decided to turn off the option by default.
2950 * If this section of code needs to be executed,
2951 * ndd variable ip6_ignore_home_address_opt
2952 * should be set to 0 at the user's own risk.
2953 */
2954 struct ip6_opt_home_address *oh;
2955 in6_addr_t tmp;
2956
/* When the option is being ignored it is treated like any rejected option. */
2957 if (ipst->ips_ipv6_ignore_home_address_opt)
2958 goto opt_error;
2959
2960 if (hdr_type != IPPROTO_DSTOPTS)
2961 goto opt_error;
2962 optused = 2 + optptr[1];
2963 if (optused > optlen)
2964 goto bad_opt;
2965
2966 /*
2967 * We did this dest. opt the first time
2968 * around (i.e. before AH processing).
2969 * If we've done AH... stop now.
2970 */
2971 if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2972 ira->ira_ipsec_ah_sa != NULL)
2973 break;
2974
2975 oh = (struct ip6_opt_home_address *)optptr;
2976 /* Check total length and alignment */
2977 if (optused < sizeof (*oh) ||
2978 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2979 goto opt_error;
2980 /* Swap ip6_src and the home address */
2981 tmp = ip6h->ip6_src;
2982 /* XXX Note: only 8 byte alignment option */
2983 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2984 *(in6_addr_t *)oh->ip6oh_addr = tmp;
2985 break;
2986 }
2987
/* Tunnel limit must occupy exactly 3 bytes in a destination options header. */
2988 case IP6OPT_TUNNEL_LIMIT:
2989 if (hdr_type != IPPROTO_DSTOPTS) {
2990 goto opt_error;
2991 }
2992 optused = 2 + optptr[1];
2993 if (optused > optlen) {
2994 goto bad_opt;
2995 }
2996 if (optused != 3) {
2997 goto opt_error;
2998 }
2999 break;
3000
3001 default:
3002 errtype = "unknown";
3003 /* FALLTHROUGH */
/*
 * opt_error is reached both by falling through from default (unknown
 * option) and by goto from the cases above (known option that was
 * rejected). IP6OPT_TYPE(opt_type) selects what happens next: skip the
 * option and keep walking, drop the packet, or hand it to
 * icmp_param_problem_v6(). The ICMP and FORCEICMP arms differ only in the
 * boolean passed to icmp_param_problem_v6().
 */
3004 opt_error:
3005 /* Determine which zone should send error */
3006 switch (IP6OPT_TYPE(opt_type)) {
3007 case IP6OPT_TYPE_SKIP:
3008 optused = 2 + optptr[1];
3009 if (optused > optlen)
3010 goto bad_opt;
3011 ip1dbg(("ip_process_options_v6: %s "
3012 "opt 0x%x skipped\n",
3013 errtype, opt_type));
3014 break;
3015 case IP6OPT_TYPE_DISCARD:
3016 ip1dbg(("ip_process_options_v6: %s "
3017 "opt 0x%x; packet dropped\n",
3018 errtype, opt_type));
3019 BUMP_MIB(ill->ill_ip_mib,
3020 ipIfStatsInHdrErrors);
3021 ip_drop_input("ipIfStatsInHdrErrors",
3022 mp, ill);
3023 freemsg(mp);
3024 return (-1);
3025 case IP6OPT_TYPE_ICMP:
3026 BUMP_MIB(ill->ill_ip_mib,
3027 ipIfStatsInHdrErrors);
3028 ip_drop_input("ipIfStatsInHdrErrors",
3029 mp, ill);
/* The reported offset is that of the offending option relative to ip6h. */
3030 icmp_param_problem_v6(mp,
3031 ICMP6_PARAMPROB_OPTION,
3032 (uint32_t)(optptr -
3033 (uint8_t *)ip6h),
3034 B_FALSE, ira);
3035 return (-1);
3036 case IP6OPT_TYPE_FORCEICMP:
3037 BUMP_MIB(ill->ill_ip_mib,
3038 ipIfStatsInHdrErrors);
3039 ip_drop_input("ipIfStatsInHdrErrors",
3040 mp, ill);
3041 icmp_param_problem_v6(mp,
3042 ICMP6_PARAMPROB_OPTION,
3043 (uint32_t)(optptr -
3044 (uint8_t *)ip6h),
3045 B_TRUE, ira);
3046 return (-1);
3047 default:
3048 ASSERT(0);
3049 }
3050 }
3051 }
/* Step past the option just handled. */
3052 optlen -= optused;
3053 optptr += optused;
3054 }
3055 return (ret);
3056
/*
 * bad_opt: the option's length byte is missing or the option runs past
 * the end of the options area. mp is handed to icmp_param_problem_v6()
 * with the offset of the offending option relative to ip6h.
 */
3057 bad_opt:
3058 /* Determine which zone should send error */
3059 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3060 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
3061 (uint32_t)(optptr - (uint8_t *)ip6h),
3062 B_FALSE, ira);
3063 return (-1);
3064 }
3065
3066 /*
3067 * Process a routing header that is not yet empty.
3068 * Because of RFC 5095, we now reject all route headers.
3069 */
3070 void
ip_process_rthdr(mblk_t * mp,ip6_t * ip6h,ip6_rthdr_t * rth,ip_recv_attr_t * ira)3071 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
3072 ip_recv_attr_t *ira)
3073 {
3074 ill_t *ill = ira->ira_ill;
3075 ip_stack_t *ipst = ill->ill_ipst;
3076
3077 ASSERT(rth->ip6r_segleft != 0);
3078
3079 if (!ipst->ips_ipv6_forward_src_routed) {
3080 /* XXX Check for source routed out same interface? */
3081 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
3082 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
3083 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
3084 freemsg(mp);
3085 return;
3086 }
3087
3088 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3089 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3090 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3091 B_FALSE, ira);
3092 }
3093
3094 /*
3095 * Read side put procedure for IPv6 module.
3096 */
3097 void
ip_rput_v6(queue_t * q,mblk_t * mp)3098 ip_rput_v6(queue_t *q, mblk_t *mp)
3099 {
3100 ill_t *ill;
3101
3102 ill = (ill_t *)q->q_ptr;
3103 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3104 union DL_primitives *dl;
3105
3106 dl = (union DL_primitives *)mp->b_rptr;
3107 /*
3108 * Things are opening or closing - only accept DLPI
3109 * ack messages. If the stream is closing and ip_wsrv
3110 * has completed, ip_close is out of the qwait, but has
3111 * not yet completed qprocsoff. Don't proceed any further
3112 * because the ill has been cleaned up and things hanging
3113 * off the ill have been freed.
3114 */
3115 if ((mp->b_datap->db_type != M_PCPROTO) ||
3116 (dl->dl_primitive == DL_UNITDATA_IND)) {
3117 inet_freemsg(mp);
3118 return;
3119 }
3120 }
3121 if (DB_TYPE(mp) == M_DATA) {
3122 struct mac_header_info_s mhi;
3123
3124 ip_mdata_to_mhi(ill, mp, &mhi);
3125 ip_input_v6(ill, NULL, mp, &mhi);
3126 } else {
3127 ip_rput_notdata(ill, mp);
3128 }
3129 }
3130
3131 /*
3132 * Walk through the IPv6 packet in mp and see if there's an AH header
3133 * in it. See if the AH header needs to get done before other headers in
3134 * the packet. (Worker function for ipsec_early_ah_v6().)
3135 */
3136 #define IPSEC_HDR_DONT_PROCESS 0
3137 #define IPSEC_HDR_PROCESS 1
3138 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */
3139 static int
ipsec_needs_processing_v6(mblk_t * mp,uint8_t * nexthdr)3140 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3141 {
3142 uint_t length;
3143 uint_t ehdrlen;
3144 uint8_t *whereptr;
3145 uint8_t *endptr;
3146 uint8_t *nexthdrp;
3147 ip6_dest_t *desthdr;
3148 ip6_rthdr_t *rthdr;
3149 ip6_t *ip6h;
3150
3151 /*
3152 * For now just pullup everything. In general, the less pullups,
3153 * the better, but there's so much squirrelling through anyway,
3154 * it's just easier this way.
3155 */
3156 if (!pullupmsg(mp, -1)) {
3157 return (IPSEC_MEMORY_ERROR);
3158 }
3159
3160 ip6h = (ip6_t *)mp->b_rptr;
3161 length = IPV6_HDR_LEN;
3162 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3163 endptr = mp->b_wptr;
3164
3165 /*
3166 * We can't just use the argument nexthdr in the place
3167 * of nexthdrp becaue we don't dereference nexthdrp
3168 * till we confirm whether it is a valid address.
3169 */
3170 nexthdrp = &ip6h->ip6_nxt;
3171 while (whereptr < endptr) {
3172 /* Is there enough left for len + nexthdr? */
3173 if (whereptr + MIN_EHDR_LEN > endptr)
3174 return (IPSEC_MEMORY_ERROR);
3175
3176 switch (*nexthdrp) {
3177 case IPPROTO_HOPOPTS:
3178 case IPPROTO_DSTOPTS:
3179 /* Assumes the headers are identical for hbh and dst */
3180 desthdr = (ip6_dest_t *)whereptr;
3181 ehdrlen = 8 * (desthdr->ip6d_len + 1);
3182 if ((uchar_t *)desthdr + ehdrlen > endptr)
3183 return (IPSEC_MEMORY_ERROR);
3184 /*
3185 * Return DONT_PROCESS because the destination
3186 * options header may be for each hop in a
3187 * routing-header, and we only want AH if we're
3188 * finished with routing headers.
3189 */
3190 if (*nexthdrp == IPPROTO_DSTOPTS)
3191 return (IPSEC_HDR_DONT_PROCESS);
3192 nexthdrp = &desthdr->ip6d_nxt;
3193 break;
3194 case IPPROTO_ROUTING:
3195 rthdr = (ip6_rthdr_t *)whereptr;
3196
3197 /*
3198 * If there's more hops left on the routing header,
3199 * return now with DON'T PROCESS.
3200 */
3201 if (rthdr->ip6r_segleft > 0)
3202 return (IPSEC_HDR_DONT_PROCESS);
3203
3204 ehdrlen = 8 * (rthdr->ip6r_len + 1);
3205 if ((uchar_t *)rthdr + ehdrlen > endptr)
3206 return (IPSEC_MEMORY_ERROR);
3207 nexthdrp = &rthdr->ip6r_nxt;
3208 break;
3209 case IPPROTO_FRAGMENT:
3210 /* Wait for reassembly */
3211 return (IPSEC_HDR_DONT_PROCESS);
3212 case IPPROTO_AH:
3213 *nexthdr = IPPROTO_AH;
3214 return (IPSEC_HDR_PROCESS);
3215 case IPPROTO_NONE:
3216 /* No next header means we're finished */
3217 default:
3218 return (IPSEC_HDR_DONT_PROCESS);
3219 }
3220 length += ehdrlen;
3221 whereptr += ehdrlen;
3222 }
3223 /*
3224 * Malformed/truncated packet.
3225 */
3226 return (IPSEC_MEMORY_ERROR);
3227 }
3228
3229 /*
3230 * Path for AH if options are present.
3231 * Returns NULL if the mblk was consumed.
3232 *
3233 * Sometimes AH needs to be done before other IPv6 headers for security
3234 * reasons. This function (and its ipsec_needs_processing_v6() above)
3235 * indicates if that is so, and fans out to the appropriate IPsec protocol
3236 * for the datagram passed in.
3237 */
3238 mblk_t *
ipsec_early_ah_v6(mblk_t * mp,ip_recv_attr_t * ira)3239 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3240 {
3241 uint8_t nexthdr;
3242 ah_t *ah;
3243 ill_t *ill = ira->ira_ill;
3244 ip_stack_t *ipst = ill->ill_ipst;
3245 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
3246
3247 switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3248 case IPSEC_MEMORY_ERROR:
3249 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3250 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3251 freemsg(mp);
3252 return (NULL);
3253 case IPSEC_HDR_DONT_PROCESS:
3254 return (mp);
3255 }
3256
3257 /* Default means send it to AH! */
3258 ASSERT(nexthdr == IPPROTO_AH);
3259
3260 if (!ipsec_loaded(ipss)) {
3261 ip_proto_not_sup(mp, ira);
3262 return (NULL);
3263 }
3264
3265 mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3266 if (mp == NULL)
3267 return (NULL);
3268 ASSERT(ah != NULL);
3269 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3270 ASSERT(ira->ira_ipsec_ah_sa != NULL);
3271 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3272 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3273
3274 if (mp == NULL) {
3275 /*
3276 * Either it failed or is pending. In the former case
3277 * ipIfStatsInDiscards was increased.
3278 */
3279 return (NULL);
3280 }
3281
3282 /* we're done with IPsec processing, send it up */
3283 ip_input_post_ipsec(mp, ira);
3284 return (NULL);
3285 }
3286
3287 /*
3288 * Reassemble fragment.
3289 * When it returns a completed message the first mblk will only contain
3290 * the headers prior to the fragment header, with the nexthdr value updated
3291 * to be the header after the fragment header.
3292 */
3293 mblk_t *
ip_input_fragment_v6(mblk_t * mp,ip6_t * ip6h,ip6_frag_t * fraghdr,uint_t remlen,ip_recv_attr_t * ira)3294 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3295 ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3296 {
3297 uint32_t ident = ntohl(fraghdr->ip6f_ident);
3298 uint16_t offset;
3299 boolean_t more_frags;
3300 uint8_t nexthdr = fraghdr->ip6f_nxt;
3301 in6_addr_t *v6dst_ptr;
3302 in6_addr_t *v6src_ptr;
3303 uint_t end;
3304 uint_t hdr_length;
3305 size_t count;
3306 ipf_t *ipf;
3307 ipf_t **ipfp;
3308 ipfb_t *ipfb;
3309 mblk_t *mp1;
3310 uint8_t ecn_info = 0;
3311 size_t msg_len;
3312 mblk_t *tail_mp;
3313 mblk_t *t_mp;
3314 boolean_t pruned = B_FALSE;
3315 uint32_t sum_val;
3316 uint16_t sum_flags;
3317 ill_t *ill = ira->ira_ill;
3318 ip_stack_t *ipst = ill->ill_ipst;
3319 uint_t prev_nexthdr_offset;
3320 uint8_t prev_nexthdr;
3321 uint8_t *ptr;
3322 uint32_t packet_size;
3323
3324 /*
3325 * We utilize hardware computed checksum info only for UDP since
3326 * IP fragmentation is a normal occurence for the protocol. In
3327 * addition, checksum offload support for IP fragments carrying
3328 * UDP payload is commonly implemented across network adapters.
3329 */
3330 ASSERT(ira->ira_rill != NULL);
3331 if (nexthdr == IPPROTO_UDP && dohwcksum &&
3332 ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3333 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
3334 mblk_t *mp1 = mp->b_cont;
3335 int32_t len;
3336
3337 /* Record checksum information from the packet */
3338 sum_val = (uint32_t)DB_CKSUM16(mp);
3339 sum_flags = DB_CKSUMFLAGS(mp);
3340
3341 /* fragmented payload offset from beginning of mblk */
3342 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3343
3344 if ((sum_flags & HCK_PARTIALCKSUM) &&
3345 (mp1 == NULL || mp1->b_cont == NULL) &&
3346 offset >= DB_CKSUMSTART(mp) &&
3347 ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3348 uint32_t adj;
3349 /*
3350 * Partial checksum has been calculated by hardware
3351 * and attached to the packet; in addition, any
3352 * prepended extraneous data is even byte aligned.
3353 * If any such data exists, we adjust the checksum;
3354 * this would also handle any postpended data.
3355 */
3356 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3357 mp, mp1, len, adj);
3358
3359 /* One's complement subtract extraneous checksum */
3360 if (adj >= sum_val)
3361 sum_val = ~(adj - sum_val) & 0xFFFF;
3362 else
3363 sum_val -= adj;
3364 }
3365 } else {
3366 sum_val = 0;
3367 sum_flags = 0;
3368 }
3369
3370 /* Clear hardware checksumming flag */
3371 DB_CKSUMFLAGS(mp) = 0;
3372
3373 /*
3374 * Determine the offset (from the begining of the IP header)
3375 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3376 * this when removing the fragment header from the packet.
3377 * This packet consists of the IPv6 header, a potential
3378 * hop-by-hop options header, a potential pre-routing-header
3379 * destination options header, and a potential routing header.
3380 */
3381 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3382 prev_nexthdr = ip6h->ip6_nxt;
3383 ptr = (uint8_t *)&ip6h[1];
3384
3385 if (prev_nexthdr == IPPROTO_HOPOPTS) {
3386 ip6_hbh_t *hbh_hdr;
3387 uint_t hdr_len;
3388
3389 hbh_hdr = (ip6_hbh_t *)ptr;
3390 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3391 prev_nexthdr = hbh_hdr->ip6h_nxt;
3392 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3393 - (uint8_t *)ip6h;
3394 ptr += hdr_len;
3395 }
3396 if (prev_nexthdr == IPPROTO_DSTOPTS) {
3397 ip6_dest_t *dest_hdr;
3398 uint_t hdr_len;
3399
3400 dest_hdr = (ip6_dest_t *)ptr;
3401 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3402 prev_nexthdr = dest_hdr->ip6d_nxt;
3403 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3404 - (uint8_t *)ip6h;
3405 ptr += hdr_len;
3406 }
3407 if (prev_nexthdr == IPPROTO_ROUTING) {
3408 ip6_rthdr_t *rthdr;
3409 uint_t hdr_len;
3410
3411 rthdr = (ip6_rthdr_t *)ptr;
3412 prev_nexthdr = rthdr->ip6r_nxt;
3413 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3414 - (uint8_t *)ip6h;
3415 hdr_len = 8 * (rthdr->ip6r_len + 1);
3416 ptr += hdr_len;
3417 }
3418 if (prev_nexthdr != IPPROTO_FRAGMENT) {
3419 /* Can't handle other headers before the fragment header */
3420 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3421 ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3422 freemsg(mp);
3423 return (NULL);
3424 }
3425
3426 /*
3427 * Note: Fragment offset in header is in 8-octet units.
3428 * Clearing least significant 3 bits not only extracts
3429 * it but also gets it in units of octets.
3430 */
3431 offset = ntohs(fraghdr->ip6f_offlg) & ~7;
3432 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3433
3434 /*
3435 * Is the more frags flag on and the payload length not a multiple
3436 * of eight?
3437 */
3438 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3439 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3440 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3441 (uint32_t)((char *)&ip6h->ip6_plen -
3442 (char *)ip6h), B_FALSE, ira);
3443 return (NULL);
3444 }
3445
3446 v6src_ptr = &ip6h->ip6_src;
3447 v6dst_ptr = &ip6h->ip6_dst;
3448 end = remlen;
3449
3450 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3451 end += offset;
3452
3453 /*
3454 * Would fragment cause reassembled packet to have a payload length
3455 * greater than IP_MAXPACKET - the max payload size?
3456 */
3457 if (end > IP_MAXPACKET) {
3458 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3459 ip_drop_input("Reassembled packet too large", mp, ill);
3460 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3461 (uint32_t)((char *)&fraghdr->ip6f_offlg -
3462 (char *)ip6h), B_FALSE, ira);
3463 return (NULL);
3464 }
3465
3466 /*
3467 * This packet just has one fragment. Reassembly not
3468 * needed.
3469 */
3470 if (!more_frags && offset == 0) {
3471 goto reass_done;
3472 }
3473
3474 /*
3475 * Drop the fragmented as early as possible, if
3476 * we don't have resource(s) to re-assemble.
3477 */
3478 if (ipst->ips_ip_reass_queue_bytes == 0) {
3479 freemsg(mp);
3480 return (NULL);
3481 }
3482
3483 /* Record the ECN field info. */
3484 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3485 /*
3486 * If this is not the first fragment, dump the unfragmentable
3487 * portion of the packet.
3488 */
3489 if (offset)
3490 mp->b_rptr = (uchar_t *)&fraghdr[1];
3491
3492 /*
3493 * Fragmentation reassembly. Each ILL has a hash table for
3494 * queueing packets undergoing reassembly for all IPIFs
3495 * associated with the ILL. The hash is based on the packet
3496 * IP ident field. The ILL frag hash table was allocated
3497 * as a timer block at the time the ILL was created. Whenever
3498 * there is anything on the reassembly queue, the timer will
3499 * be running.
3500 */
3501 /* Handle vnic loopback of fragments */
3502 if (mp->b_datap->db_ref > 2)
3503 msg_len = 0;
3504 else
3505 msg_len = MBLKSIZE(mp);
3506
3507 tail_mp = mp;
3508 while (tail_mp->b_cont != NULL) {
3509 tail_mp = tail_mp->b_cont;
3510 if (tail_mp->b_datap->db_ref <= 2)
3511 msg_len += MBLKSIZE(tail_mp);
3512 }
3513 /*
3514 * If the reassembly list for this ILL will get too big
3515 * prune it.
3516 */
3517
3518 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3519 ipst->ips_ip_reass_queue_bytes) {
3520 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3521 uint_t, ill->ill_frag_count,
3522 uint_t, ipst->ips_ip_reass_queue_bytes);
3523 ill_frag_prune(ill,
3524 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3525 (ipst->ips_ip_reass_queue_bytes - msg_len));
3526 pruned = B_TRUE;
3527 }
3528
3529 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3530 mutex_enter(&ipfb->ipfb_lock);
3531
3532 ipfp = &ipfb->ipfb_ipf;
3533 /* Try to find an existing fragment queue for this packet. */
3534 for (;;) {
3535 ipf = ipfp[0];
3536 if (ipf) {
3537 /*
3538 * It has to match on ident, source address, and
3539 * dest address.
3540 */
3541 if (ipf->ipf_ident == ident &&
3542 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3543 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3544
3545 /*
3546 * If we have received too many
3547 * duplicate fragments for this packet
3548 * free it.
3549 */
3550 if (ipf->ipf_num_dups > ip_max_frag_dups) {
3551 ill_frag_free_pkts(ill, ipfb, ipf, 1);
3552 freemsg(mp);
3553 mutex_exit(&ipfb->ipfb_lock);
3554 return (NULL);
3555 }
3556
3557 break;
3558 }
3559 ipfp = &ipf->ipf_hash_next;
3560 continue;
3561 }
3562
3563
3564 /*
3565 * If we pruned the list, do we want to store this new
3566 * fragment?. We apply an optimization here based on the
3567 * fact that most fragments will be received in order.
3568 * So if the offset of this incoming fragment is zero,
3569 * it is the first fragment of a new packet. We will
3570 * keep it. Otherwise drop the fragment, as we have
3571 * probably pruned the packet already (since the
3572 * packet cannot be found).
3573 */
3574
3575 if (pruned && offset != 0) {
3576 mutex_exit(&ipfb->ipfb_lock);
3577 freemsg(mp);
3578 return (NULL);
3579 }
3580
3581 /* New guy. Allocate a frag message. */
3582 mp1 = allocb(sizeof (*ipf), BPRI_MED);
3583 if (!mp1) {
3584 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3585 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3586 freemsg(mp);
3587 partial_reass_done:
3588 mutex_exit(&ipfb->ipfb_lock);
3589 return (NULL);
3590 }
3591
3592 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) {
3593 /*
3594 * Too many fragmented packets in this hash bucket.
3595 * Free the oldest.
3596 */
3597 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
3598 }
3599
3600 mp1->b_cont = mp;
3601
3602 /* Initialize the fragment header. */
3603 ipf = (ipf_t *)mp1->b_rptr;
3604 ipf->ipf_mp = mp1;
3605 ipf->ipf_ptphn = ipfp;
3606 ipfp[0] = ipf;
3607 ipf->ipf_hash_next = NULL;
3608 ipf->ipf_ident = ident;
3609 ipf->ipf_v6src = *v6src_ptr;
3610 ipf->ipf_v6dst = *v6dst_ptr;
3611 /* Record reassembly start time. */
3612 ipf->ipf_timestamp = gethrestime_sec();
3613 /* Record ipf generation and account for frag header */
3614 ipf->ipf_gen = ill->ill_ipf_gen++;
3615 ipf->ipf_count = MBLKSIZE(mp1);
3616 ipf->ipf_protocol = nexthdr;
3617 ipf->ipf_nf_hdr_len = 0;
3618 ipf->ipf_prev_nexthdr_offset = 0;
3619 ipf->ipf_last_frag_seen = B_FALSE;
3620 ipf->ipf_ecn = ecn_info;
3621 ipf->ipf_num_dups = 0;
3622 ipfb->ipfb_frag_pkts++;
3623 ipf->ipf_checksum = 0;
3624 ipf->ipf_checksum_flags = 0;
3625
3626 /* Store checksum value in fragment header */
3627 if (sum_flags != 0) {
3628 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3629 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3630 ipf->ipf_checksum = sum_val;
3631 ipf->ipf_checksum_flags = sum_flags;
3632 }
3633
3634 /*
3635 * We handle reassembly two ways. In the easy case,
3636 * where all the fragments show up in order, we do
3637 * minimal bookkeeping, and just clip new pieces on
3638 * the end. If we ever see a hole, then we go off
3639 * to ip_reassemble which has to mark the pieces and
3640 * keep track of the number of holes, etc. Obviously,
3641 * the point of having both mechanisms is so we can
3642 * handle the easy case as efficiently as possible.
3643 */
3644 if (offset == 0) {
3645 /* Easy case, in-order reassembly so far. */
3646 /* Update the byte count */
3647 ipf->ipf_count += msg_len;
3648 ipf->ipf_tail_mp = tail_mp;
3649 /*
3650 * Keep track of next expected offset in
3651 * ipf_end.
3652 */
3653 ipf->ipf_end = end;
3654 ipf->ipf_nf_hdr_len = hdr_length;
3655 ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
3656 } else {
3657 /* Hard case, hole at the beginning. */
3658 ipf->ipf_tail_mp = NULL;
3659 /*
3660 * ipf_end == 0 means that we have given up
3661 * on easy reassembly.
3662 */
3663 ipf->ipf_end = 0;
3664
3665 /* Forget checksum offload from now on */
3666 ipf->ipf_checksum_flags = 0;
3667
3668 /*
3669 * ipf_hole_cnt is set by ip_reassemble.
3670 * ipf_count is updated by ip_reassemble.
3671 * No need to check for return value here
3672 * as we don't expect reassembly to complete or
3673 * fail for the first fragment itself.
3674 */
3675 (void) ip_reassemble(mp, ipf, offset, more_frags, ill,
3676 msg_len);
3677 }
3678 /* Update per ipfb and ill byte counts */
3679 ipfb->ipfb_count += ipf->ipf_count;
3680 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
3681 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
3682 /* If the frag timer wasn't already going, start it. */
3683 mutex_enter(&ill->ill_lock);
3684 ill_frag_timer_start(ill);
3685 mutex_exit(&ill->ill_lock);
3686 goto partial_reass_done;
3687 }
3688
3689 /*
3690 * If the packet's flag has changed (it could be coming up
3691 * from an interface different than the previous, therefore
3692 * possibly different checksum capability), then forget about
3693 * any stored checksum states. Otherwise add the value to
3694 * the existing one stored in the fragment header.
3695 */
3696 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
3697 sum_val += ipf->ipf_checksum;
3698 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3699 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3700 ipf->ipf_checksum = sum_val;
3701 } else if (ipf->ipf_checksum_flags != 0) {
3702 /* Forget checksum offload from now on */
3703 ipf->ipf_checksum_flags = 0;
3704 }
3705
3706 /*
3707 * We have a new piece of a datagram which is already being
3708 * reassembled. Update the ECN info if all IP fragments
3709 * are ECN capable. If there is one which is not, clear
3710 * all the info. If there is at least one which has CE
3711 * code point, IP needs to report that up to transport.
3712 */
3713 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
3714 if (ecn_info == IPH_ECN_CE)
3715 ipf->ipf_ecn = IPH_ECN_CE;
3716 } else {
3717 ipf->ipf_ecn = IPH_ECN_NECT;
3718 }
3719
3720 if (offset && ipf->ipf_end == offset) {
3721 /* The new fragment fits at the end */
3722 ipf->ipf_tail_mp->b_cont = mp;
3723 /* Update the byte count */
3724 ipf->ipf_count += msg_len;
3725 /* Update per ipfb and ill byte counts */
3726 ipfb->ipfb_count += msg_len;
3727 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
3728 atomic_add_32(&ill->ill_frag_count, msg_len);
3729 if (more_frags) {
3730 /* More to come. */
3731 ipf->ipf_end = end;
3732 ipf->ipf_tail_mp = tail_mp;
3733 goto partial_reass_done;
3734 }
3735 } else {
3736 /*
3737 * Go do the hard cases.
3738 * Call ip_reassemble().
3739 */
3740 int ret;
3741
3742 if (offset == 0) {
3743 if (ipf->ipf_prev_nexthdr_offset == 0) {
3744 ipf->ipf_nf_hdr_len = hdr_length;
3745 ipf->ipf_prev_nexthdr_offset =
3746 prev_nexthdr_offset;
3747 }
3748 }
3749 /* Save current byte count */
3750 count = ipf->ipf_count;
3751 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
3752
3753 /* Count of bytes added and subtracted (freeb()ed) */
3754 count = ipf->ipf_count - count;
3755 if (count) {
3756 /* Update per ipfb and ill byte counts */
3757 ipfb->ipfb_count += count;
3758 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
3759 atomic_add_32(&ill->ill_frag_count, count);
3760 }
3761 if (ret == IP_REASS_PARTIAL) {
3762 goto partial_reass_done;
3763 } else if (ret == IP_REASS_FAILED) {
3764 /* Reassembly failed. Free up all resources */
3765 ill_frag_free_pkts(ill, ipfb, ipf, 1);
3766 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
3767 IP_REASS_SET_START(t_mp, 0);
3768 IP_REASS_SET_END(t_mp, 0);
3769 }
3770 freemsg(mp);
3771 goto partial_reass_done;
3772 }
3773
3774 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3775 }
3776 /*
3777 * We have completed reassembly. Unhook the frag header from
3778 * the reassembly list.
3779 *
3780 * Grab the unfragmentable header length next header value out
3781 * of the first fragment
3782 */
3783 ASSERT(ipf->ipf_nf_hdr_len != 0);
3784 hdr_length = ipf->ipf_nf_hdr_len;
3785
3786 /*
3787 * Before we free the frag header, record the ECN info
3788 * to report back to the transport.
3789 */
3790 ecn_info = ipf->ipf_ecn;
3791
3792 /*
3793 * Store the nextheader field in the header preceding the fragment
3794 * header
3795 */
3796 nexthdr = ipf->ipf_protocol;
3797 prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
3798 ipfp = ipf->ipf_ptphn;
3799
3800 /* We need to supply these to caller */
3801 if ((sum_flags = ipf->ipf_checksum_flags) != 0)
3802 sum_val = ipf->ipf_checksum;
3803 else
3804 sum_val = 0;
3805
3806 mp1 = ipf->ipf_mp;
3807 count = ipf->ipf_count;
3808 ipf = ipf->ipf_hash_next;
3809 if (ipf)
3810 ipf->ipf_ptphn = ipfp;
3811 ipfp[0] = ipf;
3812 atomic_add_32(&ill->ill_frag_count, -count);
3813 ASSERT(ipfb->ipfb_count >= count);
3814 ipfb->ipfb_count -= count;
3815 ipfb->ipfb_frag_pkts--;
3816 mutex_exit(&ipfb->ipfb_lock);
3817 /* Ditch the frag header. */
3818 mp = mp1->b_cont;
3819 freeb(mp1);
3820
3821 /*
3822 * Make sure the packet is good by doing some sanity
3823 * check. If bad we can silentely drop the packet.
3824 */
3825 reass_done:
3826 if (hdr_length < sizeof (ip6_frag_t)) {
3827 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3828 ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3829 ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3830 freemsg(mp);
3831 return (NULL);
3832 }
3833
3834 /*
3835 * Remove the fragment header from the initial header by
3836 * splitting the mblk into the non-fragmentable header and
3837 * everthing after the fragment extension header. This has the
3838 * side effect of putting all the headers that need destination
3839 * processing into the b_cont block-- on return this fact is
3840 * used in order to avoid having to look at the extensions
3841 * already processed.
3842 *
3843 * Note that this code assumes that the unfragmentable portion
3844 * of the header is in the first mblk and increments
3845 * the read pointer past it. If this assumption is broken
3846 * this code fails badly.
3847 */
3848 if (mp->b_rptr + hdr_length != mp->b_wptr) {
3849 mblk_t *nmp;
3850
3851 if (!(nmp = dupb(mp))) {
3852 ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3853 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3854 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3855 freemsg(mp);
3856 return (NULL);
3857 }
3858 nmp->b_cont = mp->b_cont;
3859 mp->b_cont = nmp;
3860 nmp->b_rptr += hdr_length;
3861 }
3862 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
3863
3864 ip6h = (ip6_t *)mp->b_rptr;
3865 ((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
3866
3867 /* Restore original IP length in header. */
3868 packet_size = msgdsize(mp);
3869 ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
3870 /* Record the ECN info. */
3871 ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
3872 ip6h->ip6_vcf |= htonl(ecn_info << 20);
3873
3874 /* Update the receive attributes */
3875 ira->ira_pktlen = packet_size;
3876 ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
3877 ira->ira_protocol = nexthdr;
3878
3879 /* Reassembly is successful; set checksum information in packet */
3880 DB_CKSUM16(mp) = (uint16_t)sum_val;
3881 DB_CKSUMFLAGS(mp) = sum_flags;
3882 DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
3883
3884 return (mp);
3885 }
3886
3887 /*
3888 * Given an mblk and a ptr, find the destination address in an IPv6 routing
3889 * header.
3890 */
3891 static in6_addr_t
pluck_out_dst(const mblk_t * mp,uint8_t * whereptr,in6_addr_t oldrv)3892 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3893 {
3894 ip6_rthdr0_t *rt0;
3895 int segleft, numaddr;
3896 in6_addr_t *ap, rv = oldrv;
3897
3898 rt0 = (ip6_rthdr0_t *)whereptr;
3899 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3900 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3901 uint8_t *, whereptr);
3902 return (rv);
3903 }
3904 segleft = rt0->ip6r0_segleft;
3905 numaddr = rt0->ip6r0_len / 2;
3906
3907 if ((rt0->ip6r0_len & 0x1) ||
3908 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3909 (segleft > rt0->ip6r0_len / 2)) {
3910 /*
3911 * Corrupt packet. Either the routing header length is odd
3912 * (can't happen) or mismatched compared to the packet, or the
3913 * number of addresses is. Return what we can. This will
3914 * only be a problem on forwarded packets that get squeezed
3915 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3916 */
3917 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3918 whereptr);
3919 return (rv);
3920 }
3921
3922 if (segleft != 0) {
3923 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3924 rv = ap[numaddr - 1];
3925 }
3926
3927 return (rv);
3928 }
3929
3930 /*
3931 * Walk through the options to see if there is a routing header.
3932 * If present get the destination which is the last address of
3933 * the option.
3934 * mp needs to be provided in cases when the extension headers might span
3935 * b_cont; mp is never modified by this function.
3936 */
3937 in6_addr_t
ip_get_dst_v6(ip6_t * ip6h,const mblk_t * mp,boolean_t * is_fragment)3938 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3939 {
3940 const mblk_t *current_mp = mp;
3941 uint8_t nexthdr;
3942 uint8_t *whereptr;
3943 int ehdrlen;
3944 in6_addr_t rv;
3945
3946 whereptr = (uint8_t *)ip6h;
3947 ehdrlen = sizeof (ip6_t);
3948
3949 /* We assume at least the IPv6 base header is within one mblk. */
3950 ASSERT(mp == NULL ||
3951 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3952
3953 rv = ip6h->ip6_dst;
3954 nexthdr = ip6h->ip6_nxt;
3955 if (is_fragment != NULL)
3956 *is_fragment = B_FALSE;
3957
3958 /*
3959 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3960 * no extension headers will be split across mblks.
3961 */
3962
3963 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3964 nexthdr == IPPROTO_ROUTING) {
3965 if (nexthdr == IPPROTO_ROUTING)
3966 rv = pluck_out_dst(current_mp, whereptr, rv);
3967
3968 /*
3969 * All IPv6 extension headers have the next-header in byte
3970 * 0, and the (length - 8) in 8-byte-words.
3971 */
3972 while (current_mp != NULL &&
3973 whereptr + ehdrlen >= current_mp->b_wptr) {
3974 ehdrlen -= (current_mp->b_wptr - whereptr);
3975 current_mp = current_mp->b_cont;
3976 if (current_mp == NULL) {
3977 /* Bad packet. Return what we can. */
3978 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3979 mp, mblk_t *, current_mp, ip6_t *, ip6h);
3980 goto done;
3981 }
3982 whereptr = current_mp->b_rptr;
3983 }
3984 whereptr += ehdrlen;
3985
3986 nexthdr = *whereptr;
3987 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3988 ehdrlen = (*(whereptr + 1) + 1) * 8;
3989 }
3990
3991 done:
3992 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3993 *is_fragment = B_TRUE;
3994 return (rv);
3995 }
3996
3997 /*
3998 * ip_source_routed_v6:
3999 * This function is called by redirect code (called from ip_input_v6) to
4000 * know whether this packet is source routed through this node i.e
4001 * whether this node (router) is part of the journey. This
4002 * function is called under two cases :
4003 *
4004 * case 1 : Routing header was processed by this node and
4005 * ip_process_rthdr replaced ip6_dst with the next hop
4006 * and we are forwarding the packet to the next hop.
4007 *
4008 * case 2 : Routing header was not processed by this node and we
4009 * are just forwarding the packet.
4010 *
4011 * For case (1) we don't want to send redirects. For case(2) we
4012 * want to send redirects.
4013 */
4014 static boolean_t
ip_source_routed_v6(ip6_t * ip6h,mblk_t * mp,ip_stack_t * ipst)4015 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
4016 {
4017 uint8_t nexthdr;
4018 in6_addr_t *addrptr;
4019 ip6_rthdr0_t *rthdr;
4020 uint8_t numaddr;
4021 ip6_hbh_t *hbhhdr;
4022 uint_t ehdrlen;
4023 uint8_t *byteptr;
4024
4025 ip2dbg(("ip_source_routed_v6\n"));
4026 nexthdr = ip6h->ip6_nxt;
4027 ehdrlen = IPV6_HDR_LEN;
4028
4029 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */
4030 while (nexthdr == IPPROTO_HOPOPTS ||
4031 nexthdr == IPPROTO_DSTOPTS) {
4032 byteptr = (uint8_t *)ip6h + ehdrlen;
4033 /*
4034 * Check if we have already processed
4035 * packets or we are just a forwarding
4036 * router which only pulled up msgs up
4037 * to IPV6HDR and one HBH ext header
4038 */
4039 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4040 ip2dbg(("ip_source_routed_v6: Extension"
4041 " headers not processed\n"));
4042 return (B_FALSE);
4043 }
4044 hbhhdr = (ip6_hbh_t *)byteptr;
4045 nexthdr = hbhhdr->ip6h_nxt;
4046 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
4047 }
4048 switch (nexthdr) {
4049 case IPPROTO_ROUTING:
4050 byteptr = (uint8_t *)ip6h + ehdrlen;
4051 /*
4052 * If for some reason, we haven't pulled up
4053 * the routing hdr data mblk, then we must
4054 * not have processed it at all. So for sure
4055 * we are not part of the source routed journey.
4056 */
4057 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4058 ip2dbg(("ip_source_routed_v6: Routing"
4059 " header not processed\n"));
4060 return (B_FALSE);
4061 }
4062 rthdr = (ip6_rthdr0_t *)byteptr;
4063 /*
4064 * Either we are an intermediate router or the
4065 * last hop before destination and we have
4066 * already processed the routing header.
4067 * If segment_left is greater than or equal to zero,
4068 * then we must be the (numaddr - segleft) entry
4069 * of the routing header. Although ip6r0_segleft
4070 * is a unit8_t variable, we still check for zero
4071 * or greater value, if in case the data type
4072 * is changed someday in future.
4073 */
4074 if (rthdr->ip6r0_segleft > 0 ||
4075 rthdr->ip6r0_segleft == 0) {
4076 numaddr = rthdr->ip6r0_len / 2;
4077 addrptr = (in6_addr_t *)((char *)rthdr +
4078 sizeof (*rthdr));
4079 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
4080 if (addrptr != NULL) {
4081 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
4082 return (B_TRUE);
4083 ip1dbg(("ip_source_routed_v6: Not local\n"));
4084 }
4085 }
4086 /* FALLTHRU */
4087 default:
4088 ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4089 return (B_FALSE);
4090 }
4091 }
4092
4093 /*
4094 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation.
4095 * We have not optimized this in terms of number of mblks
4096 * allocated. For instance, for each fragment sent we always allocate a
4097 * mblk to hold the IPv6 header and fragment header.
4098 *
4099 * Assumes that all the extension headers are contained in the first mblk
4100 * and that the fragment header has has already been added by calling
4101 * ip_fraghdr_add_v6.
4102 */
4103 int
ip_fragment_v6(mblk_t * mp,nce_t * nce,iaflags_t ixaflags,uint_t pkt_len,uint32_t max_frag,uint32_t xmit_hint,zoneid_t szone,zoneid_t nolzid,pfirepostfrag_t postfragfn,uintptr_t * ixa_cookie)4104 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4105 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4106 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4107 {
4108 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
4109 ip6_t *fip6h;
4110 mblk_t *hmp;
4111 mblk_t *hmp0;
4112 mblk_t *dmp;
4113 ip6_frag_t *fraghdr;
4114 size_t unfragmentable_len;
4115 size_t mlen;
4116 size_t max_chunk;
4117 uint16_t off_flags;
4118 uint16_t offset = 0;
4119 ill_t *ill = nce->nce_ill;
4120 uint8_t nexthdr;
4121 uint8_t *ptr;
4122 ip_stack_t *ipst = ill->ill_ipst;
4123 uint_t priority = mp->b_band;
4124 int error = 0;
4125
4126 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4127 if (max_frag == 0) {
4128 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4129 ip_drop_output("FragFails: zero max_frag", mp, ill);
4130 freemsg(mp);
4131 return (EINVAL);
4132 }
4133
4134 /*
4135 * Caller should have added fraghdr_t to pkt_len, and also
4136 * updated ip6_plen.
4137 */
4138 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4139 ASSERT(msgdsize(mp) == pkt_len);
4140
4141 /*
4142 * Determine the length of the unfragmentable portion of this
4143 * datagram. This consists of the IPv6 header, a potential
4144 * hop-by-hop options header, a potential pre-routing-header
4145 * destination options header, and a potential routing header.
4146 */
4147 nexthdr = ip6h->ip6_nxt;
4148 ptr = (uint8_t *)&ip6h[1];
4149
4150 if (nexthdr == IPPROTO_HOPOPTS) {
4151 ip6_hbh_t *hbh_hdr;
4152 uint_t hdr_len;
4153
4154 hbh_hdr = (ip6_hbh_t *)ptr;
4155 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4156 nexthdr = hbh_hdr->ip6h_nxt;
4157 ptr += hdr_len;
4158 }
4159 if (nexthdr == IPPROTO_DSTOPTS) {
4160 ip6_dest_t *dest_hdr;
4161 uint_t hdr_len;
4162
4163 dest_hdr = (ip6_dest_t *)ptr;
4164 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4165 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4166 nexthdr = dest_hdr->ip6d_nxt;
4167 ptr += hdr_len;
4168 }
4169 }
4170 if (nexthdr == IPPROTO_ROUTING) {
4171 ip6_rthdr_t *rthdr;
4172 uint_t hdr_len;
4173
4174 rthdr = (ip6_rthdr_t *)ptr;
4175 nexthdr = rthdr->ip6r_nxt;
4176 hdr_len = 8 * (rthdr->ip6r_len + 1);
4177 ptr += hdr_len;
4178 }
4179 if (nexthdr != IPPROTO_FRAGMENT) {
4180 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4181 ip_drop_output("FragFails: bad nexthdr", mp, ill);
4182 freemsg(mp);
4183 return (EINVAL);
4184 }
4185 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4186 unfragmentable_len += sizeof (ip6_frag_t);
4187
4188 max_chunk = (max_frag - unfragmentable_len) & ~7;
4189
4190 /*
4191 * Allocate an mblk with enough room for the link-layer
4192 * header and the unfragmentable part of the datagram, which includes
4193 * the fragment header. This (or a copy) will be used as the
4194 * first mblk for each fragment we send.
4195 */
4196 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4197 if (hmp == NULL) {
4198 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4199 ip_drop_output("FragFails: no hmp", mp, ill);
4200 freemsg(mp);
4201 return (ENOBUFS);
4202 }
4203 hmp->b_rptr += ipst->ips_ip_wroff_extra;
4204 hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4205
4206 fip6h = (ip6_t *)hmp->b_rptr;
4207 bcopy(ip6h, fip6h, unfragmentable_len);
4208
4209 /*
4210 * pkt_len is set to the total length of the fragmentable data in this
4211 * datagram. For each fragment sent, we will decrement pkt_len
4212 * by the amount of fragmentable data sent in that fragment
4213 * until len reaches zero.
4214 */
4215 pkt_len -= unfragmentable_len;
4216
4217 /*
4218 * Move read ptr past unfragmentable portion, we don't want this part
4219 * of the data in our fragments.
4220 */
4221 mp->b_rptr += unfragmentable_len;
4222 if (mp->b_rptr == mp->b_wptr) {
4223 mblk_t *mp1 = mp->b_cont;
4224 freeb(mp);
4225 mp = mp1;
4226 }
4227
4228 while (pkt_len != 0) {
4229 mlen = MIN(pkt_len, max_chunk);
4230 pkt_len -= mlen;
4231 if (pkt_len != 0) {
4232 /* Not last */
4233 hmp0 = copyb(hmp);
4234 if (hmp0 == NULL) {
4235 BUMP_MIB(ill->ill_ip_mib,
4236 ipIfStatsOutFragFails);
4237 ip_drop_output("FragFails: copyb failed",
4238 mp, ill);
4239 freeb(hmp);
4240 freemsg(mp);
4241 ip1dbg(("ip_fragment_v6: copyb failed\n"));
4242 return (ENOBUFS);
4243 }
4244 off_flags = IP6F_MORE_FRAG;
4245 } else {
4246 /* Last fragment */
4247 hmp0 = hmp;
4248 hmp = NULL;
4249 off_flags = 0;
4250 }
4251 fip6h = (ip6_t *)(hmp0->b_rptr);
4252 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4253 sizeof (ip6_frag_t));
4254
4255 fip6h->ip6_plen = htons((uint16_t)(mlen +
4256 unfragmentable_len - IPV6_HDR_LEN));
4257 /*
4258 * Note: Optimization alert.
4259 * In IPv6 (and IPv4) protocol header, Fragment Offset
4260 * ("offset") is 13 bits wide and in 8-octet units.
4261 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4262 * it occupies the most significant 13 bits.
4263 * (least significant 13 bits in IPv4).
4264 * We do not do any shifts here. Not shifting is same effect
4265 * as taking offset value in octet units, dividing by 8 and
4266 * then shifting 3 bits left to line it up in place in proper
4267 * place protocol header.
4268 */
4269 fraghdr->ip6f_offlg = htons(offset) | off_flags;
4270
4271 if (!(dmp = ip_carve_mp(&mp, mlen))) {
4272 /* mp has already been freed by ip_carve_mp() */
4273 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4274 ip_drop_output("FragFails: could not carve mp",
4275 hmp0, ill);
4276 if (hmp != NULL)
4277 freeb(hmp);
4278 freeb(hmp0);
4279 ip1dbg(("ip_carve_mp: failed\n"));
4280 return (ENOBUFS);
4281 }
4282 hmp0->b_cont = dmp;
4283 /* Get the priority marking, if any */
4284 hmp0->b_band = priority;
4285
4286 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4287
4288 error = postfragfn(hmp0, nce, ixaflags,
4289 mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4290 ixa_cookie);
4291 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4292 /* No point in sending the other fragments */
4293 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4294 ip_drop_output("FragFails: postfragfn failed",
4295 hmp, ill);
4296 freeb(hmp);
4297 freemsg(mp);
4298 return (error);
4299 }
4300 /* No need to redo state machine in loop */
4301 ixaflags &= ~IXAF_REACH_CONF;
4302
4303 offset += mlen;
4304 }
4305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4306 return (error);
4307 }
4308
4309 /*
4310 * Add a fragment header to an IPv6 packet.
4311 * Assumes that all the extension headers are contained in the first mblk.
4312 *
4313 * The fragment header is inserted after an hop-by-hop options header
4314 * and after [an optional destinations header followed by] a routing header.
4315 */
4316 mblk_t *
ip_fraghdr_add_v6(mblk_t * mp,uint32_t ident,ip_xmit_attr_t * ixa)4317 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4318 {
4319 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
4320 ip6_t *fip6h;
4321 mblk_t *hmp;
4322 ip6_frag_t *fraghdr;
4323 size_t unfragmentable_len;
4324 uint8_t nexthdr;
4325 uint_t prev_nexthdr_offset;
4326 uint8_t *ptr;
4327 uint_t priority = mp->b_band;
4328 ip_stack_t *ipst = ixa->ixa_ipst;
4329
4330 /*
4331 * Determine the length of the unfragmentable portion of this
4332 * datagram. This consists of the IPv6 header, a potential
4333 * hop-by-hop options header, a potential pre-routing-header
4334 * destination options header, and a potential routing header.
4335 */
4336 nexthdr = ip6h->ip6_nxt;
4337 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4338 ptr = (uint8_t *)&ip6h[1];
4339
4340 if (nexthdr == IPPROTO_HOPOPTS) {
4341 ip6_hbh_t *hbh_hdr;
4342 uint_t hdr_len;
4343
4344 hbh_hdr = (ip6_hbh_t *)ptr;
4345 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4346 nexthdr = hbh_hdr->ip6h_nxt;
4347 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4348 - (uint8_t *)ip6h;
4349 ptr += hdr_len;
4350 }
4351 if (nexthdr == IPPROTO_DSTOPTS) {
4352 ip6_dest_t *dest_hdr;
4353 uint_t hdr_len;
4354
4355 dest_hdr = (ip6_dest_t *)ptr;
4356 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4357 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4358 nexthdr = dest_hdr->ip6d_nxt;
4359 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4360 - (uint8_t *)ip6h;
4361 ptr += hdr_len;
4362 }
4363 }
4364 if (nexthdr == IPPROTO_ROUTING) {
4365 ip6_rthdr_t *rthdr;
4366 uint_t hdr_len;
4367
4368 rthdr = (ip6_rthdr_t *)ptr;
4369 nexthdr = rthdr->ip6r_nxt;
4370 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4371 - (uint8_t *)ip6h;
4372 hdr_len = 8 * (rthdr->ip6r_len + 1);
4373 ptr += hdr_len;
4374 }
4375 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4376
4377 /*
4378 * Allocate an mblk with enough room for the link-layer
4379 * header, the unfragmentable part of the datagram, and the
4380 * fragment header.
4381 */
4382 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4383 ipst->ips_ip_wroff_extra, mp);
4384 if (hmp == NULL) {
4385 ill_t *ill = ixa->ixa_nce->nce_ill;
4386
4387 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4388 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4389 freemsg(mp);
4390 return (NULL);
4391 }
4392 hmp->b_rptr += ipst->ips_ip_wroff_extra;
4393 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4394
4395 fip6h = (ip6_t *)hmp->b_rptr;
4396 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4397
4398 bcopy(ip6h, fip6h, unfragmentable_len);
4399 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4400 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4401
4402 fraghdr->ip6f_nxt = nexthdr;
4403 fraghdr->ip6f_reserved = 0;
4404 fraghdr->ip6f_offlg = 0;
4405 fraghdr->ip6f_ident = htonl(ident);
4406
4407 /* Get the priority marking, if any */
4408 hmp->b_band = priority;
4409
4410 /*
4411 * Move read ptr past unfragmentable portion, we don't want this part
4412 * of the data in our fragments.
4413 */
4414 mp->b_rptr += unfragmentable_len;
4415 hmp->b_cont = mp;
4416 return (hmp);
4417 }
4418
4419 /*
4420 * Determine if the ill and multicast aspects of that packets
4421 * "matches" the conn.
4422 */
4423 boolean_t
conn_wantpacket_v6(conn_t * connp,ip_recv_attr_t * ira,ip6_t * ip6h)4424 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4425 {
4426 ill_t *ill = ira->ira_rill;
4427 zoneid_t zoneid = ira->ira_zoneid;
4428 uint_t in_ifindex;
4429 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst;
4430 in6_addr_t *v6src_ptr = &ip6h->ip6_src;
4431
4432 /*
4433 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4434 * scopeid. This is used to limit
4435 * unicast and multicast reception to conn_incoming_ifindex.
4436 * conn_wantpacket_v6 is called both for unicast and
4437 * multicast packets.
4438 */
4439 in_ifindex = connp->conn_incoming_ifindex;
4440
4441 /* mpathd can bind to the under IPMP interface, which we allow */
4442 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4443 if (!IS_UNDER_IPMP(ill))
4444 return (B_FALSE);
4445
4446 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4447 return (B_FALSE);
4448 }
4449
4450 if (!IPCL_ZONE_MATCH(connp, zoneid))
4451 return (B_FALSE);
4452
4453 if (!(ira->ira_flags & IRAF_MULTICAST))
4454 return (B_TRUE);
4455
4456 if (connp->conn_multi_router)
4457 return (B_TRUE);
4458
4459 if (ira->ira_protocol == IPPROTO_RSVP)
4460 return (B_TRUE);
4461
4462 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4463 ira->ira_ill));
4464 }
4465
4466 /*
4467 * pr_addr_dbg function provides the needed buffer space to call
4468 * inet_ntop() function's 3rd argument. This function should be
4469 * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4470 * stack buffer space in it's own stack frame. This function uses
4471 * a buffer from it's own stack and prints the information.
4472 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4473 *
4474 * Note: This function can call inet_ntop() once.
4475 */
4476 void
pr_addr_dbg(char * fmt1,int af,const void * addr)4477 pr_addr_dbg(char *fmt1, int af, const void *addr)
4478 {
4479 char buf[INET6_ADDRSTRLEN];
4480
4481 if (fmt1 == NULL) {
4482 ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4483 return;
4484 }
4485
4486 /*
4487 * This does not compare debug level and just prints
4488 * out. Thus it is the responsibility of the caller
4489 * to check the appropriate debug-level before calling
4490 * this function.
4491 */
4492 if (ip_debug > 0) {
4493 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4494 }
4495
4496
4497 }
4498
4499
4500 /*
4501 * Return the length in bytes of the IPv6 headers (base header
4502 * extension headers) that will be needed based on the
4503 * ip_pkt_t structure passed by the caller.
4504 *
4505 * The returned length does not include the length of the upper level
4506 * protocol (ULP) header.
4507 */
4508 int
ip_total_hdrs_len_v6(const ip_pkt_t * ipp)4509 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4510 {
4511 int len;
4512
4513 len = IPV6_HDR_LEN;
4514
4515 /*
4516 * If there's a security label here, then we ignore any hop-by-hop
4517 * options the user may try to set.
4518 */
4519 if (ipp->ipp_fields & IPPF_LABEL_V6) {
4520 uint_t hopoptslen;
4521 /*
4522 * Note that ipp_label_len_v6 is just the option - not
4523 * the hopopts extension header. It also needs to be padded
4524 * to a multiple of 8 bytes.
4525 */
4526 ASSERT(ipp->ipp_label_len_v6 != 0);
4527 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4528 hopoptslen = (hopoptslen + 7)/8 * 8;
4529 len += hopoptslen;
4530 } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4531 ASSERT(ipp->ipp_hopoptslen != 0);
4532 len += ipp->ipp_hopoptslen;
4533 }
4534
4535 /*
4536 * En-route destination options
4537 * Only do them if there's a routing header as well
4538 */
4539 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4540 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4541 ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4542 len += ipp->ipp_rthdrdstoptslen;
4543 }
4544 if (ipp->ipp_fields & IPPF_RTHDR) {
4545 ASSERT(ipp->ipp_rthdrlen != 0);
4546 len += ipp->ipp_rthdrlen;
4547 }
4548 if (ipp->ipp_fields & IPPF_DSTOPTS) {
4549 ASSERT(ipp->ipp_dstoptslen != 0);
4550 len += ipp->ipp_dstoptslen;
4551 }
4552 return (len);
4553 }
4554
4555 /*
4556 * All-purpose routine to build a header chain of an IPv6 header
4557 * followed by any required extension headers and a proto header.
4558 *
4559 * The caller has to set the source and destination address as well as
4560 * ip6_plen. The caller has to massage any routing header and compensate
4561 * for the ULP pseudo-header checksum due to the source route.
4562 *
4563 * The extension headers will all be fully filled in.
4564 */
4565 void
ip_build_hdrs_v6(uchar_t * buf,uint_t buf_len,const ip_pkt_t * ipp,uint8_t protocol,uint32_t flowinfo)4566 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4567 uint8_t protocol, uint32_t flowinfo)
4568 {
4569 uint8_t *nxthdr_ptr;
4570 uint8_t *cp;
4571 ip6_t *ip6h = (ip6_t *)buf;
4572
4573 /* Initialize IPv6 header */
4574 ip6h->ip6_vcf =
4575 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4576 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4577
4578 if (ipp->ipp_fields & IPPF_TCLASS) {
4579 /* Overrides the class part of flowinfo */
4580 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4581 ipp->ipp_tclass);
4582 }
4583
4584 if (ipp->ipp_fields & IPPF_HOPLIMIT)
4585 ip6h->ip6_hops = ipp->ipp_hoplimit;
4586 else
4587 ip6h->ip6_hops = ipp->ipp_unicast_hops;
4588
4589 if ((ipp->ipp_fields & IPPF_ADDR) &&
4590 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4591 ip6h->ip6_src = ipp->ipp_addr;
4592
4593 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4594 cp = (uint8_t *)&ip6h[1];
4595 /*
4596 * Here's where we have to start stringing together
4597 * any extension headers in the right order:
4598 * Hop-by-hop, destination, routing, and final destination opts.
4599 */
4600 /*
4601 * If there's a security label here, then we ignore any hop-by-hop
4602 * options the user may try to set.
4603 */
4604 if (ipp->ipp_fields & IPPF_LABEL_V6) {
4605 /*
4606 * Hop-by-hop options with the label.
4607 * Note that ipp_label_v6 is just the option - not
4608 * the hopopts extension header. It also needs to be padded
4609 * to a multiple of 8 bytes.
4610 */
4611 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4612 uint_t hopoptslen;
4613 uint_t padlen;
4614
4615 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4616 hopoptslen = (padlen + 7)/8 * 8;
4617 padlen = hopoptslen - padlen;
4618
4619 *nxthdr_ptr = IPPROTO_HOPOPTS;
4620 nxthdr_ptr = &hbh->ip6h_nxt;
4621 hbh->ip6h_len = hopoptslen/8 - 1;
4622 cp += sizeof (ip6_hbh_t);
4623 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4624 cp += ipp->ipp_label_len_v6;
4625
4626 ASSERT(padlen <= 7);
4627 switch (padlen) {
4628 case 0:
4629 break;
4630 case 1:
4631 cp[0] = IP6OPT_PAD1;
4632 break;
4633 default:
4634 cp[0] = IP6OPT_PADN;
4635 cp[1] = padlen - 2;
4636 bzero(&cp[2], padlen - 2);
4637 break;
4638 }
4639 cp += padlen;
4640 } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4641 /* Hop-by-hop options */
4642 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4643
4644 *nxthdr_ptr = IPPROTO_HOPOPTS;
4645 nxthdr_ptr = &hbh->ip6h_nxt;
4646
4647 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4648 cp += ipp->ipp_hopoptslen;
4649 }
4650 /*
4651 * En-route destination options
4652 * Only do them if there's a routing header as well
4653 */
4654 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4655 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4656 ip6_dest_t *dst = (ip6_dest_t *)cp;
4657
4658 *nxthdr_ptr = IPPROTO_DSTOPTS;
4659 nxthdr_ptr = &dst->ip6d_nxt;
4660
4661 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4662 cp += ipp->ipp_rthdrdstoptslen;
4663 }
4664 /*
4665 * Routing header next
4666 */
4667 if (ipp->ipp_fields & IPPF_RTHDR) {
4668 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4669
4670 *nxthdr_ptr = IPPROTO_ROUTING;
4671 nxthdr_ptr = &rt->ip6r_nxt;
4672
4673 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4674 cp += ipp->ipp_rthdrlen;
4675 }
4676 /*
4677 * Do ultimate destination options
4678 */
4679 if (ipp->ipp_fields & IPPF_DSTOPTS) {
4680 ip6_dest_t *dest = (ip6_dest_t *)cp;
4681
4682 *nxthdr_ptr = IPPROTO_DSTOPTS;
4683 nxthdr_ptr = &dest->ip6d_nxt;
4684
4685 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4686 cp += ipp->ipp_dstoptslen;
4687 }
4688 /*
4689 * Now set the last header pointer to the proto passed in
4690 */
4691 *nxthdr_ptr = protocol;
4692 ASSERT((int)(cp - buf) == buf_len);
4693 }
4694
4695 /*
4696 * Return a pointer to the routing header extension header
4697 * in the IPv6 header(s) chain passed in.
4698 * If none found, return NULL
4699 * Assumes that all extension headers are in same mblk as the v6 header
4700 */
4701 ip6_rthdr_t *
ip_find_rthdr_v6(ip6_t * ip6h,uint8_t * endptr)4702 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4703 {
4704 ip6_dest_t *desthdr;
4705 ip6_frag_t *fraghdr;
4706 uint_t hdrlen;
4707 uint8_t nexthdr;
4708 uint8_t *ptr = (uint8_t *)&ip6h[1];
4709
4710 if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4711 return ((ip6_rthdr_t *)ptr);
4712
4713 /*
4714 * The routing header will precede all extension headers
4715 * other than the hop-by-hop and destination options
4716 * extension headers, so if we see anything other than those,
4717 * we're done and didn't find it.
4718 * We could see a destination options header alone but no
4719 * routing header, in which case we'll return NULL as soon as
4720 * we see anything after that.
4721 * Hop-by-hop and destination option headers are identical,
4722 * so we can use either one we want as a template.
4723 */
4724 nexthdr = ip6h->ip6_nxt;
4725 while (ptr < endptr) {
4726 /* Is there enough left for len + nexthdr? */
4727 if (ptr + MIN_EHDR_LEN > endptr)
4728 return (NULL);
4729
4730 switch (nexthdr) {
4731 case IPPROTO_HOPOPTS:
4732 case IPPROTO_DSTOPTS:
4733 /* Assumes the headers are identical for hbh and dst */
4734 desthdr = (ip6_dest_t *)ptr;
4735 hdrlen = 8 * (desthdr->ip6d_len + 1);
4736 nexthdr = desthdr->ip6d_nxt;
4737 break;
4738
4739 case IPPROTO_ROUTING:
4740 return ((ip6_rthdr_t *)ptr);
4741
4742 case IPPROTO_FRAGMENT:
4743 fraghdr = (ip6_frag_t *)ptr;
4744 hdrlen = sizeof (ip6_frag_t);
4745 nexthdr = fraghdr->ip6f_nxt;
4746 break;
4747
4748 default:
4749 return (NULL);
4750 }
4751 ptr += hdrlen;
4752 }
4753 return (NULL);
4754 }
4755
4756 /*
4757 * Called for source-routed packets originating on this node.
4758 * Manipulates the original routing header by moving every entry up
4759 * one slot, placing the first entry in the v6 header's v6_dst field,
4760 * and placing the ultimate destination in the routing header's last
4761 * slot.
4762 *
4763 * Returns the checksum diference between the ultimate destination
4764 * (last hop in the routing header when the packet is sent) and
4765 * the first hop (ip6_dst when the packet is sent)
4766 */
4767 /* ARGSUSED2 */
4768 uint32_t
ip_massage_options_v6(ip6_t * ip6h,ip6_rthdr_t * rth,netstack_t * ns)4769 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4770 {
4771 uint_t numaddr;
4772 uint_t i;
4773 in6_addr_t *addrptr;
4774 in6_addr_t tmp;
4775 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth;
4776 uint32_t cksm;
4777 uint32_t addrsum = 0;
4778 uint16_t *ptr;
4779
4780 /*
4781 * Perform any processing needed for source routing.
4782 * We know that all extension headers will be in the same mblk
4783 * as the IPv6 header.
4784 */
4785
4786 /*
4787 * If no segments left in header, or the header length field is zero,
4788 * don't move hop addresses around;
4789 * Checksum difference is zero.
4790 */
4791 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4792 return (0);
4793
4794 ptr = (uint16_t *)&ip6h->ip6_dst;
4795 cksm = 0;
4796 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4797 cksm += ptr[i];
4798 }
4799 cksm = (cksm & 0xFFFF) + (cksm >> 16);
4800
4801 /*
4802 * Here's where the fun begins - we have to
4803 * move all addresses up one spot, take the
4804 * first hop and make it our first ip6_dst,
4805 * and place the ultimate destination in the
4806 * newly-opened last slot.
4807 */
4808 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4809 numaddr = rthdr->ip6r0_len / 2;
4810 tmp = *addrptr;
4811 for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4812 *addrptr = addrptr[1];
4813 }
4814 *addrptr = ip6h->ip6_dst;
4815 ip6h->ip6_dst = tmp;
4816
4817 /*
4818 * From the checksummed ultimate destination subtract the checksummed
4819 * current ip6_dst (the first hop address). Return that number.
4820 * (In the v4 case, the second part of this is done in each routine
4821 * that calls ip_massage_options(). We do it all in this one place
4822 * for v6).
4823 */
4824 ptr = (uint16_t *)&ip6h->ip6_dst;
4825 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4826 addrsum += ptr[i];
4827 }
4828 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4829 if ((int)cksm < 0)
4830 cksm--;
4831 cksm = (cksm & 0xFFFF) + (cksm >> 16);
4832
4833 return (cksm);
4834 }
4835
4836 void
ip6_kstat_init(netstackid_t stackid,ip6_stat_t * ip6_statisticsp)4837 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4838 {
4839 kstat_t *ksp;
4840
4841 ip6_stat_t template = {
4842 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 },
4843 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 },
4844 { "ip6_recv_pullup", KSTAT_DATA_UINT64 },
4845 { "ip6_db_ref", KSTAT_DATA_UINT64 },
4846 { "ip6_notaligned", KSTAT_DATA_UINT64 },
4847 { "ip6_multimblk", KSTAT_DATA_UINT64 },
4848 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 },
4849 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 },
4850 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
4851 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 },
4852 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
4853 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
4854 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
4855 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
4856 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
4857 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
4858 };
4859 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4860 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4861 KSTAT_FLAG_VIRTUAL, stackid);
4862
4863 if (ksp == NULL)
4864 return (NULL);
4865
4866 bcopy(&template, ip6_statisticsp, sizeof (template));
4867 ksp->ks_data = (void *)ip6_statisticsp;
4868 ksp->ks_private = (void *)(uintptr_t)stackid;
4869
4870 kstat_install(ksp);
4871 return (ksp);
4872 }
4873
4874 void
ip6_kstat_fini(netstackid_t stackid,kstat_t * ksp)4875 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4876 {
4877 if (ksp != NULL) {
4878 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4879 kstat_delete_netstack(ksp, stackid);
4880 }
4881 }
4882
4883 /*
4884 * The following two functions set and get the value for the
4885 * IPV6_SRC_PREFERENCES socket option.
4886 */
4887 int
ip6_set_src_preferences(ip_xmit_attr_t * ixa,uint32_t prefs)4888 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4889 {
4890 /*
4891 * We only support preferences that are covered by
4892 * IPV6_PREFER_SRC_MASK.
4893 */
4894 if (prefs & ~IPV6_PREFER_SRC_MASK)
4895 return (EINVAL);
4896
4897 /*
4898 * Look for conflicting preferences or default preferences. If
4899 * both bits of a related pair are clear, the application wants the
4900 * system's default value for that pair. Both bits in a pair can't
4901 * be set.
4902 */
4903 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4904 prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4905 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4906 IPV6_PREFER_SRC_MIPMASK) {
4907 return (EINVAL);
4908 }
4909 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4910 prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4911 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4912 IPV6_PREFER_SRC_TMPMASK) {
4913 return (EINVAL);
4914 }
4915 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4916 prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4917 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4918 IPV6_PREFER_SRC_CGAMASK) {
4919 return (EINVAL);
4920 }
4921
4922 ixa->ixa_src_preferences = prefs;
4923 return (0);
4924 }
4925
4926 size_t
ip6_get_src_preferences(ip_xmit_attr_t * ixa,uint32_t * val)4927 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4928 {
4929 *val = ixa->ixa_src_preferences;
4930 return (sizeof (ixa->ixa_src_preferences));
4931 }
4932
4933 /*
4934 * Get the size of the IP options (including the IP headers size)
4935 * without including the AH header's size. If till_ah is B_FALSE,
4936 * and if AH header is present, dest options beyond AH header will
4937 * also be included in the returned size.
4938 */
4939 int
ipsec_ah_get_hdr_size_v6(mblk_t * mp,boolean_t till_ah)4940 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4941 {
4942 ip6_t *ip6h;
4943 uint8_t nexthdr;
4944 uint8_t *whereptr;
4945 ip6_hbh_t *hbhhdr;
4946 ip6_dest_t *dsthdr;
4947 ip6_rthdr_t *rthdr;
4948 int ehdrlen;
4949 int size;
4950 ah_t *ah;
4951
4952 ip6h = (ip6_t *)mp->b_rptr;
4953 size = IPV6_HDR_LEN;
4954 nexthdr = ip6h->ip6_nxt;
4955 whereptr = (uint8_t *)&ip6h[1];
4956 for (;;) {
4957 /* Assume IP has already stripped it */
4958 ASSERT(nexthdr != IPPROTO_FRAGMENT);
4959 switch (nexthdr) {
4960 case IPPROTO_HOPOPTS:
4961 hbhhdr = (ip6_hbh_t *)whereptr;
4962 nexthdr = hbhhdr->ip6h_nxt;
4963 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4964 break;
4965 case IPPROTO_DSTOPTS:
4966 dsthdr = (ip6_dest_t *)whereptr;
4967 nexthdr = dsthdr->ip6d_nxt;
4968 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4969 break;
4970 case IPPROTO_ROUTING:
4971 rthdr = (ip6_rthdr_t *)whereptr;
4972 nexthdr = rthdr->ip6r_nxt;
4973 ehdrlen = 8 * (rthdr->ip6r_len + 1);
4974 break;
4975 default :
4976 if (till_ah) {
4977 ASSERT(nexthdr == IPPROTO_AH);
4978 return (size);
4979 }
4980 /*
4981 * If we don't have a AH header to traverse,
4982 * return now. This happens normally for
4983 * outbound datagrams where we have not inserted
4984 * the AH header.
4985 */
4986 if (nexthdr != IPPROTO_AH) {
4987 return (size);
4988 }
4989
4990 /*
4991 * We don't include the AH header's size
4992 * to be symmetrical with other cases where
4993 * we either don't have a AH header (outbound)
4994 * or peek into the AH header yet (inbound and
4995 * not pulled up yet).
4996 */
4997 ah = (ah_t *)whereptr;
4998 nexthdr = ah->ah_nexthdr;
4999 ehdrlen = (ah->ah_length << 2) + 8;
5000
5001 if (nexthdr == IPPROTO_DSTOPTS) {
5002 if (whereptr + ehdrlen >= mp->b_wptr) {
5003 /*
5004 * The destination options header
5005 * is not part of the first mblk.
5006 */
5007 whereptr = mp->b_cont->b_rptr;
5008 } else {
5009 whereptr += ehdrlen;
5010 }
5011
5012 dsthdr = (ip6_dest_t *)whereptr;
5013 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
5014 size += ehdrlen;
5015 }
5016 return (size);
5017 }
5018 whereptr += ehdrlen;
5019 size += ehdrlen;
5020 }
5021 }
5022
5023 /*
5024 * Utility routine that checks if `v6srcp' is a valid address on underlying
5025 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif
5026 * associated with `v6srcp' on success. NOTE: if this is not called from
5027 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
5028 * group during or after this lookup.
5029 */
5030 boolean_t
ipif_lookup_testaddr_v6(ill_t * ill,const in6_addr_t * v6srcp,ipif_t ** ipifp)5031 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
5032 {
5033 ipif_t *ipif;
5034
5035
5036 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
5037 if (ipif != NULL) {
5038 if (ipifp != NULL)
5039 *ipifp = ipif;
5040 else
5041 ipif_refrele(ipif);
5042 return (B_TRUE);
5043 }
5044
5045 if (ip_debug > 2) {
5046 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
5047 "src %s\n", AF_INET6, v6srcp);
5048 }
5049 return (B_FALSE);
5050 }
5051