xref: /illumos-gate/usr/src/uts/common/inet/ip/ip6.c (revision 7ae7577ce925f555e5e3410f14a3ccc896c842f2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 1990 Mentat Inc.
24  * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/stropts.h>
31 #include <sys/sysmacros.h>
32 #include <sys/strsun.h>
33 #include <sys/strlog.h>
34 #include <sys/strsubr.h>
35 #define	_SUN_TPI_VERSION	2
36 #include <sys/tihdr.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/cmn_err.h>
40 #include <sys/debug.h>
41 #include <sys/sdt.h>
42 #include <sys/kobj.h>
43 #include <sys/zone.h>
44 #include <sys/neti.h>
45 #include <sys/hook.h>
46 
47 #include <sys/kmem.h>
48 #include <sys/systm.h>
49 #include <sys/param.h>
50 #include <sys/socket.h>
51 #include <sys/vtrace.h>
52 #include <sys/isa_defs.h>
53 #include <sys/atomic.h>
54 #include <sys/policy.h>
55 #include <sys/mac.h>
56 #include <net/if.h>
57 #include <net/if_types.h>
58 #include <net/route.h>
59 #include <net/if_dl.h>
60 #include <sys/sockio.h>
61 #include <netinet/in.h>
62 #include <netinet/ip6.h>
63 #include <netinet/icmp6.h>
64 #include <netinet/sctp.h>
65 
66 #include <inet/common.h>
67 #include <inet/mi.h>
68 #include <inet/optcom.h>
69 #include <inet/mib2.h>
70 #include <inet/nd.h>
71 #include <inet/arp.h>
72 
73 #include <inet/ip.h>
74 #include <inet/ip_impl.h>
75 #include <inet/ip6.h>
76 #include <inet/ip6_asp.h>
77 #include <inet/tcp.h>
78 #include <inet/tcp_impl.h>
79 #include <inet/udp_impl.h>
80 #include <inet/ipp_common.h>
81 
82 #include <inet/ip_multi.h>
83 #include <inet/ip_if.h>
84 #include <inet/ip_ire.h>
85 #include <inet/ip_rts.h>
86 #include <inet/ip_ndp.h>
87 #include <net/pfkeyv2.h>
88 #include <inet/sadb.h>
89 #include <inet/ipsec_impl.h>
90 #include <inet/iptun/iptun_impl.h>
91 #include <inet/sctp_ip.h>
92 #include <sys/pattr.h>
93 #include <inet/ipclassifier.h>
94 #include <inet/ipsecah.h>
95 #include <inet/rawip_impl.h>
96 #include <inet/rts_impl.h>
97 #include <sys/squeue_impl.h>
98 #include <sys/squeue.h>
99 
100 #include <sys/tsol/label.h>
101 #include <sys/tsol/tnet.h>
102 
103 /* Temporary; for CR 6451644 work-around */
104 #include <sys/ethernet.h>
105 
106 /*
107  * Naming conventions:
108  *      These rules should be judiciously applied
109  *	if there is a need to identify something as IPv6 versus IPv4
110  *	IPv6 functions will end with _v6 in the ip module.
111  *	IPv6 functions will end with _ipv6 in the transport modules.
112  *	IPv6 macros:
113  *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
114  *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
115  *		And then there are ..V4_PART_OF_V6.
116  *		The intent is that macros in the ip module end with _V6.
117  *	IPv6 global variables will start with ipv6_
118  *	IPv6 structures will start with ipv6
119  *	IPv6 defined constants should start with IPV6_
120  *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
121  */
122 
123 /*
124  * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
125  * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
126  * from IANA. This mechanism will remain in effect until an official
127  * number is obtained.
128  */
129 uchar_t ip6opt_ls;
130 
131 const in6_addr_t ipv6_all_ones =
132 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
133 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
134 
135 #ifdef	_BIG_ENDIAN
136 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
137 #else	/* _BIG_ENDIAN */
138 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
139 #endif	/* _BIG_ENDIAN */
140 
141 #ifdef	_BIG_ENDIAN
142 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
143 #else  /* _BIG_ENDIAN */
144 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
145 #endif /* _BIG_ENDIAN */
146 
147 #ifdef _BIG_ENDIAN
148 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
149 #else  /* _BIG_ENDIAN */
150 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
151 #endif /* _BIG_ENDIAN */
152 
153 #ifdef _BIG_ENDIAN
154 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
155 #else  /* _BIG_ENDIAN */
156 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
157 #endif /* _BIG_ENDIAN */
158 
159 #ifdef _BIG_ENDIAN
160 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
161 #else  /* _BIG_ENDIAN */
162 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
163 #endif /* _BIG_ENDIAN */
164 
165 #ifdef _BIG_ENDIAN
166 const in6_addr_t ipv6_solicited_node_mcast =
167 			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
168 #else  /* _BIG_ENDIAN */
169 const in6_addr_t ipv6_solicited_node_mcast =
170 			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
171 #endif /* _BIG_ENDIAN */
172 
173 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
174 static void	icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
175 static void	icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
176     ip_recv_attr_t *);
177 static void	icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
178     ip_recv_attr_t *);
179 static void	icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
180     in6_addr_t *, ip_recv_attr_t *);
181 static void	icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
182     ip_recv_attr_t *);
183 static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
184 
185 /*
186  * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
187  * If the ICMP message is consumed by IP, i.e., it should not be delivered
188  * to any IPPROTO_ICMP raw sockets, then it returns NULL.
189  * Likewise, if the ICMP error is misformed (too short, etc), then it
190  * returns NULL. The caller uses this to determine whether or not to send
191  * to raw sockets.
192  *
193  * All error messages are passed to the matching transport stream.
194  *
195  * See comment for icmp_inbound_v4() on how IPsec is handled.
196  */
197 mblk_t *
198 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
199 {
200 	icmp6_t		*icmp6;
201 	ip6_t		*ip6h;		/* Outer header */
202 	int		ip_hdr_length;	/* Outer header length */
203 	boolean_t	interested;
204 	ill_t		*ill = ira->ira_ill;
205 	ip_stack_t	*ipst = ill->ill_ipst;
206 	mblk_t		*mp_ret = NULL;
207 
208 	ip6h = (ip6_t *)mp->b_rptr;
209 
210 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
211 
212 	/* Check for Martian packets  */
213 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
214 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
215 		ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
216 		freemsg(mp);
217 		return (NULL);
218 	}
219 
220 	/* Make sure ira_l2src is set for ndp_input */
221 	if (!(ira->ira_flags & IRAF_L2SRC_SET))
222 		ip_setl2src(mp, ira, ira->ira_rill);
223 
224 	ip_hdr_length = ira->ira_ip_hdr_length;
225 	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
226 		if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
227 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
228 			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
229 			freemsg(mp);
230 			return (NULL);
231 		}
232 		ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
233 		if (ip6h == NULL) {
234 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
235 			freemsg(mp);
236 			return (NULL);
237 		}
238 	}
239 
240 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
241 	DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
242 	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
243 	    icmp6->icmp6_code));
244 
245 	/*
246 	 * We will set "interested" to "true" if we should pass a copy to
247 	 * the transport i.e., if it is an error message.
248 	 */
249 	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
250 
251 	switch (icmp6->icmp6_type) {
252 	case ICMP6_DST_UNREACH:
253 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
254 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
255 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
256 		break;
257 
258 	case ICMP6_TIME_EXCEEDED:
259 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
260 		break;
261 
262 	case ICMP6_PARAM_PROB:
263 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
264 		break;
265 
266 	case ICMP6_PACKET_TOO_BIG:
267 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
268 		break;
269 
270 	case ICMP6_ECHO_REQUEST:
271 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
272 		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
273 		    !ipst->ips_ipv6_resp_echo_mcast)
274 			break;
275 
276 		/*
277 		 * We must have exclusive use of the mblk to convert it to
278 		 * a response.
279 		 * If not, we copy it.
280 		 */
281 		if (mp->b_datap->db_ref > 1) {
282 			mblk_t	*mp1;
283 
284 			mp1 = copymsg(mp);
285 			if (mp1 == NULL) {
286 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
287 				ip_drop_input("ipIfStatsInDiscards - copymsg",
288 				    mp, ill);
289 				freemsg(mp);
290 				return (NULL);
291 			}
292 			freemsg(mp);
293 			mp = mp1;
294 			ip6h = (ip6_t *)mp->b_rptr;
295 			icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
296 		}
297 
298 		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
299 		icmp_send_reply_v6(mp, ip6h, icmp6, ira);
300 		return (NULL);
301 
302 	case ICMP6_ECHO_REPLY:
303 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
304 		break;
305 
306 	case ND_ROUTER_SOLICIT:
307 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
308 		break;
309 
310 	case ND_ROUTER_ADVERT:
311 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
312 		break;
313 
314 	case ND_NEIGHBOR_SOLICIT:
315 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
316 		ndp_input(mp, ira);
317 		return (NULL);
318 
319 	case ND_NEIGHBOR_ADVERT:
320 		BUMP_MIB(ill->ill_icmp6_mib,
321 		    ipv6IfIcmpInNeighborAdvertisements);
322 		ndp_input(mp, ira);
323 		return (NULL);
324 
325 	case ND_REDIRECT:
326 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
327 
328 		if (ipst->ips_ipv6_ignore_redirect)
329 			break;
330 
331 		/* We now allow a RAW socket to receive this. */
332 		interested = B_TRUE;
333 		break;
334 
335 	/*
336 	 * The next three icmp messages will be handled by MLD.
337 	 * Pass all valid MLD packets up to any process(es)
338 	 * listening on a raw ICMP socket.
339 	 */
340 	case MLD_LISTENER_QUERY:
341 	case MLD_LISTENER_REPORT:
342 	case MLD_LISTENER_REDUCTION:
343 		mp = mld_input(mp, ira);
344 		return (mp);
345 	default:
346 		break;
347 	}
348 	/*
349 	 * See if there is an ICMP client to avoid an extra copymsg/freemsg
350 	 * if there isn't one.
351 	 */
352 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
353 		/* If there is an ICMP client and we want one too, copy it. */
354 
355 		if (!interested) {
356 			/* Caller will deliver to RAW sockets */
357 			return (mp);
358 		}
359 		mp_ret = copymsg(mp);
360 		if (mp_ret == NULL) {
361 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
362 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
363 		}
364 	} else if (!interested) {
365 		/* Neither we nor raw sockets are interested. Drop packet now */
366 		freemsg(mp);
367 		return (NULL);
368 	}
369 
370 	/*
371 	 * ICMP error or redirect packet. Make sure we have enough of
372 	 * the header and that db_ref == 1 since we might end up modifying
373 	 * the packet.
374 	 */
375 	if (mp->b_cont != NULL) {
376 		if (ip_pullup(mp, -1, ira) == NULL) {
377 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
378 			ip_drop_input("ipIfStatsInDiscards - ip_pullup",
379 			    mp, ill);
380 			freemsg(mp);
381 			return (mp_ret);
382 		}
383 	}
384 
385 	if (mp->b_datap->db_ref > 1) {
386 		mblk_t	*mp1;
387 
388 		mp1 = copymsg(mp);
389 		if (mp1 == NULL) {
390 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
391 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
392 			freemsg(mp);
393 			return (mp_ret);
394 		}
395 		freemsg(mp);
396 		mp = mp1;
397 	}
398 
399 	/*
400 	 * In case mp has changed, verify the message before any further
401 	 * processes.
402 	 */
403 	ip6h = (ip6_t *)mp->b_rptr;
404 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
405 	if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
406 		freemsg(mp);
407 		return (mp_ret);
408 	}
409 
410 	switch (icmp6->icmp6_type) {
411 	case ND_REDIRECT:
412 		icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
413 		break;
414 	case ICMP6_PACKET_TOO_BIG:
415 		/* Update DCE and adjust MTU is icmp header if needed */
416 		icmp_inbound_too_big_v6(icmp6, ira);
417 		/* FALLTHROUGH */
418 	default:
419 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
420 		break;
421 	}
422 
423 	return (mp_ret);
424 }
425 
426 /*
427  * Send an ICMP echo reply.
428  * The caller has already updated the payload part of the packet.
429  * We handle the ICMP checksum, IP source address selection and feed
430  * the packet into ip_output_simple.
431  */
432 static void
433 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
434     ip_recv_attr_t *ira)
435 {
436 	uint_t		ip_hdr_length = ira->ira_ip_hdr_length;
437 	ill_t		*ill = ira->ira_ill;
438 	ip_stack_t	*ipst = ill->ill_ipst;
439 	ip_xmit_attr_t	ixas;
440 	in6_addr_t	origsrc;
441 
442 	/*
443 	 * Remove any extension headers (do not reverse a source route)
444 	 * and clear the flow id (keep traffic class for now).
445 	 */
446 	if (ip_hdr_length != IPV6_HDR_LEN) {
447 		int	i;
448 
449 		for (i = 0; i < IPV6_HDR_LEN; i++) {
450 			mp->b_rptr[ip_hdr_length - i - 1] =
451 			    mp->b_rptr[IPV6_HDR_LEN - i - 1];
452 		}
453 		mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
454 		ip6h = (ip6_t *)mp->b_rptr;
455 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
456 		i = ntohs(ip6h->ip6_plen);
457 		i -= (ip_hdr_length - IPV6_HDR_LEN);
458 		ip6h->ip6_plen = htons(i);
459 		ip_hdr_length = IPV6_HDR_LEN;
460 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
461 	}
462 	ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
463 
464 	/* Reverse the source and destination addresses. */
465 	origsrc = ip6h->ip6_src;
466 	ip6h->ip6_src = ip6h->ip6_dst;
467 	ip6h->ip6_dst = origsrc;
468 
469 	/* set the hop limit */
470 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
471 
472 	/*
473 	 * Prepare for checksum by putting icmp length in the icmp
474 	 * checksum field. The checksum is calculated in ip_output
475 	 */
476 	icmp6->icmp6_cksum = ip6h->ip6_plen;
477 
478 	bzero(&ixas, sizeof (ixas));
479 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
480 	ixas.ixa_zoneid = ira->ira_zoneid;
481 	ixas.ixa_cred = kcred;
482 	ixas.ixa_cpid = NOPID;
483 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
484 	ixas.ixa_ifindex = 0;
485 	ixas.ixa_ipst = ipst;
486 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
487 
488 	if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
489 		/*
490 		 * This packet should go out the same way as it
491 		 * came in i.e in clear, independent of the IPsec
492 		 * policy for transmitting packets.
493 		 */
494 		ixas.ixa_flags |= IXAF_NO_IPSEC;
495 	} else {
496 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
497 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
498 			/* Note: mp already consumed and ip_drop_packet done */
499 			return;
500 		}
501 	}
502 
503 	/* Was the destination (now source) link-local? Send out same group */
504 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
505 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
506 		if (IS_UNDER_IPMP(ill))
507 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
508 		else
509 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
510 	}
511 
512 	if (ira->ira_flags & IRAF_MULTIBROADCAST) {
513 		/*
514 		 * Not one or our addresses (IRE_LOCALs), thus we let
515 		 * ip_output_simple pick the source.
516 		 */
517 		ip6h->ip6_src = ipv6_all_zeros;
518 		ixas.ixa_flags |= IXAF_SET_SOURCE;
519 	}
520 
521 	/* Should we send using dce_pmtu? */
522 	if (ipst->ips_ipv6_icmp_return_pmtu)
523 		ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
524 
525 	(void) ip_output_simple(mp, &ixas);
526 	ixa_cleanup(&ixas);
527 
528 }
529 
530 /*
531  * Verify the ICMP messages for either for ICMP error or redirect packet.
532  * The caller should have fully pulled up the message. If it's a redirect
533  * packet, only basic checks on IP header will be done; otherwise, verify
534  * the packet by looking at the included ULP header.
535  *
536  * Called before icmp_inbound_error_fanout_v6 is called.
537  */
538 static boolean_t
539 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
540 {
541 	ill_t		*ill = ira->ira_ill;
542 	uint16_t	hdr_length;
543 	uint8_t		*nexthdrp;
544 	uint8_t		nexthdr;
545 	ip_stack_t	*ipst = ill->ill_ipst;
546 	conn_t		*connp;
547 	ip6_t		*ip6h;	/* Inner header */
548 
549 	ip6h = (ip6_t *)&icmp6[1];
550 	if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
551 		goto truncated;
552 
553 	if (icmp6->icmp6_type == ND_REDIRECT) {
554 		hdr_length = sizeof (nd_redirect_t);
555 	} else {
556 		if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
557 			goto discard_pkt;
558 		hdr_length = IPV6_HDR_LEN;
559 	}
560 
561 	if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
562 		goto truncated;
563 
564 	/*
565 	 * Stop here for ICMP_REDIRECT.
566 	 */
567 	if (icmp6->icmp6_type == ND_REDIRECT)
568 		return (B_TRUE);
569 
570 	/*
571 	 * ICMP errors only.
572 	 */
573 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
574 		goto discard_pkt;
575 	nexthdr = *nexthdrp;
576 
577 	/* Try to pass the ICMP message to clients who need it */
578 	switch (nexthdr) {
579 	case IPPROTO_UDP:
580 		/*
581 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
582 		 * transport header.
583 		 */
584 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
585 		    mp->b_wptr)
586 			goto truncated;
587 		break;
588 	case IPPROTO_TCP: {
589 		tcpha_t		*tcpha;
590 
591 		/*
592 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
593 		 * transport header.
594 		 */
595 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
596 		    mp->b_wptr)
597 			goto truncated;
598 
599 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
600 		/*
601 		 * With IPMP we need to match across group, which we do
602 		 * since we have the upper ill from ira_ill.
603 		 */
604 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
605 		    ill->ill_phyint->phyint_ifindex, ipst);
606 		if (connp == NULL)
607 			goto discard_pkt;
608 
609 		if ((connp->conn_verifyicmp != NULL) &&
610 		    !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
611 			CONN_DEC_REF(connp);
612 			goto discard_pkt;
613 		}
614 		CONN_DEC_REF(connp);
615 		break;
616 	}
617 	case IPPROTO_SCTP:
618 		/*
619 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
620 		 * transport header.
621 		 */
622 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
623 		    mp->b_wptr)
624 			goto truncated;
625 		break;
626 	case IPPROTO_ESP:
627 	case IPPROTO_AH:
628 		break;
629 	case IPPROTO_ENCAP:
630 	case IPPROTO_IPV6: {
631 		/* Look for self-encapsulated packets that caused an error */
632 		ip6_t *in_ip6h;
633 
634 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
635 		if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
636 		    sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
637 			goto truncated;
638 		break;
639 	}
640 	default:
641 		break;
642 	}
643 
644 	return (B_TRUE);
645 
646 discard_pkt:
647 	/* Bogus ICMP error. */
648 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
649 	return (B_FALSE);
650 
651 truncated:
652 	/* We pulled up everthing already. Must be truncated */
653 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
654 	return (B_FALSE);
655 }
656 
657 /*
658  * Process received IPv6 ICMP Packet too big.
659  * The caller is responsible for validating the packet before passing it in
660  * and also to fanout the ICMP error to any matching transport conns. Assumes
661  * the message has been fully pulled up.
662  *
663  * Before getting here, the caller has called icmp_inbound_verify_v6()
664  * that should have verified with ULP to prevent undoing the changes we're
665  * going to make to DCE. For example, TCP might have verified that the packet
666  * which generated error is in the send window.
667  *
668  * In some cases modified this MTU in the ICMP header packet; the caller
669  * should pass to the matching ULP after this returns.
670  */
671 static void
672 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
673 {
674 	uint32_t	mtu;
675 	dce_t		*dce;
676 	ill_t		*ill = ira->ira_ill;	/* Upper ill if IPMP */
677 	ip_stack_t	*ipst = ill->ill_ipst;
678 	int		old_max_frag;
679 	in6_addr_t	final_dst;
680 	ip6_t		*ip6h;	/* Inner IP header */
681 
682 	/* Caller has already pulled up everything. */
683 	ip6h = (ip6_t *)&icmp6[1];
684 	final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
685 
686 	mtu = ntohl(icmp6->icmp6_mtu);
687 	if (mtu < IPV6_MIN_MTU) {
688 		/*
689 		 * RFC 8021 suggests to ignore messages where mtu is
690 		 * less than the IPv6 minimum.
691 		 */
692 		ip1dbg(("Received mtu less than IPv6 "
693 		    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
694 		DTRACE_PROBE1(icmp6__too__small__mtu, uint32_t, mtu);
695 		return;
696 	}
697 
698 	/*
699 	 * For link local destinations matching simply on address is not
700 	 * sufficient. Same link local addresses for different ILL's is
701 	 * possible.
702 	 */
703 	if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
704 		dce = dce_lookup_and_add_v6(&final_dst,
705 		    ill->ill_phyint->phyint_ifindex, ipst);
706 	} else {
707 		dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
708 	}
709 	if (dce == NULL) {
710 		/* Couldn't add a unique one - ENOMEM */
711 		if (ip_debug > 2) {
712 			/* ip1dbg */
713 			pr_addr_dbg("icmp_inbound_too_big_v6:"
714 			    "no dce for dst %s\n", AF_INET6,
715 			    &final_dst);
716 		}
717 		return;
718 	}
719 
720 	mutex_enter(&dce->dce_lock);
721 	if (dce->dce_flags & DCEF_PMTU)
722 		old_max_frag = dce->dce_pmtu;
723 	else if (IN6_IS_ADDR_MULTICAST(&final_dst))
724 		old_max_frag = ill->ill_mc_mtu;
725 	else
726 		old_max_frag = ill->ill_mtu;
727 
728 	ip1dbg(("Received mtu from router: %d\n", mtu));
729 	DTRACE_PROBE1(icmp6__received__mtu, uint32_t, mtu);
730 	dce->dce_pmtu = MIN(old_max_frag, mtu);
731 	icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
732 
733 	/* We now have a PMTU for sure */
734 	dce->dce_flags |= DCEF_PMTU;
735 	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
736 
737 	mutex_exit(&dce->dce_lock);
738 	/*
739 	 * After dropping the lock the new value is visible to everyone.
740 	 * Then we bump the generation number so any cached values reinspect
741 	 * the dce_t.
742 	 */
743 	dce_increment_generation(dce);
744 	dce_refrele(dce);
745 }
746 
747 /*
748  * Fanout received ICMPv6 error packets to the transports.
749  * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
750  *
751  * The caller must have called icmp_inbound_verify_v6.
752  */
753 void
754 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
755 {
756 	uint16_t	*up;	/* Pointer to ports in ULP header */
757 	uint32_t	ports;	/* reversed ports for fanout */
758 	ip6_t		rip6h;	/* With reversed addresses */
759 	ip6_t		*ip6h;	/* Inner IP header */
760 	uint16_t	hdr_length; /* Inner IP header length */
761 	uint8_t		*nexthdrp;
762 	uint8_t		nexthdr;
763 	tcpha_t		*tcpha;
764 	conn_t		*connp;
765 	ill_t		*ill = ira->ira_ill;	/* Upper in the case of IPMP */
766 	ip_stack_t	*ipst = ill->ill_ipst;
767 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
768 
769 	/* Caller has already pulled up everything. */
770 	ip6h = (ip6_t *)&icmp6[1];
771 	ASSERT(mp->b_cont == NULL);
772 	ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
773 
774 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
775 		goto drop_pkt;
776 	nexthdr = *nexthdrp;
777 	ira->ira_protocol = nexthdr;
778 
779 	/*
780 	 * We need a separate IP header with the source and destination
781 	 * addresses reversed to do fanout/classification because the ip6h in
782 	 * the ICMPv6 error is in the form we sent it out.
783 	 */
784 	rip6h.ip6_src = ip6h->ip6_dst;
785 	rip6h.ip6_dst = ip6h->ip6_src;
786 	rip6h.ip6_nxt = nexthdr;
787 
788 	/* Try to pass the ICMP message to clients who need it */
789 	switch (nexthdr) {
790 	case IPPROTO_UDP: {
791 		/* Attempt to find a client stream based on port. */
792 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
793 
794 		/* Note that we send error to all matches. */
795 		ira->ira_flags |= IRAF_ICMP_ERROR;
796 		ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
797 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
798 		return;
799 	}
800 	case IPPROTO_TCP: {
801 		/*
802 		 * Attempt to find a client stream based on port.
803 		 * Note that we do a reverse lookup since the header is
804 		 * in the form we sent it out.
805 		 */
806 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
807 		/*
808 		 * With IPMP we need to match across group, which we do
809 		 * since we have the upper ill from ira_ill.
810 		 */
811 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
812 		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
813 		if (connp == NULL) {
814 			goto drop_pkt;
815 		}
816 
817 		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
818 		    (ira->ira_flags & IRAF_IPSEC_SECURE)) {
819 			mp = ipsec_check_inbound_policy(mp, connp,
820 			    NULL, ip6h, ira);
821 			if (mp == NULL) {
822 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
823 				/* Note that mp is NULL */
824 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
825 				CONN_DEC_REF(connp);
826 				return;
827 			}
828 		}
829 
830 		ira->ira_flags |= IRAF_ICMP_ERROR;
831 		if (IPCL_IS_TCP(connp)) {
832 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
833 			    connp->conn_recvicmp, connp, ira, SQ_FILL,
834 			    SQTAG_TCP6_INPUT_ICMP_ERR);
835 		} else {
836 			/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
837 			ill_t *rill = ira->ira_rill;
838 
839 			ira->ira_ill = ira->ira_rill = NULL;
840 			(connp->conn_recv)(connp, mp, NULL, ira);
841 			CONN_DEC_REF(connp);
842 			ira->ira_ill = ill;
843 			ira->ira_rill = rill;
844 		}
845 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
846 		return;
847 
848 	}
849 	case IPPROTO_SCTP:
850 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
851 		/* Find a SCTP client stream for this packet. */
852 		((uint16_t *)&ports)[0] = up[1];
853 		((uint16_t *)&ports)[1] = up[0];
854 
855 		ira->ira_flags |= IRAF_ICMP_ERROR;
856 		ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
857 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
858 		return;
859 
860 	case IPPROTO_ESP:
861 	case IPPROTO_AH:
862 		if (!ipsec_loaded(ipss)) {
863 			ip_proto_not_sup(mp, ira);
864 			return;
865 		}
866 
867 		if (nexthdr == IPPROTO_ESP)
868 			mp = ipsecesp_icmp_error(mp, ira);
869 		else
870 			mp = ipsecah_icmp_error(mp, ira);
871 		if (mp == NULL)
872 			return;
873 
874 		/* Just in case ipsec didn't preserve the NULL b_cont */
875 		if (mp->b_cont != NULL) {
876 			if (!pullupmsg(mp, -1))
877 				goto drop_pkt;
878 		}
879 
880 		/*
881 		 * If succesful, the mp has been modified to not include
882 		 * the ESP/AH header so we can fanout to the ULP's icmp
883 		 * error handler.
884 		 */
885 		if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
886 			goto drop_pkt;
887 
888 		ip6h = (ip6_t *)mp->b_rptr;
889 		/* Don't call hdr_length_v6() unless you have to. */
890 		if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
891 			hdr_length = ip_hdr_length_v6(mp, ip6h);
892 		else
893 			hdr_length = IPV6_HDR_LEN;
894 
895 		/* Verify the modified message before any further processes. */
896 		icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
897 		if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
898 			freemsg(mp);
899 			return;
900 		}
901 
902 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
903 		return;
904 
905 	case IPPROTO_IPV6: {
906 		/* Look for self-encapsulated packets that caused an error */
907 		ip6_t *in_ip6h;
908 
909 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
910 
911 		if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
912 		    IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
913 			/*
914 			 * Self-encapsulated case. As in the ipv4 case,
915 			 * we need to strip the 2nd IP header. Since mp
916 			 * is already pulled-up, we can simply bcopy
917 			 * the 3rd header + data over the 2nd header.
918 			 */
919 			uint16_t unused_len;
920 
921 			/*
922 			 * Make sure we don't do recursion more than once.
923 			 */
924 			if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
925 			    &unused_len, &nexthdrp) ||
926 			    *nexthdrp == IPPROTO_IPV6) {
927 				goto drop_pkt;
928 			}
929 
930 			/*
931 			 * Copy the 3rd header + remaining data on top
932 			 * of the 2nd header.
933 			 */
934 			bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
935 
936 			/*
937 			 * Subtract length of the 2nd header.
938 			 */
939 			mp->b_wptr -= hdr_length;
940 
941 			ip6h = (ip6_t *)mp->b_rptr;
942 			/* Don't call hdr_length_v6() unless you have to. */
943 			if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
944 				hdr_length = ip_hdr_length_v6(mp, ip6h);
945 			else
946 				hdr_length = IPV6_HDR_LEN;
947 
948 			/*
949 			 * Verify the modified message before any further
950 			 * processes.
951 			 */
952 			icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
953 			if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
954 				freemsg(mp);
955 				return;
956 			}
957 
958 			/*
959 			 * Now recurse, and see what I _really_ should be
960 			 * doing here.
961 			 */
962 			icmp_inbound_error_fanout_v6(mp, icmp6, ira);
963 			return;
964 		}
965 	}
966 	/* FALLTHROUGH */
967 	case IPPROTO_ENCAP:
968 		if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
969 		    &rip6h.ip6_dst, ipst)) != NULL) {
970 			ira->ira_flags |= IRAF_ICMP_ERROR;
971 			connp->conn_recvicmp(connp, mp, NULL, ira);
972 			CONN_DEC_REF(connp);
973 			ira->ira_flags &= ~IRAF_ICMP_ERROR;
974 			return;
975 		}
976 		/*
977 		 * No IP tunnel is interested, fallthrough and see
978 		 * if a raw socket will want it.
979 		 */
980 		/* FALLTHROUGH */
981 	default:
982 		ira->ira_flags |= IRAF_ICMP_ERROR;
983 		ASSERT(ira->ira_protocol == nexthdr);
984 		ip_fanout_proto_v6(mp, &rip6h, ira);
985 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
986 		return;
987 	}
988 	/* NOTREACHED */
989 drop_pkt:
990 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
991 	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
992 	freemsg(mp);
993 }
994 
995 /*
996  * Process received IPv6 ICMP Redirect messages.
997  * Assumes the caller has verified that the headers are in the pulled up mblk.
998  * Consumes mp.
999  */
1000 /* ARGSUSED */
1001 static void
1002 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1003     ip_recv_attr_t *ira)
1004 {
1005 	ire_t		*ire, *nire;
1006 	ire_t		*prev_ire = NULL;
1007 	ire_t		*redir_ire;
1008 	in6_addr_t	*src, *dst, *gateway;
1009 	nd_opt_hdr_t	*opt;
1010 	nce_t		*nce;
1011 	int		ncec_flags = 0;
1012 	int		err = 0;
1013 	boolean_t	redirect_to_router = B_FALSE;
1014 	int		len;
1015 	int		optlen;
1016 	ill_t		*ill = ira->ira_rill;
1017 	ill_t		*rill = ira->ira_rill;
1018 	ip_stack_t	*ipst = ill->ill_ipst;
1019 
1020 	/*
1021 	 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1022 	 * and make it be the IPMP upper so avoid being confused by a packet
1023 	 * addressed to a unicast address on a different ill.
1024 	 */
1025 	if (IS_UNDER_IPMP(rill)) {
1026 		rill = ipmp_ill_hold_ipmp_ill(rill);
1027 		if (rill == NULL) {
1028 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1029 			ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1030 			    mp, ill);
1031 			freemsg(mp);
1032 			return;
1033 		}
1034 		ASSERT(rill != ira->ira_rill);
1035 	}
1036 
1037 	len = mp->b_wptr - (uchar_t *)rd;
1038 	src = &ip6h->ip6_src;
1039 	dst = &rd->nd_rd_dst;
1040 	gateway = &rd->nd_rd_target;
1041 
1042 	/* Verify if it is a valid redirect */
1043 	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1044 	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1045 	    (rd->nd_rd_code != 0) ||
1046 	    (len < sizeof (nd_redirect_t)) ||
1047 	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
1048 	    (IN6_IS_ADDR_MULTICAST(dst))) {
1049 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1050 		ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1051 		goto fail_redirect;
1052 	}
1053 
1054 	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1055 	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1056 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1057 		ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1058 		    mp, ill);
1059 		goto fail_redirect;
1060 	}
1061 
1062 	optlen = len - sizeof (nd_redirect_t);
1063 	if (optlen != 0) {
1064 		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1065 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1066 			ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1067 			    mp, ill);
1068 			goto fail_redirect;
1069 		}
1070 	}
1071 
1072 	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1073 		redirect_to_router = B_TRUE;
1074 		ncec_flags |= NCE_F_ISROUTER;
1075 	} else {
1076 		gateway = dst;	/* Add nce for dst */
1077 	}
1078 
1079 
1080 	/*
1081 	 * Verify that the IP source address of the redirect is
1082 	 * the same as the current first-hop router for the specified
1083 	 * ICMP destination address.
1084 	 * Also, Make sure we had a route for the dest in question and
1085 	 * that route was pointing to the old gateway (the source of the
1086 	 * redirect packet.)
1087 	 * We do longest match and then compare ire_gateway_addr_v6 below.
1088 	 */
1089 	prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1090 	    ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1091 
1092 	/*
1093 	 * Check that
1094 	 *	the redirect was not from ourselves
1095 	 *	old gateway is still directly reachable
1096 	 */
1097 	if (prev_ire == NULL ||
1098 	    (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1099 	    (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1100 	    !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1101 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1102 		ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1103 		goto fail_redirect;
1104 	}
1105 
1106 	ASSERT(prev_ire->ire_ill != NULL);
1107 	if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1108 		ncec_flags |= NCE_F_NONUD;
1109 
1110 	opt = (nd_opt_hdr_t *)&rd[1];
1111 	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1112 	if (opt != NULL) {
1113 		err = nce_lookup_then_add_v6(rill,
1114 		    (uchar_t *)&opt[1],		/* Link layer address */
1115 		    rill->ill_phys_addr_length,
1116 		    gateway, ncec_flags, ND_STALE, &nce);
1117 		switch (err) {
1118 		case 0:
1119 			nce_refrele(nce);
1120 			break;
1121 		case EEXIST:
1122 			/*
1123 			 * Check to see if link layer address has changed and
1124 			 * process the ncec_state accordingly.
1125 			 */
1126 			nce_process(nce->nce_common,
1127 			    (uchar_t *)&opt[1], 0, B_FALSE);
1128 			nce_refrele(nce);
1129 			break;
1130 		default:
1131 			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1132 			    err));
1133 			goto fail_redirect;
1134 		}
1135 	}
1136 	if (redirect_to_router) {
1137 		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1138 
1139 		/*
1140 		 * Create a Route Association.  This will allow us to remember
1141 		 * a router told us to use the particular gateway.
1142 		 */
1143 		ire = ire_create_v6(
1144 		    dst,
1145 		    &ipv6_all_ones,		/* mask */
1146 		    gateway,			/* gateway addr */
1147 		    IRE_HOST,
1148 		    prev_ire->ire_ill,
1149 		    ALL_ZONES,
1150 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1151 		    NULL,
1152 		    ipst);
1153 	} else {
1154 		ipif_t *ipif;
1155 		in6_addr_t gw;
1156 
1157 		/*
1158 		 * Just create an on link entry, i.e. interface route.
1159 		 * The gateway field is our link-local on the ill.
1160 		 */
1161 		mutex_enter(&rill->ill_lock);
1162 		for (ipif = rill->ill_ipif; ipif != NULL;
1163 		    ipif = ipif->ipif_next) {
1164 			if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1165 			    IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1166 				break;
1167 		}
1168 		if (ipif == NULL) {
1169 			/* We have no link-local address! */
1170 			mutex_exit(&rill->ill_lock);
1171 			goto fail_redirect;
1172 		}
1173 		gw = ipif->ipif_v6lcl_addr;
1174 		mutex_exit(&rill->ill_lock);
1175 
1176 		ire = ire_create_v6(
1177 		    dst,				/* gateway == dst */
1178 		    &ipv6_all_ones,			/* mask */
1179 		    &gw,				/* gateway addr */
1180 		    rill->ill_net_type,			/* IF_[NO]RESOLVER */
1181 		    prev_ire->ire_ill,
1182 		    ALL_ZONES,
1183 		    (RTF_DYNAMIC | RTF_HOST),
1184 		    NULL,
1185 		    ipst);
1186 	}
1187 
1188 	if (ire == NULL)
1189 		goto fail_redirect;
1190 
1191 	nire = ire_add(ire);
1192 	/* Check if it was a duplicate entry */
1193 	if (nire != NULL && nire != ire) {
1194 		ASSERT(nire->ire_identical_ref > 1);
1195 		ire_delete(nire);
1196 		ire_refrele(nire);
1197 		nire = NULL;
1198 	}
1199 	ire = nire;
1200 	if (ire != NULL) {
1201 		ire_refrele(ire);		/* Held in ire_add */
1202 
1203 		/* tell routing sockets that we received a redirect */
1204 		ip_rts_change_v6(RTM_REDIRECT,
1205 		    &rd->nd_rd_dst,
1206 		    &rd->nd_rd_target,
1207 		    &ipv6_all_ones, 0, src,
1208 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1209 		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1210 
1211 		/*
1212 		 * Delete any existing IRE_HOST type ires for this destination.
1213 		 * This together with the added IRE has the effect of
1214 		 * modifying an existing redirect.
1215 		 */
1216 		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1217 		    prev_ire->ire_ill, ALL_ZONES, NULL,
1218 		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1219 		    NULL);
1220 
1221 		if (redir_ire != NULL) {
1222 			if (redir_ire->ire_flags & RTF_DYNAMIC)
1223 				ire_delete(redir_ire);
1224 			ire_refrele(redir_ire);
1225 		}
1226 	}
1227 
1228 	ire_refrele(prev_ire);
1229 	prev_ire = NULL;
1230 
1231 fail_redirect:
1232 	if (prev_ire != NULL)
1233 		ire_refrele(prev_ire);
1234 	freemsg(mp);
1235 	if (rill != ira->ira_rill)
1236 		ill_refrele(rill);
1237 }
1238 
1239 /*
1240  * Build and ship an IPv6 ICMP message using the packet data in mp,
1241  * and the ICMP header pointed to by "stuff".  (May be called as
1242  * writer.)
1243  * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1244  * verify that an icmp error packet can be sent.
1245  *
1246  * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1247  * source address (see above function).
1248  */
1249 static void
1250 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1251     const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1252 {
1253 	ip6_t		*ip6h;
1254 	in6_addr_t	v6dst;
1255 	size_t		len_needed;
1256 	size_t		msg_len;
1257 	mblk_t		*mp1;
1258 	icmp6_t		*icmp6;
1259 	in6_addr_t	v6src;
1260 	ill_t		*ill = ira->ira_ill;
1261 	ip_stack_t	*ipst = ill->ill_ipst;
1262 	ip_xmit_attr_t	ixas;
1263 
1264 	ip6h = (ip6_t *)mp->b_rptr;
1265 
1266 	bzero(&ixas, sizeof (ixas));
1267 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1268 	ixas.ixa_zoneid = ira->ira_zoneid;
1269 	ixas.ixa_ifindex = 0;
1270 	ixas.ixa_ipst = ipst;
1271 	ixas.ixa_cred = kcred;
1272 	ixas.ixa_cpid = NOPID;
1273 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
1274 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1275 
1276 	/*
1277 	 * If the source of the original packet was link-local, then
1278 	 * make sure we send on the same ill (group) as we received it on.
1279 	 */
1280 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1281 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
1282 		if (IS_UNDER_IPMP(ill))
1283 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1284 		else
1285 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1286 	}
1287 
1288 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1289 		/*
1290 		 * Apply IPsec based on how IPsec was applied to
1291 		 * the packet that had the error.
1292 		 *
1293 		 * If it was an outbound packet that caused the ICMP
1294 		 * error, then the caller will have setup the IRA
1295 		 * appropriately.
1296 		 */
1297 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1298 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1299 			/* Note: mp already consumed and ip_drop_packet done */
1300 			return;
1301 		}
1302 	} else {
1303 		/*
1304 		 * This is in clear. The icmp message we are building
1305 		 * here should go out in clear, independent of our policy.
1306 		 */
1307 		ixas.ixa_flags |= IXAF_NO_IPSEC;
1308 	}
1309 
1310 	/*
1311 	 * If the caller specified the source we use that.
1312 	 * Otherwise, if the packet was for one of our unicast addresses, make
1313 	 * sure we respond with that as the source. Otherwise
1314 	 * have ip_output_simple pick the source address.
1315 	 */
1316 	if (v6src_ptr != NULL) {
1317 		v6src = *v6src_ptr;
1318 	} else {
1319 		ire_t *ire;
1320 		uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1321 
1322 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1323 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1324 			match_flags |= MATCH_IRE_ILL;
1325 
1326 		ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1327 		    (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1328 		    match_flags, 0, ipst, NULL);
1329 		if (ire != NULL) {
1330 			v6src = ip6h->ip6_dst;
1331 			ire_refrele(ire);
1332 		} else {
1333 			v6src = ipv6_all_zeros;
1334 			ixas.ixa_flags |= IXAF_SET_SOURCE;
1335 		}
1336 	}
1337 	v6dst = ip6h->ip6_src;
1338 	len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1339 	msg_len = msgdsize(mp);
1340 	if (msg_len > len_needed) {
1341 		if (!adjmsg(mp, len_needed - msg_len)) {
1342 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1343 			freemsg(mp);
1344 			return;
1345 		}
1346 		msg_len = len_needed;
1347 	}
1348 	mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1349 	if (mp1 == NULL) {
1350 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1351 		freemsg(mp);
1352 		return;
1353 	}
1354 	mp1->b_cont = mp;
1355 	mp = mp1;
1356 
1357 	/*
1358 	 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1359 	 * node generates be accepted in peace by all on-host destinations.
1360 	 * If we do NOT assume that all on-host destinations trust
1361 	 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1362 	 * (Look for IXAF_TRUSTED_ICMP).
1363 	 */
1364 	ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1365 
1366 	ip6h = (ip6_t *)mp->b_rptr;
1367 	mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1368 
1369 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1370 	ip6h->ip6_nxt = IPPROTO_ICMPV6;
1371 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1372 	ip6h->ip6_dst = v6dst;
1373 	ip6h->ip6_src = v6src;
1374 	msg_len += IPV6_HDR_LEN + len;
1375 	if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1376 		(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1377 		msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1378 	}
1379 	ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1380 	icmp6 = (icmp6_t *)&ip6h[1];
1381 	bcopy(stuff, (char *)icmp6, len);
1382 	/*
1383 	 * Prepare for checksum by putting icmp length in the icmp
1384 	 * checksum field. The checksum is calculated in ip_output_wire_v6.
1385 	 */
1386 	icmp6->icmp6_cksum = ip6h->ip6_plen;
1387 	if (icmp6->icmp6_type == ND_REDIRECT) {
1388 		ip6h->ip6_hops = IPV6_MAX_HOPS;
1389 	}
1390 
1391 	(void) ip_output_simple(mp, &ixas);
1392 	ixa_cleanup(&ixas);
1393 }
1394 
1395 /*
1396  * Update the output mib when ICMPv6 packets are sent.
1397  */
1398 void
1399 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1400 {
1401 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1402 
1403 	switch (icmp6->icmp6_type) {
1404 	case ICMP6_DST_UNREACH:
1405 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1406 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1407 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1408 		break;
1409 
1410 	case ICMP6_TIME_EXCEEDED:
1411 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1412 		break;
1413 
1414 	case ICMP6_PARAM_PROB:
1415 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1416 		break;
1417 
1418 	case ICMP6_PACKET_TOO_BIG:
1419 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1420 		break;
1421 
1422 	case ICMP6_ECHO_REQUEST:
1423 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1424 		break;
1425 
1426 	case ICMP6_ECHO_REPLY:
1427 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1428 		break;
1429 
1430 	case ND_ROUTER_SOLICIT:
1431 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1432 		break;
1433 
1434 	case ND_ROUTER_ADVERT:
1435 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1436 		break;
1437 
1438 	case ND_NEIGHBOR_SOLICIT:
1439 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1440 		break;
1441 
1442 	case ND_NEIGHBOR_ADVERT:
1443 		BUMP_MIB(ill->ill_icmp6_mib,
1444 		    ipv6IfIcmpOutNeighborAdvertisements);
1445 		break;
1446 
1447 	case ND_REDIRECT:
1448 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1449 		break;
1450 
1451 	case MLD_LISTENER_QUERY:
1452 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1453 		break;
1454 
1455 	case MLD_LISTENER_REPORT:
1456 	case MLD_V2_LISTENER_REPORT:
1457 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1458 		break;
1459 
1460 	case MLD_LISTENER_REDUCTION:
1461 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1462 		break;
1463 	}
1464 }
1465 
1466 /*
1467  * Check if it is ok to send an ICMPv6 error packet in
1468  * response to the IP packet in mp.
1469  * Free the message and return null if no
1470  * ICMP error packet should be sent.
1471  */
1472 static mblk_t *
1473 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1474 {
1475 	ill_t		*ill = ira->ira_ill;
1476 	ip_stack_t	*ipst = ill->ill_ipst;
1477 	boolean_t	llbcast;
1478 	ip6_t		*ip6h;
1479 
1480 	if (!mp)
1481 		return (NULL);
1482 
1483 	/* We view multicast and broadcast as the same.. */
1484 	llbcast = (ira->ira_flags &
1485 	    (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1486 	ip6h = (ip6_t *)mp->b_rptr;
1487 
1488 	/* Check if source address uniquely identifies the host */
1489 
1490 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1491 	    IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1492 	    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1493 		freemsg(mp);
1494 		return (NULL);
1495 	}
1496 
1497 	if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1498 		size_t	len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1499 		icmp6_t		*icmp6;
1500 
1501 		if (mp->b_wptr - mp->b_rptr < len_needed) {
1502 			if (!pullupmsg(mp, len_needed)) {
1503 				BUMP_MIB(ill->ill_icmp6_mib,
1504 				    ipv6IfIcmpInErrors);
1505 				freemsg(mp);
1506 				return (NULL);
1507 			}
1508 			ip6h = (ip6_t *)mp->b_rptr;
1509 		}
1510 		icmp6 = (icmp6_t *)&ip6h[1];
1511 		/* Explicitly do not generate errors in response to redirects */
1512 		if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1513 		    icmp6->icmp6_type == ND_REDIRECT) {
1514 			freemsg(mp);
1515 			return (NULL);
1516 		}
1517 	}
1518 	/*
1519 	 * Check that the destination is not multicast and that the packet
1520 	 * was not sent on link layer broadcast or multicast.  (Exception
1521 	 * is Packet too big message as per the draft - when mcast_ok is set.)
1522 	 */
1523 	if (!mcast_ok &&
1524 	    (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1525 		freemsg(mp);
1526 		return (NULL);
1527 	}
1528 	/*
1529 	 * If this is a labeled system, then check to see if we're allowed to
1530 	 * send a response to this particular sender.  If not, then just drop.
1531 	 */
1532 	if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1533 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1534 		freemsg(mp);
1535 		return (NULL);
1536 	}
1537 
1538 	if (icmp_err_rate_limit(ipst)) {
1539 		/*
1540 		 * Only send ICMP error packets every so often.
1541 		 * This should be done on a per port/source basis,
1542 		 * but for now this will suffice.
1543 		 */
1544 		freemsg(mp);
1545 		return (NULL);
1546 	}
1547 	return (mp);
1548 }
1549 
1550 /*
1551  * Called when a packet was sent out the same link that it arrived on.
1552  * Check if it is ok to send a redirect and then send it.
1553  */
1554 void
1555 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1556     ip_recv_attr_t *ira)
1557 {
1558 	ill_t		*ill = ira->ira_ill;
1559 	ip_stack_t	*ipst = ill->ill_ipst;
1560 	in6_addr_t	*v6targ;
1561 	ire_t		*src_ire_v6 = NULL;
1562 	mblk_t		*mp1;
1563 	ire_t		*nhop_ire = NULL;
1564 
1565 	/*
1566 	 * Don't send a redirect when forwarding a source
1567 	 * routed packet.
1568 	 */
1569 	if (ip_source_routed_v6(ip6h, mp, ipst))
1570 		return;
1571 
1572 	if (ire->ire_type & IRE_ONLINK) {
1573 		/* Target is directly connected */
1574 		v6targ = &ip6h->ip6_dst;
1575 	} else {
1576 		/* Determine the most specific IRE used to send the packets */
1577 		nhop_ire = ire_nexthop(ire);
1578 		if (nhop_ire == NULL)
1579 			return;
1580 
1581 		/*
1582 		 * We won't send redirects to a router
1583 		 * that doesn't have a link local
1584 		 * address, but will forward.
1585 		 */
1586 		if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1587 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1588 			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1589 			ire_refrele(nhop_ire);
1590 			return;
1591 		}
1592 		v6targ = &nhop_ire->ire_addr_v6;
1593 	}
1594 	src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1595 	    NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1596 	    MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1597 
1598 	if (src_ire_v6 == NULL) {
1599 		if (nhop_ire != NULL)
1600 			ire_refrele(nhop_ire);
1601 		return;
1602 	}
1603 
1604 	/*
1605 	 * The source is directly connected.
1606 	 */
1607 	mp1 = copymsg(mp);
1608 	if (mp1 != NULL)
1609 		icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1610 
1611 	if (nhop_ire != NULL)
1612 		ire_refrele(nhop_ire);
1613 	ire_refrele(src_ire_v6);
1614 }
1615 
1616 /*
1617  * Generate an ICMPv6 redirect message.
1618  * Include target link layer address option if it exits.
1619  * Always include redirect header.
1620  */
1621 static void
1622 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1623     ip_recv_attr_t *ira)
1624 {
1625 	nd_redirect_t	*rd;
1626 	nd_opt_rd_hdr_t	*rdh;
1627 	uchar_t		*buf;
1628 	ncec_t		*ncec = NULL;
1629 	nd_opt_hdr_t	*opt;
1630 	int		len;
1631 	int		ll_opt_len = 0;
1632 	int		max_redir_hdr_data_len;
1633 	int		pkt_len;
1634 	in6_addr_t	*srcp;
1635 	ill_t		*ill;
1636 	boolean_t	need_refrele;
1637 	ip_stack_t	*ipst = ira->ira_ill->ill_ipst;
1638 
1639 	mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1640 	if (mp == NULL)
1641 		return;
1642 
1643 	if (IS_UNDER_IPMP(ira->ira_ill)) {
1644 		ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1645 		if (ill == NULL) {
1646 			ill = ira->ira_ill;
1647 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1648 			ip_drop_output("no IPMP ill for sending redirect",
1649 			    mp, ill);
1650 			freemsg(mp);
1651 			return;
1652 		}
1653 		need_refrele = B_TRUE;
1654 	} else {
1655 		ill = ira->ira_ill;
1656 		need_refrele = B_FALSE;
1657 	}
1658 
1659 	ncec = ncec_lookup_illgrp_v6(ill, targetp);
1660 	if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1661 	    ncec->ncec_lladdr != NULL) {
1662 		ll_opt_len = (sizeof (nd_opt_hdr_t) +
1663 		    ill->ill_phys_addr_length + 7)/8 * 8;
1664 	}
1665 	len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1666 	ASSERT(len % 4 == 0);
1667 	buf = kmem_alloc(len, KM_NOSLEEP);
1668 	if (buf == NULL) {
1669 		if (ncec != NULL)
1670 			ncec_refrele(ncec);
1671 		if (need_refrele)
1672 			ill_refrele(ill);
1673 		freemsg(mp);
1674 		return;
1675 	}
1676 
1677 	rd = (nd_redirect_t *)buf;
1678 	rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1679 	rd->nd_rd_code = 0;
1680 	rd->nd_rd_reserved = 0;
1681 	rd->nd_rd_target = *targetp;
1682 	rd->nd_rd_dst = *dest;
1683 
1684 	opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1685 	if (ncec != NULL && ll_opt_len != 0) {
1686 		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1687 		opt->nd_opt_len = ll_opt_len/8;
1688 		bcopy((char *)ncec->ncec_lladdr, &opt[1],
1689 		    ill->ill_phys_addr_length);
1690 	}
1691 	if (ncec != NULL)
1692 		ncec_refrele(ncec);
1693 	rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1694 	rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1695 	/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1696 	max_redir_hdr_data_len =
1697 	    (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1698 	pkt_len = msgdsize(mp);
1699 	/* Make sure mp is 8 byte aligned */
1700 	if (pkt_len > max_redir_hdr_data_len) {
1701 		rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1702 		    sizeof (nd_opt_rd_hdr_t))/8;
1703 		(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1704 	} else {
1705 		rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1706 		(void) adjmsg(mp, -(pkt_len % 8));
1707 	}
1708 	rdh->nd_opt_rh_reserved1 = 0;
1709 	rdh->nd_opt_rh_reserved2 = 0;
1710 	/* ipif_v6lcl_addr contains the link-local source address */
1711 	srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1712 
1713 	/* Redirects sent by router, and router is global zone */
1714 	ASSERT(ira->ira_zoneid == ALL_ZONES);
1715 	ira->ira_zoneid = GLOBAL_ZONEID;
1716 	icmp_pkt_v6(mp, buf, len, srcp, ira);
1717 	kmem_free(buf, len);
1718 	if (need_refrele)
1719 		ill_refrele(ill);
1720 }
1721 
1722 
1723 /* Generate an ICMP time exceeded message.  (May be called as writer.) */
1724 void
1725 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1726     ip_recv_attr_t *ira)
1727 {
1728 	icmp6_t	icmp6;
1729 
1730 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1731 	if (mp == NULL)
1732 		return;
1733 
1734 	bzero(&icmp6, sizeof (icmp6_t));
1735 	icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1736 	icmp6.icmp6_code = code;
1737 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1738 }
1739 
1740 /*
1741  * Generate an ICMP unreachable message.
1742  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1743  * constructed by the caller.
1744  */
1745 void
1746 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1747     ip_recv_attr_t *ira)
1748 {
1749 	icmp6_t	icmp6;
1750 
1751 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1752 	if (mp == NULL)
1753 		return;
1754 
1755 	bzero(&icmp6, sizeof (icmp6_t));
1756 	icmp6.icmp6_type = ICMP6_DST_UNREACH;
1757 	icmp6.icmp6_code = code;
1758 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1759 }
1760 
1761 /*
1762  * Generate an ICMP pkt too big message.
1763  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1764  * constructed by the caller.
1765  */
1766 void
1767 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1768     ip_recv_attr_t *ira)
1769 {
1770 	icmp6_t	icmp6;
1771 
1772 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1773 	if (mp == NULL)
1774 		return;
1775 
1776 	bzero(&icmp6, sizeof (icmp6_t));
1777 	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1778 	icmp6.icmp6_code = 0;
1779 	icmp6.icmp6_mtu = htonl(mtu);
1780 
1781 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1782 }
1783 
1784 /*
1785  * Generate an ICMP parameter problem message. (May be called as writer.)
1786  * 'offset' is the offset from the beginning of the packet in error.
1787  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1788  * constructed by the caller.
1789  */
1790 static void
1791 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1792     boolean_t mcast_ok, ip_recv_attr_t *ira)
1793 {
1794 	icmp6_t	icmp6;
1795 
1796 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1797 	if (mp == NULL)
1798 		return;
1799 
1800 	bzero((char *)&icmp6, sizeof (icmp6_t));
1801 	icmp6.icmp6_type = ICMP6_PARAM_PROB;
1802 	icmp6.icmp6_code = code;
1803 	icmp6.icmp6_pptr = htonl(offset);
1804 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1805 }
1806 
1807 void
1808 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1809     ip_recv_attr_t *ira)
1810 {
1811 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
1812 	uint16_t	hdr_length;
1813 	uint8_t		*nexthdrp;
1814 	uint32_t	offset;
1815 	ill_t		*ill = ira->ira_ill;
1816 
1817 	/* Determine the offset of the bad nexthdr value */
1818 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h,	&hdr_length, &nexthdrp)) {
1819 		/* Malformed packet */
1820 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1821 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
1822 		freemsg(mp);
1823 		return;
1824 	}
1825 
1826 	offset = nexthdrp - mp->b_rptr;
1827 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1828 	    mcast_ok, ira);
1829 }
1830 
1831 /*
1832  * Verify whether or not the IP address is a valid local address.
1833  * Could be a unicast, including one for a down interface.
1834  * If allow_mcbc then a multicast or broadcast address is also
1835  * acceptable.
1836  *
1837  * In the case of a multicast address, however, the
1838  * upper protocol is expected to reset the src address
1839  * to zero when we return IPVL_MCAST so that
1840  * no packets are emitted with multicast address as
1841  * source address.
1842  * The addresses valid for bind are:
1843  *	(1) - in6addr_any
1844  *	(2) - IP address of an UP interface
1845  *	(3) - IP address of a DOWN interface
1846  *	(4) - a multicast address. In this case
1847  *	the conn will only receive packets destined to
1848  *	the specified multicast address. Note: the
1849  *	application still has to issue an
1850  *	IPV6_JOIN_GROUP socket option.
1851  *
1852  * In all the above cases, the bound address must be valid in the current zone.
1853  * When the address is loopback or multicast, there might be many matching IREs
1854  * so bind has to look up based on the zone.
1855  */
1856 ip_laddr_t
1857 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1858     ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1859 {
1860 	ire_t		*src_ire;
1861 	uint_t		match_flags;
1862 	ill_t		*ill = NULL;
1863 
1864 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1865 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1866 
1867 	match_flags = MATCH_IRE_ZONEONLY;
1868 	if (scopeid != 0) {
1869 		ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1870 		if (ill == NULL)
1871 			return (IPVL_BAD);
1872 		match_flags |= MATCH_IRE_ILL;
1873 	}
1874 
1875 	src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1876 	    ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1877 	if (ill != NULL)
1878 		ill_refrele(ill);
1879 
1880 	/*
1881 	 * If an address other than in6addr_any is requested,
1882 	 * we verify that it is a valid address for bind
1883 	 * Note: Following code is in if-else-if form for
1884 	 * readability compared to a condition check.
1885 	 */
1886 	if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1887 		/*
1888 		 * (2) Bind to address of local UP interface
1889 		 */
1890 		ire_refrele(src_ire);
1891 		return (IPVL_UNICAST_UP);
1892 	} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1893 		/* (4) bind to multicast address. */
1894 		if (src_ire != NULL)
1895 			ire_refrele(src_ire);
1896 
1897 		/*
1898 		 * Note: caller should take IPV6_MULTICAST_IF
1899 		 * into account when selecting a real source address.
1900 		 */
1901 		if (allow_mcbc)
1902 			return (IPVL_MCAST);
1903 		else
1904 			return (IPVL_BAD);
1905 	} else {
1906 		ipif_t *ipif;
1907 
1908 		/*
1909 		 * (3) Bind to address of local DOWN interface?
1910 		 * (ipif_lookup_addr() looks up all interfaces
1911 		 * but we do not get here for UP interfaces
1912 		 * - case (2) above)
1913 		 */
1914 		if (src_ire != NULL)
1915 			ire_refrele(src_ire);
1916 
1917 		ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1918 		if (ipif == NULL)
1919 			return (IPVL_BAD);
1920 
1921 		/* Not a useful source? */
1922 		if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1923 			ipif_refrele(ipif);
1924 			return (IPVL_BAD);
1925 		}
1926 		ipif_refrele(ipif);
1927 		return (IPVL_UNICAST_DOWN);
1928 	}
1929 }
1930 
1931 /*
1932  * Verify that both the source and destination addresses are valid.  If
1933  * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1934  * i.e. have no route to it.  Protocols like TCP want to verify destination
1935  * reachability, while tunnels do not.
1936  *
1937  * Determine the route, the interface, and (optionally) the source address
1938  * to use to reach a given destination.
1939  * Note that we allow connect to broadcast and multicast addresses when
1940  * IPDF_ALLOW_MCBC is set.
1941  * first_hop and dst_addr are normally the same, but if source routing
1942  * they will differ; in that case the first_hop is what we'll use for the
1943  * routing lookup but the dce and label checks will be done on dst_addr,
1944  *
1945  * If uinfo is set, then we fill in the best available information
1946  * we have for the destination. This is based on (in priority order) any
1947  * metrics and path MTU stored in a dce_t, route metrics, and finally the
1948  * ill_mtu/ill_mc_mtu.
1949  *
1950  * Tsol note: If we have a source route then dst_addr != firsthop. But we
1951  * always do the label check on dst_addr.
1952  *
1953  * Assumes that the caller has set ixa_scopeid for link-local communication.
1954  */
1955 int
1956 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1957     const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1958     uint32_t flags, uint_t mac_mode)
1959 {
1960 	ire_t		*ire;
1961 	int		error = 0;
1962 	in6_addr_t	setsrc;				/* RTF_SETSRC */
1963 	zoneid_t	zoneid = ixa->ixa_zoneid;	/* Honors SO_ALLZONES */
1964 	ip_stack_t	*ipst = ixa->ixa_ipst;
1965 	dce_t		*dce;
1966 	uint_t		pmtu;
1967 	uint_t		ifindex;
1968 	uint_t		generation;	/* reused for ire, dce and source */
1969 	nce_t		*nce;
1970 	ill_t		*ill = NULL;	/* only set under IPDF_SELECT_SRC */
1971 	boolean_t	multirt = B_FALSE;
1972 
1973 	ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1974 
1975 	ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1976 
1977 	/*
1978 	 * We never send to zero; the ULPs map it to the loopback address.
1979 	 * We can't allow it since we use zero to mean uninitialized in some
1980 	 * places.
1981 	 */
1982 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1983 
     	/*
     	 * On a labeled system the label check is done on dst_addr (not
     	 * firsthop) and may hand back a replacement label for the ixa.
     	 * Nothing has been cached in ixa yet, so a failure returns directly.
     	 */
1984 	if (is_system_labeled()) {
1985 		ts_label_t *tsl = NULL;
1986 
1987 		error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
1988 		    mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
1989 		if (error != 0)
1990 			return (error);
1991 		if (tsl != NULL) {
1992 			/* Update the label */
1993 			ip_xmit_attr_replace_tsl(ixa, tsl);
1994 		}
1995 	}
1996 
1997 	setsrc = ipv6_all_zeros;
1998 	/*
1999 	 * Select a route; For IPMP interfaces, we would only select
2000 	 * a "hidden" route (i.e., going through a specific under_ill)
2001 	 * if ixa_ifindex has been specified.
2002 	 */
2003 	ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
2004 	    &setsrc, &error, &multirt);
2005 	ASSERT(ire != NULL);	/* IRE_NOROUTE if none found */
     	/* ire is still only held locally here, so bad_addr releases it. */
2006 	if (error != 0)
2007 		goto bad_addr;
2008 
2009 	/*
2010 	 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
2011 	 * If IPDF_VERIFY_DST is set, the destination must be reachable.
2012 	 * Otherwise the destination needn't be reachable.
2013 	 *
2014 	 * If we match on a reject or black hole, then we've got a
2015 	 * local failure.  May as well fail out the connect() attempt,
2016 	 * since it's never going to succeed.
2017 	 */
2018 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2019 		/*
2020 		 * If we're verifying destination reachability, we always want
2021 		 * to complain here.
2022 		 *
2023 		 * If we're not verifying destination reachability but the
2024 		 * destination has a route, we still want to fail on the
2025 		 * temporary address and broadcast address tests.
2026 		 *
2027 		 * In both cases we let the code continue so some reasonable
2028 		 * information is returned to the caller. That enables the
2029 		 * caller to use (and even cache) the IRE. conn_ip_output will
2030 		 * use the generation mismatch path to check for the unreachable
2031 		 * case thereby avoiding any specific check in the main path.
2032 		 */
2033 		ASSERT(generation == IRE_GENERATION_VERIFY);
2034 		if (flags & IPDF_VERIFY_DST) {
2035 			/*
2036 			 * Set errno but continue to set up ixa_ire to be
2037 			 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2038 			 * That allows callers to use ip_output to get an
2039 			 * ICMP error back.
2040 			 */
2041 			if (!(ire->ire_type & IRE_HOST))
2042 				error = ENETUNREACH;
2043 			else
2044 				error = EHOSTUNREACH;
2045 		}
2046 	}
2047 
     	/*
     	 * Multicast/broadcast without IPDF_ALLOW_MCBC: swap the selected ire
     	 * for the reject ire and report ENETUNREACH, but keep going so that
     	 * ixa is still populated.
     	 */
2048 	if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2049 	    !(flags & IPDF_ALLOW_MCBC)) {
2050 		ire_refrele(ire);
2051 		ire = ire_reject(ipst, B_FALSE);
2052 		generation = IRE_GENERATION_VERIFY;
2053 		error = ENETUNREACH;
2054 	}
2055 
2056 	/* Cache things */
2057 	if (ixa->ixa_ire != NULL)
2058 		ire_refrele_notr(ixa->ixa_ire);
     	/*
     	 * DEBUG only: trade the hold obtained above for a _notr hold so it
     	 * pairs with the ire_refrele_notr() used when ixa_ire is replaced.
     	 */
2059 #ifdef DEBUG
2060 	ire_refhold_notr(ire);
2061 	ire_refrele(ire);
2062 #endif
     	/*
     	 * From here on the ire reference belongs to ixa_ire; later error
     	 * paths set the local ire to NULL so bad_addr does not release it.
     	 */
2063 	ixa->ixa_ire = ire;
2064 	ixa->ixa_ire_generation = generation;
2065 
2066 	/*
2067 	 * Ensure that ixa_dce is always set any time that ixa_ire is set,
2068 	 * since some callers will send a packet to conn_ip_output() even if
2069 	 * there's an error.
2070 	 */
2071 	ifindex = 0;
2072 	if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2073 		/* If we are creating a DCE we'd better have an ifindex */
     		/*
     		 * NOTE(review): ill is still NULL at this point (it is only
     		 * assigned by ire_nexthop_ill() further down), so this always
     		 * takes the else branch and link-scope destinations never get
     		 * a unique DCE here - confirm whether that is intended.
     		 */
2074 		if (ill != NULL)
2075 			ifindex = ill->ill_phyint->phyint_ifindex;
2076 		else
2077 			flags &= ~IPDF_UNIQUE_DCE;
2078 	}
2079 
2080 	if (flags & IPDF_UNIQUE_DCE) {
2081 		/* Fallback to the default dce if allocation fails */
2082 		dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2083 		if (dce != NULL) {
2084 			generation = dce->dce_generation;
2085 		} else {
2086 			dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2087 			    &generation);
2088 		}
2089 	} else {
2090 		dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2091 	}
2092 	ASSERT(dce != NULL);
2093 	if (ixa->ixa_dce != NULL)
2094 		dce_refrele_notr(ixa->ixa_dce);
     	/* DEBUG only: same hold conversion as done for the ire above. */
2095 #ifdef DEBUG
2096 	dce_refhold_notr(dce);
2097 	dce_refrele(dce);
2098 #endif
2099 	ixa->ixa_dce = dce;
2100 	ixa->ixa_dce_generation = generation;
2101 
2102 
2103 	/*
2104 	 * For multicast with multirt we have a flag passed back from
2105 	 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2106 	 * possible multicast address.
2107 	 * We also need a flag for multicast since we can't check
2108 	 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2109 	 */
2110 	if (multirt) {
2111 		ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2112 		ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2113 	} else {
2114 		ixa->ixa_postfragfn = ire->ire_postfragfn;
2115 		ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2116 	}
     	/*
     	 * Reject/blackhole routes get no nce; any nce already cached in the
     	 * ixa is left alone in that case and when ire_to_nce() fails.
     	 */
2117 	if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2118 		/* Get an nce to cache. */
2119 		nce = ire_to_nce(ire, 0, firsthop);
2120 		if (nce == NULL) {
2121 			/* Allocation failure? */
2122 			ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2123 		} else {
2124 			if (ixa->ixa_nce != NULL)
2125 				nce_refrele(ixa->ixa_nce);
2126 			ixa->ixa_nce = nce;
2127 		}
2128 	}
2129 
2130 	/*
2131 	 * If the source address is a loopback address, the
2132 	 * destination had best be local or multicast.
2133 	 * If we are sending to an IRE_LOCAL using a loopback source then
2134 	 * it had better be the same zoneid.
2135 	 */
2136 	if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2137 		if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
2138 			ire = NULL;	/* Stored in ixa_ire */
2139 			error = EADDRNOTAVAIL;
2140 			goto bad_addr;
2141 		}
2142 		if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2143 			ire = NULL;	/* Stored in ixa_ire */
2144 			error = EADDRNOTAVAIL;
2145 			goto bad_addr;
2146 		}
2147 	}
2148 
2149 	/*
2150 	 * Does the caller want us to pick a source address?
2151 	 */
2152 	if (flags & IPDF_SELECT_SRC) {
2153 		in6_addr_t	src_addr;
2154 
2155 		/*
2156 		 * We use ire_nexthop_ill to avoid the under ipmp
2157 		 * interface for source address selection. Note that for ipmp
2158 		 * probe packets, ixa_ifindex would have been specified, and
2159 		 * the ip_select_route() invocation would have picked an ire
2160 		 * with ire_ill pointing at an under interface.
2161 		 */
     		/* The ill hold taken here is dropped on every exit below. */
2162 		ill = ire_nexthop_ill(ire);
2163 
2164 		/* If unreachable we have no ill but need some source */
2165 		if (ill == NULL) {
2166 			src_addr = ipv6_loopback;
2167 			/* Make sure we look for a better source address */
2168 			generation = SRC_GENERATION_VERIFY;
2169 		} else {
2170 			error = ip_select_source_v6(ill, &setsrc, dst_addr,
2171 			    zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2172 			    &src_addr, &generation, NULL);
2173 			if (error != 0) {
2174 				ire = NULL;	/* Stored in ixa_ire */
2175 				goto bad_addr;
2176 			}
2177 		}
2178 
2179 		/*
2180 		 * We allow the source address to go down.
2181 		 * However, we check that we don't use the loopback address
2182 		 * as a source when sending out on the wire.
2183 		 */
2184 		if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2185 		    !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2186 		    !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2187 			ire = NULL;	/* Stored in ixa_ire */
2188 			error = EADDRNOTAVAIL;
2189 			goto bad_addr;
2190 		}
2191 
     		/* Only now is the caller's source address overwritten. */
2192 		*src_addrp = src_addr;
2193 		ixa->ixa_src_generation = generation;
2194 	}
2195 
2196 	/*
2197 	 * Make sure we don't leave an unreachable ixa_nce in place
2198 	 * since ip_select_route is used when we unplumb i.e., remove
2199 	 * references on ixa_ire, ixa_nce, and ixa_dce.
2200 	 */
2201 	nce = ixa->ixa_nce;
2202 	if (nce != NULL && nce->nce_is_condemned) {
2203 		nce_refrele(nce);
2204 		ixa->ixa_nce = NULL;
2205 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2206 	}
2207 
2208 	/*
2209 	 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2210 	 * multicast. But pmtu discovery is only enabled for connected
2211 	 * sockets in general.
2212 	 */
2213 
2214 	/*
2215 	 * Set initial value for fragmentation limit.  Either conn_ip_output
2216 	 * or ULP might update it when there are routing changes.
2217 	 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2218 	 */
2219 	pmtu = ip_get_pmtu(ixa);
2220 	ixa->ixa_fragsize = pmtu;
2221 	/* Make sure ixa_fragsize and ixa_pmtu remain identical */
2222 	if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2223 		ixa->ixa_pmtu = pmtu;
2224 
2225 	/*
2226 	 * Extract information useful for some transports.
2227 	 * First we look for DCE metrics. Then we take what we have in
2228 	 * the metrics in the route, where the offlink is used if we have
2229 	 * one.
2230 	 */
2231 	if (uinfo != NULL) {
2232 		bzero(uinfo, sizeof (*uinfo));
2233 
2234 		if (dce->dce_flags & DCEF_UINFO)
2235 			*uinfo = dce->dce_uinfo;
2236 
2237 		rts_merge_metrics(uinfo, &ire->ire_metrics);
2238 
2239 		/* Allow ire_metrics to decrease the path MTU from above */
2240 		if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2241 			uinfo->iulp_mtu = pmtu;
2242 
2243 		uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2244 		uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2245 		uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2246 	}
2247 
2248 	if (ill != NULL)
2249 		ill_refrele(ill);
2250 
     	/*
     	 * error can still be ENETUNREACH/EHOSTUNREACH here even though the
     	 * ixa has been fully set up (see the reject/blackhole and
     	 * multicast handling above).
     	 */
2251 	return (error);
2252 
2253 bad_addr:
     	/*
     	 * ire is non-NULL only if its reference was not handed to ixa_ire
     	 * (i.e. route selection itself failed).
     	 */
2254 	if (ire != NULL)
2255 		ire_refrele(ire);
2256 
2257 	if (ill != NULL)
2258 		ill_refrele(ill);
2259 
2260 	/*
2261 	 * Make sure we don't leave an unreachable ixa_nce in place
2262 	 * since ip_select_route is used when we unplumb i.e., remove
2263 	 * references on ixa_ire, ixa_nce, and ixa_dce.
2264 	 */
2265 	nce = ixa->ixa_nce;
2266 	if (nce != NULL && nce->nce_is_condemned) {
2267 		nce_refrele(nce);
2268 		ixa->ixa_nce = NULL;
2269 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2270 	}
2271 
2272 	return (error);
2273 }
2274 
2275 /*
2276  * Handle protocols with which IP is less intimate.  There
2277  * can be more than one stream bound to a particular
2278  * protocol.  When this is the case, normally each one gets a copy
2279  * of any incoming packets.
2280  *
2281  * Zones notes:
2282  * Packets will be distributed to conns in all zones. This is really only
2283  * useful for ICMPv6 as only applications in the global zone can create raw
2284  * sockets for other protocols.
      *
      * mp is the received packet, ip6h its IPv6 header and ira the receive
      * attributes (ira_protocol selects the fanout list, ira_ill supplies the
      * ill and IP stack).  Every additional matching conn is given a duplicate
      * of mp; the first matching conn is given mp itself, last.  If no conn
      * matches, mp is handed to ip_fanout_send_icmp_v6() instead.
2285  */
2286 void
2287 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2288 {
2289 	mblk_t		*mp1;
2290 	in6_addr_t	laddr = ip6h->ip6_dst;
2291 	conn_t		*connp, *first_connp, *next_connp;
2292 	connf_t		*connfp;
2293 	ill_t		*ill = ira->ira_ill;
2294 	ip_stack_t	*ipst = ill->ill_ipst;
2295 
2296 	connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2297 	mutex_enter(&connfp->connf_lock);
     	/* Redundant: the for loop below re-initializes connp. */
2298 	connp = connfp->connf_head;
     	/* Find the first interested conn; it will receive the original mp. */
2299 	for (connp = connfp->connf_head; connp != NULL;
2300 	    connp = connp->conn_next) {
2301 		/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2302 		if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2303 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2304 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2305 			break;
2306 	}
2307 
2308 	if (connp == NULL) {
2309 		/*
2310 		 * No conn is interested in this protocol.  Drop the
2311 		 * lock and let ip_fanout_send_icmp_v6() answer with an
2312 		 * ICMPv6 parameter problem (unrecognized next header).
2313 		 */
2314 		mutex_exit(&connfp->connf_lock);
2315 		ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2316 		    ICMP6_PARAMPROB_NEXTHEADER, ira);
2317 		return;
2318 	}
2319 
2320 	ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2321 
     	/* Hold the first match; it is used after the lock is dropped. */
2322 	CONN_INC_REF(connp);
2323 	first_connp = connp;
2324 
2325 	/*
2326 	 * XXX: Fix the multiple protocol listeners case. We should not
2327 	 * be walking the conn->conn_next list here.
2328 	 */
2329 	connp = connp->conn_next;
     	/*
     	 * Deliver a copy to every further match.  Both ways out of this
     	 * loop leave connp == first_connp with connf_lock held.
     	 */
2330 	for (;;) {
2331 		while (connp != NULL) {
2332 			/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2333 			if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2334 			    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2335 			    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2336 			    ira, connp)))
2337 				break;
2338 			connp = connp->conn_next;
2339 		}
2340 
2341 		if (connp == NULL) {
2342 			/* No more interested clients */
2343 			connp = first_connp;
2344 			break;
2345 		}
     		/* Try a reference-counted dup first, then a full copy. */
2346 		if (((mp1 = dupmsg(mp)) == NULL) &&
2347 		    ((mp1 = copymsg(mp)) == NULL)) {
2348 			/* Memory allocation failed */
     			/*
     			 * Give up on the remaining conns; mp itself is not
     			 * freed here and still goes to the first conn below.
     			 */
2349 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2350 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
2351 			connp = first_connp;
2352 			break;
2353 		}
2354 
     		/* Hold connp so it stays valid while the lock is dropped. */
2355 		CONN_INC_REF(connp);
2356 		mutex_exit(&connfp->connf_lock);
2357 
2358 		ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2359 		    ira);
2360 
2361 		mutex_enter(&connfp->connf_lock);
2362 		/* Follow the next pointer before releasing the conn. */
2363 		next_connp = connp->conn_next;
2364 		CONN_DEC_REF(connp);
2365 		connp = next_connp;
2366 	}
2367 
2368 	/* Last one.  Send it upstream. */
2369 	mutex_exit(&connfp->connf_lock);
2370 
2371 	ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2372 
     	/* Drop the hold taken on the first match above. */
2373 	CONN_DEC_REF(connp);
2374 }
2375 
2376 /*
2377  * Called when it is conceptually a ULP that would send the packet
2378  * e.g., port unreachable and nexthdr unknown. Check that the packet
2379  * would have passed the IPsec global policy before sending the error.
2380  *
2381  * Send an ICMP error after patching up the packet appropriately.
2382  * Uses ip_drop_input and bumps the appropriate MIB.
2383  * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
      *
      * mp is the offending inbound packet and ira its receive attributes.
      * Only two icmp_type/icmp_code pairs are supported:
      * ICMP6_DST_UNREACH/ICMP6_DST_UNREACH_NOPORT and
      * ICMP6_PARAM_PROB/ICMP6_PARAMPROB_NEXTHEADER.  Any other icmp_type
      * panics on DEBUG kernels and silently frees mp otherwise.
      * No error is generated when ira_protocol is IPPROTO_ICMPV6; the packet
      * is counted as an input discard and freed.
2384  */
2385 void
2386 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2387     ip_recv_attr_t *ira)
2388 {
2389 	ip6_t		*ip6h;
2390 	boolean_t	secure;
2391 	ill_t		*ill = ira->ira_ill;
2392 	ip_stack_t	*ipst = ill->ill_ipst;
2393 	netstack_t	*ns = ipst->ips_netstack;
2394 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2395 
     	/* Nonzero when the packet arrived with IRAF_IPSEC_SECURE set. */
2396 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2397 
2398 	/*
2399 	 * We are generating an icmp error for some inbound packet.
2400 	 * Called from all ip_fanout_(udp, tcp, proto) functions.
2401 	 * Before we generate an error, check with global policy
2402 	 * to see whether this is allowed to enter the system. As
2403 	 * there is no "conn", we are checking with global policy.
2404 	 */
2405 	ip6h = (ip6_t *)mp->b_rptr;
2406 	if (secure || ipss->ipsec_inbound_v6_policy_present) {
     		/* A NULL return means there is nothing left to handle. */
2407 		mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2408 		if (mp == NULL)
2409 			return;
2410 	}
2411 
2412 	/* We never send errors for protocols that we do implement */
2413 	if (ira->ira_protocol == IPPROTO_ICMPV6) {
2414 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2415 		ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2416 		freemsg(mp);
2417 		return;
2418 	}
2419 
     	/*
     	 * In the two supported cases mp is handed to the ICMPv6 routine
     	 * and is not freed here.
     	 */
2420 	switch (icmp_type) {
2421 	case ICMP6_DST_UNREACH:
2422 		ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2423 
2424 		BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2425 		ip_drop_input("ipIfStatsNoPorts", mp, ill);
2426 
2427 		icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2428 		break;
2429 	case ICMP6_PARAM_PROB:
2430 		ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2431 
2432 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2433 		ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2434 
2435 		/* Let the system determine the offset for this one */
2436 		icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2437 		break;
2438 	default:
     		/* Caller bug: panic on DEBUG, otherwise just free mp. */
2439 #ifdef DEBUG
2440 		panic("ip_fanout_send_icmp_v6: wrong type");
2441 		/*NOTREACHED*/
2442 #else
2443 		freemsg(mp);
2444 		break;
2445 #endif
2446 	}
2447 }
2448 
2449 /*
2450  * Fanout for UDP packets that are multicast or ICMP errors.
2451  * (Unicast fanout is handled in ip_input_v6.)
2452  *
2453  * The caller passes the local port in lport and the foreign port in fport;
2454  * ira_flags must have IRAF_MULTIBROADCAST or IRAF_ICMP_ERROR set.
2455  *
2456  * The packet goes to the first matching conn in the UDP hash bucket.  If
2457  * that conn has SO_REUSEADDR set (conn_reuseaddr), every other matching
2458  * conn in the bucket gets its own copy as well.
2459  *
2460  * If nothing matches, the packet is offered to raw IPPROTO_UDP listeners via
2461  * ip_fanout_proto_v6(), or else answered with an ICMPv6 port unreachable.
2462  *
2463  * Zones notes:
2464  * Earlier in ip_input on a system with multiple shared-IP zones we
2465  * duplicate the multicast and broadcast packets and send them up
2466  * with each explicit zoneid that exists on that ill.
2467  * This means that here we can match the zoneid with SO_ALLZONES being special.
      *
      * The ip6_udp_fanmb statistic is bumped exactly once per delivery.
2468  */
2469 void
2470 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2471     ip_recv_attr_t *ira)
2472 {
2473 	in6_addr_t	laddr;
2474 	conn_t		*connp;
2475 	connf_t		*connfp;
2476 	in6_addr_t	faddr;
2477 	ill_t		*ill = ira->ira_ill;
2478 	ip_stack_t	*ipst = ill->ill_ipst;
2479 
2480 	ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2481 
2482 	laddr = ip6h->ip6_dst;
2483 	faddr = ip6h->ip6_src;
2484 
2485 	/* Attempt to find a client stream based on destination port. */
2486 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2487 	mutex_enter(&connfp->connf_lock);
2488 	connp = connfp->connf_head;
2489 	while (connp != NULL) {
2490 		if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2491 		    conn_wantpacket_v6(connp, ira, ip6h) &&
2492 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2493 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2494 			break;
2495 		connp = connp->conn_next;
2496 	}
2497 
2498 	if (connp == NULL)
2499 		goto notfound;
2500 
     	/* Hold the first match; it is used after the lock is dropped. */
2501 	CONN_INC_REF(connp);
2502 
2503 	if (connp->conn_reuseaddr) {
2504 		conn_t		*first_connp = connp;
2505 		conn_t		*next_connp;
2506 		mblk_t		*mp1;
2507 
     		/*
     		 * Deliver a copy to every further match.  Both ways out of
     		 * this loop leave connp == first_connp with connf_lock held.
     		 */
2508 		connp = connp->conn_next;
2509 		for (;;) {
2510 			while (connp != NULL) {
2511 				if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2512 				    fport, faddr) &&
2513 				    conn_wantpacket_v6(connp, ira, ip6h) &&
2514 				    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2515 				    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2516 				    ira, connp)))
2517 					break;
2518 				connp = connp->conn_next;
2519 			}
2520 			if (connp == NULL) {
2521 				/* No more interested clients */
2522 				connp = first_connp;
2523 				break;
2524 			}
     			/* Try a reference-counted dup first, then a copy. */
2525 			if (((mp1 = dupmsg(mp)) == NULL) &&
2526 			    ((mp1 = copymsg(mp)) == NULL)) {
2527 				/* Memory allocation failed */
     				/*
     				 * Give up on the remaining conns; mp is not
     				 * freed here and still goes to the first
     				 * conn below.
     				 */
2528 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2529 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
2530 				connp = first_connp;
2531 				break;
2532 			}
2533 
     			/* Hold connp so it survives the unlocked delivery. */
2534 			CONN_INC_REF(connp);
2535 			mutex_exit(&connfp->connf_lock);
2536 
     			/* Count this delivery once (and only once). */
2537 			IP6_STAT(ipst, ip6_udp_fanmb);
2538 			ip_fanout_udp_conn(connp, mp1, NULL,
2539 			    (ip6_t *)mp1->b_rptr, ira);
2540 
2541 			mutex_enter(&connfp->connf_lock);
2542 			/* Follow the next pointer before releasing the conn. */
2543 			next_connp = connp->conn_next;
2545 			CONN_DEC_REF(connp);
2546 			connp = next_connp;
2547 		}
2548 	}
2549 
2550 	/* Last one.  Send it upstream. */
2551 	mutex_exit(&connfp->connf_lock);
2552 
2553 	IP6_STAT(ipst, ip6_udp_fanmb);
2554 	ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
     	/* Drop the hold taken on the first match above. */
2555 	CONN_DEC_REF(connp);
2556 	return;
2557 
2558 notfound:
2559 	mutex_exit(&connfp->connf_lock);
2560 	/*
2561 	 * No one bound to this port.  Is
2562 	 * there a client that wants all
2563 	 * unclaimed datagrams?
2564 	 */
2565 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2566 		ASSERT(ira->ira_protocol == IPPROTO_UDP);
2567 		ip_fanout_proto_v6(mp, ip6h, ira);
2568 	} else {
2569 		ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2570 		    ICMP6_DST_UNREACH_NOPORT, ira);
2571 	}
2572 }
2573 
2574 /*
2575  * int ip_find_hdr_v6()
2576  *
2577  * This routine is used by the upper layer protocols, iptun, and IPsec:
2578  * - Set extension header pointers to appropriate locations
2579  * - Determine IPv6 header length and return it
2580  * - Return a pointer to the last nexthdr value
2581  *
2582  * The caller must initialize ipp_fields.
2583  * The upper layer protocols normally set label_separate which makes the
2584  * routine put the TX label in ipp_label_v6. If this is not set then
2585  * the hop-by-hop options including the label are placed in ipp_hopopts.
2586  *
2587  * NOTE: If multiple extension headers of the same type are present,
2588  * ip_find_hdr_v6() will set the respective extension header pointers
2589  * to the first one that it encounters in the IPv6 header.  It also
2590  * steps over fragment headers (recording the first one in ipp_fraghdr).
2591  * This routine deals with malformed packets
2592  * of various sorts in which case the returned length is up to the
2593  * malformed part.
      *
      * Only the first mblk is examined (parsing stops at mp->b_wptr).
      * If nexthdrp is not NULL, *nexthdrp receives the last next-header value
      * that was read; when parsing stops at a truncated extension header this
      * is the type of that truncated header.
2593  */
2594 int
2595 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2596     uint8_t *nexthdrp)
2597 {
2598 	uint_t	length, ehdrlen;
2599 	uint8_t nexthdr;
2600 	uint8_t *whereptr, *endptr;
2601 	ip6_dest_t *tmpdstopts;
2602 	ip6_rthdr_t *tmprthdr;
2603 	ip6_hbh_t *tmphopopts;
2604 	ip6_frag_t *tmpfraghdr;
2605 
     	/* These three come straight from the fixed IPv6 header. */
2606 	ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2607 	ipp->ipp_hoplimit = ip6h->ip6_hops;
2608 	ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2609 	ipp->ipp_addr = ip6h->ip6_dst;
2610 
2611 	length = IPV6_HDR_LEN;
2612 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2613 	endptr = mp->b_wptr;
2614 
2615 	nexthdr = ip6h->ip6_nxt;
2616 	while (whereptr < endptr) {
2617 		/* Is there enough left for len + nexthdr? */
2618 		if (whereptr + MIN_EHDR_LEN > endptr)
2619 			goto done;
2620 
     		/*
     		 * Each case checks that the whole header fits before endptr
     		 * and only then advances nexthdr to the header's own
     		 * next-header field.
     		 */
2621 		switch (nexthdr) {
2622 		case IPPROTO_HOPOPTS: {
2623 			/* We check for any CIPSO */
2624 			uchar_t *secopt;
2625 			boolean_t hbh_needed;
2626 			uchar_t *after_secopt;
2627 
2628 			tmphopopts = (ip6_hbh_t *)whereptr;
     			/* Length field is in 8-byte units, first 8 excluded. */
2629 			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2630 			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
2631 				goto done;
2632 			nexthdr = tmphopopts->ip6h_nxt;
2633 
2634 			if (!label_separate) {
2635 				secopt = NULL;
2636 				after_secopt = whereptr;
2637 			} else {
2638 				/*
2639 				 * We have dropped packets with bad options in
2640 				 * ip6_input. No need to check return value
2641 				 * here.
2642 				 */
     				/*
     				 * NOTE(review): secopt, after_secopt and
     				 * hbh_needed are read below, so this relies
     				 * on tsol_find_secopt_v6() always storing
     				 * them - confirm.
     				 */
2643 				(void) tsol_find_secopt_v6(whereptr, ehdrlen,
2644 				    &secopt, &after_secopt, &hbh_needed);
2645 			}
2646 			if (secopt != NULL && after_secopt - whereptr > 0) {
2647 				ipp->ipp_fields |= IPPF_LABEL_V6;
2648 				ipp->ipp_label_v6 = secopt;
2649 				ipp->ipp_label_len_v6 = after_secopt - whereptr;
2650 			} else {
     				/* No label: report the whole header as hbh. */
2651 				ipp->ipp_label_len_v6 = 0;
2652 				after_secopt = whereptr;
2653 				hbh_needed = B_TRUE;
2654 			}
2655 			/* return only 1st hbh */
2656 			if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2657 				ipp->ipp_fields |= IPPF_HOPOPTS;
2658 				ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2659 				ipp->ipp_hopoptslen = ehdrlen -
2660 				    ipp->ipp_label_len_v6;
2661 			}
2662 			break;
2663 		}
2664 		case IPPROTO_DSTOPTS:
2665 			tmpdstopts = (ip6_dest_t *)whereptr;
2666 			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2667 			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
2668 				goto done;
2669 			nexthdr = tmpdstopts->ip6d_nxt;
2670 			/*
2671 			 * ipp_dstopts is set to the destination header after a
2672 			 * routing header.
2673 			 * Assume it is a post-rthdr destination header
2674 			 * and adjust when we find an rthdr.
2675 			 */
2676 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2677 				ipp->ipp_fields |= IPPF_DSTOPTS;
2678 				ipp->ipp_dstopts = tmpdstopts;
2679 				ipp->ipp_dstoptslen = ehdrlen;
2680 			}
2681 			break;
2682 		case IPPROTO_ROUTING:
2683 			tmprthdr = (ip6_rthdr_t *)whereptr;
2684 			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2685 			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
2686 				goto done;
2687 			nexthdr = tmprthdr->ip6r_nxt;
2688 			/* return only 1st rthdr */
2689 			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2690 				ipp->ipp_fields |= IPPF_RTHDR;
2691 				ipp->ipp_rthdr = tmprthdr;
2692 				ipp->ipp_rthdrlen = ehdrlen;
2693 			}
2694 			/*
2695 			 * Make any destination header we've seen be a
2696 			 * pre-rthdr destination header.
2697 			 */
2698 			if (ipp->ipp_fields & IPPF_DSTOPTS) {
2699 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2700 				ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2701 				ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2702 				ipp->ipp_dstopts = NULL;
2703 				ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2704 				ipp->ipp_dstoptslen = 0;
2705 			}
2706 			break;
2707 		case IPPROTO_FRAGMENT:
2708 			tmpfraghdr = (ip6_frag_t *)whereptr;
     			/* Fragment headers have a fixed size. */
2709 			ehdrlen = sizeof (ip6_frag_t);
2710 			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2711 				goto done;
2712 			nexthdr = tmpfraghdr->ip6f_nxt;
2713 			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2714 				ipp->ipp_fields |= IPPF_FRAGHDR;
2715 				ipp->ipp_fraghdr = tmpfraghdr;
2716 				ipp->ipp_fraghdrlen = ehdrlen;
2717 			}
2718 			break;
2719 		case IPPROTO_NONE:
2720 		default:
     			/* Anything else ends the extension header chain. */
2721 			goto done;
2722 		}
     		/* Only fully validated headers are counted in length. */
2723 		length += ehdrlen;
2724 		whereptr += ehdrlen;
2725 	}
2726 done:
2727 	if (nexthdrp != NULL)
2728 		*nexthdrp = nexthdr;
2729 	return (length);
2730 }
2731 
2732 /*
2733  * Try to determine where and what are the IPv6 header length and
2734  * pointer to nexthdr value for the upper layer protocol (or an
2735  * unknown next hdr).
2736  *
2737  * On success *hdr_length_ptr is the length of the fixed header plus all
      * parsed extension headers, and *nexthdrpp points (inside the packet) at
      * the nexthdr field that names the upper layer protocol.
2738  * Must handle malformed packets of various sorts.
2739  * Function returns failure (B_FALSE) for malformed cases.
      *
      * Only the first mblk is examined (parsing stops at mp->b_wptr).
      * On failure the two outputs are not reliable: they hold the values
      * for the last extension header that was fully parsed, or are left
      * untouched if none was.
2740  */
2741 boolean_t
2742 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2743     uint8_t **nexthdrpp)
2744 {
2745 	uint16_t length;
2746 	uint_t	ehdrlen;
2747 	uint8_t	*nexthdrp;
2748 	uint8_t *whereptr;
2749 	uint8_t *endptr;
2750 	ip6_dest_t *desthdr;
2751 	ip6_rthdr_t *rthdr;
2752 	ip6_frag_t *fraghdr;
2753 
2754 	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2755 	length = IPV6_HDR_LEN;
2756 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2757 	endptr = mp->b_wptr;
2758 
2759 	nexthdrp = &ip6h->ip6_nxt;
2760 	while (whereptr < endptr) {
2761 		/* Is there enough left for len + nexthdr? */
2762 		if (whereptr + MIN_EHDR_LEN > endptr)
2763 			break;
2764 
2765 		switch (*nexthdrp) {
2766 		case IPPROTO_HOPOPTS:
2767 		case IPPROTO_DSTOPTS:
2768 			/* Assumes the headers are identical for hbh and dst */
2769 			desthdr = (ip6_dest_t *)whereptr;
     			/* Length field is in 8-byte units, first 8 excluded. */
2770 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
2771 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
2772 				return (B_FALSE);
2773 			nexthdrp = &desthdr->ip6d_nxt;
2774 			break;
2775 		case IPPROTO_ROUTING:
2776 			rthdr = (ip6_rthdr_t *)whereptr;
2777 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
2778 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
2779 				return (B_FALSE);
2780 			nexthdrp = &rthdr->ip6r_nxt;
2781 			break;
2782 		case IPPROTO_FRAGMENT:
2783 			fraghdr = (ip6_frag_t *)whereptr;
     			/* Fragment headers have a fixed size. */
2784 			ehdrlen = sizeof (ip6_frag_t);
2785 			if ((uchar_t *)&fraghdr[1] > endptr)
2786 				return (B_FALSE);
2787 			nexthdrp = &fraghdr->ip6f_nxt;
2788 			break;
2789 		case IPPROTO_NONE:
2790 			/* No next header means we're finished */
2791 		default:
2792 			*hdr_length_ptr = length;
2793 			*nexthdrpp = nexthdrp;
2794 			return (B_TRUE);
2795 		}
2796 		length += ehdrlen;
2797 		whereptr += ehdrlen;
     		/* Publish progress after every fully parsed header. */
2798 		*hdr_length_ptr = length;
2799 		*nexthdrpp = nexthdrp;
2800 	}
     	/*
     	 * The mblk ran out (or has fewer than MIN_EHDR_LEN bytes left);
     	 * decide based on what the last nexthdr field promised.
     	 */
2801 	switch (*nexthdrp) {
2802 	case IPPROTO_HOPOPTS:
2803 	case IPPROTO_DSTOPTS:
2804 	case IPPROTO_ROUTING:
2805 	case IPPROTO_FRAGMENT:
2806 		/*
2807 		 * If any known extension headers are still to be processed,
2808 		 * the packet's malformed (or at least all the IP header(s) are
2809 		 * not in the same mblk - and that should never happen.
2810 		 */
2811 		return (B_FALSE);
2812 
2813 	default:
2814 		/*
2815 		 * If we get here, we know that all of the IP headers were in
2816 		 * the same mblk, even if the ULP header is in the next mblk.
2817 		 */
2818 		*hdr_length_ptr = length;
2819 		*nexthdrpp = nexthdrp;
2820 		return (B_TRUE);
2821 	}
2822 }
2823 
2824 /*
2825  * Return the length of the IPv6 related headers (including extension headers)
2826  * Returns a length even if the packet is malformed.
      *
      * ip_hdr_length_nexthdr_v6() can fail without storing a length (e.g. when
      * the very first extension header runs past the end of the mblk).  The
      * result is therefore seeded with IPV6_HDR_LEN so that a malformed
      * packet yields at least the fixed header length instead of an
      * indeterminate value.
2827  */
2828 int
2829 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2830 {
2831 	uint16_t hdr_len = IPV6_HDR_LEN;
2832 	uint8_t	*nexthdrp;
2833 
     	/* Return value deliberately ignored: we want a length regardless. */
2834 	(void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2835 	return (hdr_len);
2836 }
2837 
2838 /*
2839  * Parse and process any hop-by-hop or destination options.
2840  *
2841  * Assumes that q is an ill read queue so that ICMP errors for link-local
2842  * destinations are sent out the correct interface.
2843  *
2844  * Returns -1 if there was an error and mp has been consumed.
2845  * Returns 0 if no special action is needed.
2846  * Returns 1 if the packet contained a router alert option for this node
2847  * which is verified to be "interesting/known" for our implementation.
2848  *
2849  * XXX Note: In future as more hbh or dest options are defined,
2850  * it may be better to have different routines for hbh and dest
2851  * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2852  * may have same value in different namespaces. Or is it same namespace ??
2853  * Current code checks for each opt_type (other than pads) if it is in
2854  * the expected  nexthdr (hbh or dest)
2855  */
2856 int
2857 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2858     uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2859 {
2860 	uint8_t opt_type;
2861 	uint_t optused;
2862 	int ret = 0;
2863 	const char *errtype;
2864 	ill_t		*ill = ira->ira_ill;
2865 	ip_stack_t	*ipst = ill->ill_ipst;
2866 
2867 	while (optlen != 0) {
2868 		opt_type = *optptr;
2869 		if (opt_type == IP6OPT_PAD1) {
2870 			optused = 1;
2871 		} else {
2872 			if (optlen < 2)
2873 				goto bad_opt;
2874 			errtype = "malformed";
2875 			if (opt_type == ip6opt_ls) {
2876 				optused = 2 + optptr[1];
2877 				if (optused > optlen)
2878 					goto bad_opt;
2879 			} else switch (opt_type) {
2880 			case IP6OPT_PADN:
2881 				/*
2882 				 * Note:We don't verify that (N-2) pad octets
2883 				 * are zero as required by spec. Adhere to
2884 				 * "be liberal in what you accept..." part of
2885 				 * implementation philosophy (RFC791,RFC1122)
2886 				 */
2887 				optused = 2 + optptr[1];
2888 				if (optused > optlen)
2889 					goto bad_opt;
2890 				break;
2891 
2892 			case IP6OPT_JUMBO:
2893 				if (hdr_type != IPPROTO_HOPOPTS)
2894 					goto opt_error;
2895 				goto opt_error; /* XXX Not implemented! */
2896 
2897 			case IP6OPT_ROUTER_ALERT: {
2898 				struct ip6_opt_router *or;
2899 
2900 				if (hdr_type != IPPROTO_HOPOPTS)
2901 					goto opt_error;
2902 				optused = 2 + optptr[1];
2903 				if (optused > optlen)
2904 					goto bad_opt;
2905 				or = (struct ip6_opt_router *)optptr;
2906 				/* Check total length and alignment */
2907 				if (optused != sizeof (*or) ||
2908 				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
2909 					goto opt_error;
2910 				/* Check value */
2911 				switch (*((uint16_t *)or->ip6or_value)) {
2912 				case IP6_ALERT_MLD:
2913 				case IP6_ALERT_RSVP:
2914 					ret = 1;
2915 				}
2916 				break;
2917 			}
2918 			case IP6OPT_HOME_ADDRESS: {
2919 				/*
2920 				 * Minimal support for the home address option
2921 				 * (which is required by all IPv6 nodes).
2922 				 * Implement by just swapping the home address
2923 				 * and source address.
2924 				 * XXX Note: this has IPsec implications since
2925 				 * AH needs to take this into account.
2926 				 * Also, when IPsec is used we need to ensure
2927 				 * that this is only processed once
2928 				 * in the received packet (to avoid swapping
2929 				 * back and forth).
2930 				 * NOTE:This option processing is considered
2931 				 * to be unsafe and prone to a denial of
2932 				 * service attack.
2933 				 * The current processing is not safe even with
2934 				 * IPsec secured IP packets. Since the home
2935 				 * address option processing requirement still
2936 				 * is in the IETF draft and in the process of
2937 				 * being redefined for its usage, it has been
2938 				 * decided to turn off the option by default.
2939 				 * If this section of code needs to be executed,
2940 				 * ndd variable ip6_ignore_home_address_opt
2941 				 * should be set to 0 at the user's own risk.
2942 				 */
2943 				struct ip6_opt_home_address *oh;
2944 				in6_addr_t tmp;
2945 
2946 				if (ipst->ips_ipv6_ignore_home_address_opt)
2947 					goto opt_error;
2948 
2949 				if (hdr_type != IPPROTO_DSTOPTS)
2950 					goto opt_error;
2951 				optused = 2 + optptr[1];
2952 				if (optused > optlen)
2953 					goto bad_opt;
2954 
2955 				/*
2956 				 * We did this dest. opt the first time
2957 				 * around (i.e. before AH processing).
2958 				 * If we've done AH... stop now.
2959 				 */
2960 				if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2961 				    ira->ira_ipsec_ah_sa != NULL)
2962 					break;
2963 
2964 				oh = (struct ip6_opt_home_address *)optptr;
2965 				/* Check total length and alignment */
2966 				if (optused < sizeof (*oh) ||
2967 				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2968 					goto opt_error;
2969 				/* Swap ip6_src and the home address */
2970 				tmp = ip6h->ip6_src;
2971 				/* XXX Note: only 8 byte alignment option */
2972 				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2973 				*(in6_addr_t *)oh->ip6oh_addr = tmp;
2974 				break;
2975 			}
2976 
2977 			case IP6OPT_TUNNEL_LIMIT:
2978 				if (hdr_type != IPPROTO_DSTOPTS) {
2979 					goto opt_error;
2980 				}
2981 				optused = 2 + optptr[1];
2982 				if (optused > optlen) {
2983 					goto bad_opt;
2984 				}
2985 				if (optused != 3) {
2986 					goto opt_error;
2987 				}
2988 				break;
2989 
2990 			default:
2991 				errtype = "unknown";
2992 				/* FALLTHROUGH */
2993 			opt_error:
2994 				/* Determine which zone should send error */
2995 				switch (IP6OPT_TYPE(opt_type)) {
2996 				case IP6OPT_TYPE_SKIP:
2997 					optused = 2 + optptr[1];
2998 					if (optused > optlen)
2999 						goto bad_opt;
3000 					ip1dbg(("ip_process_options_v6: %s "
3001 					    "opt 0x%x skipped\n",
3002 					    errtype, opt_type));
3003 					break;
3004 				case IP6OPT_TYPE_DISCARD:
3005 					ip1dbg(("ip_process_options_v6: %s "
3006 					    "opt 0x%x; packet dropped\n",
3007 					    errtype, opt_type));
3008 					BUMP_MIB(ill->ill_ip_mib,
3009 					    ipIfStatsInHdrErrors);
3010 					ip_drop_input("ipIfStatsInHdrErrors",
3011 					    mp, ill);
3012 					freemsg(mp);
3013 					return (-1);
3014 				case IP6OPT_TYPE_ICMP:
3015 					BUMP_MIB(ill->ill_ip_mib,
3016 					    ipIfStatsInHdrErrors);
3017 					ip_drop_input("ipIfStatsInHdrErrors",
3018 					    mp, ill);
3019 					icmp_param_problem_v6(mp,
3020 					    ICMP6_PARAMPROB_OPTION,
3021 					    (uint32_t)(optptr -
3022 					    (uint8_t *)ip6h),
3023 					    B_FALSE, ira);
3024 					return (-1);
3025 				case IP6OPT_TYPE_FORCEICMP:
3026 					BUMP_MIB(ill->ill_ip_mib,
3027 					    ipIfStatsInHdrErrors);
3028 					ip_drop_input("ipIfStatsInHdrErrors",
3029 					    mp, ill);
3030 					icmp_param_problem_v6(mp,
3031 					    ICMP6_PARAMPROB_OPTION,
3032 					    (uint32_t)(optptr -
3033 					    (uint8_t *)ip6h),
3034 					    B_TRUE, ira);
3035 					return (-1);
3036 				default:
3037 					ASSERT(0);
3038 				}
3039 			}
3040 		}
3041 		optlen -= optused;
3042 		optptr += optused;
3043 	}
3044 	return (ret);
3045 
3046 bad_opt:
3047 	/* Determine which zone should send error */
3048 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3049 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
3050 	    (uint32_t)(optptr - (uint8_t *)ip6h),
3051 	    B_FALSE, ira);
3052 	return (-1);
3053 }
3054 
3055 /*
3056  * Process a routing header that is not yet empty.
3057  * Because of RFC 5095, we now reject all route headers.
3058  */
3059 void
3060 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
3061     ip_recv_attr_t *ira)
3062 {
3063 	ill_t		*ill = ira->ira_ill;
3064 	ip_stack_t	*ipst = ill->ill_ipst;
3065 
3066 	ASSERT(rth->ip6r_segleft != 0);
3067 
3068 	if (!ipst->ips_ipv6_forward_src_routed) {
3069 		/* XXX Check for source routed out same interface? */
3070 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
3071 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
3072 		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
3073 		freemsg(mp);
3074 		return;
3075 	}
3076 
3077 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3078 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3079 	    (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3080 	    B_FALSE, ira);
3081 }
3082 
3083 /*
3084  * Read side put procedure for IPv6 module.
3085  */
3086 int
3087 ip_rput_v6(queue_t *q, mblk_t *mp)
3088 {
3089 	ill_t		*ill;
3090 
3091 	ill = (ill_t *)q->q_ptr;
3092 	if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3093 		union DL_primitives *dl;
3094 
3095 		dl = (union DL_primitives *)mp->b_rptr;
3096 		/*
3097 		 * Things are opening or closing - only accept DLPI
3098 		 * ack messages. If the stream is closing and ip_wsrv
3099 		 * has completed, ip_close is out of the qwait, but has
3100 		 * not yet completed qprocsoff. Don't proceed any further
3101 		 * because the ill has been cleaned up and things hanging
3102 		 * off the ill have been freed.
3103 		 */
3104 		if ((mp->b_datap->db_type != M_PCPROTO) ||
3105 		    (dl->dl_primitive == DL_UNITDATA_IND)) {
3106 			inet_freemsg(mp);
3107 			return (0);
3108 		}
3109 	}
3110 	if (DB_TYPE(mp) == M_DATA) {
3111 		struct mac_header_info_s mhi;
3112 
3113 		ip_mdata_to_mhi(ill, mp, &mhi);
3114 		ip_input_v6(ill, NULL, mp, &mhi);
3115 	} else {
3116 		ip_rput_notdata(ill, mp);
3117 	}
3118 	return (0);
3119 }
3120 
3121 /*
3122  * Walk through the IPv6 packet in mp and see if there's an AH header
3123  * in it.  See if the AH header needs to get done before other headers in
3124  * the packet.  (Worker function for ipsec_early_ah_v6().)
3125  */
3126 #define	IPSEC_HDR_DONT_PROCESS	0
3127 #define	IPSEC_HDR_PROCESS	1
3128 #define	IPSEC_MEMORY_ERROR	2 /* or malformed packet */
3129 static int
3130 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3131 {
3132 	uint_t	length;
3133 	uint_t	ehdrlen;
3134 	uint8_t *whereptr;
3135 	uint8_t *endptr;
3136 	uint8_t *nexthdrp;
3137 	ip6_dest_t *desthdr;
3138 	ip6_rthdr_t *rthdr;
3139 	ip6_t	*ip6h;
3140 
3141 	/*
3142 	 * For now just pullup everything.  In general, the less pullups,
3143 	 * the better, but there's so much squirrelling through anyway,
3144 	 * it's just easier this way.
3145 	 */
3146 	if (!pullupmsg(mp, -1)) {
3147 		return (IPSEC_MEMORY_ERROR);
3148 	}
3149 
3150 	ip6h = (ip6_t *)mp->b_rptr;
3151 	length = IPV6_HDR_LEN;
3152 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3153 	endptr = mp->b_wptr;
3154 
3155 	/*
3156 	 * We can't just use the argument nexthdr in the place
3157 	 * of nexthdrp becaue we don't dereference nexthdrp
3158 	 * till we confirm whether it is a valid address.
3159 	 */
3160 	nexthdrp = &ip6h->ip6_nxt;
3161 	while (whereptr < endptr) {
3162 		/* Is there enough left for len + nexthdr? */
3163 		if (whereptr + MIN_EHDR_LEN > endptr)
3164 			return (IPSEC_MEMORY_ERROR);
3165 
3166 		switch (*nexthdrp) {
3167 		case IPPROTO_HOPOPTS:
3168 		case IPPROTO_DSTOPTS:
3169 			/* Assumes the headers are identical for hbh and dst */
3170 			desthdr = (ip6_dest_t *)whereptr;
3171 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
3172 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
3173 				return (IPSEC_MEMORY_ERROR);
3174 			/*
3175 			 * Return DONT_PROCESS because the destination
3176 			 * options header may be for each hop in a
3177 			 * routing-header, and we only want AH if we're
3178 			 * finished with routing headers.
3179 			 */
3180 			if (*nexthdrp == IPPROTO_DSTOPTS)
3181 				return (IPSEC_HDR_DONT_PROCESS);
3182 			nexthdrp = &desthdr->ip6d_nxt;
3183 			break;
3184 		case IPPROTO_ROUTING:
3185 			rthdr = (ip6_rthdr_t *)whereptr;
3186 
3187 			/*
3188 			 * If there's more hops left on the routing header,
3189 			 * return now with DON'T PROCESS.
3190 			 */
3191 			if (rthdr->ip6r_segleft > 0)
3192 				return (IPSEC_HDR_DONT_PROCESS);
3193 
3194 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
3195 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
3196 				return (IPSEC_MEMORY_ERROR);
3197 			nexthdrp = &rthdr->ip6r_nxt;
3198 			break;
3199 		case IPPROTO_FRAGMENT:
3200 			/* Wait for reassembly */
3201 			return (IPSEC_HDR_DONT_PROCESS);
3202 		case IPPROTO_AH:
3203 			*nexthdr = IPPROTO_AH;
3204 			return (IPSEC_HDR_PROCESS);
3205 		case IPPROTO_NONE:
3206 			/* No next header means we're finished */
3207 		default:
3208 			return (IPSEC_HDR_DONT_PROCESS);
3209 		}
3210 		length += ehdrlen;
3211 		whereptr += ehdrlen;
3212 	}
3213 	/*
3214 	 * Malformed/truncated packet.
3215 	 */
3216 	return (IPSEC_MEMORY_ERROR);
3217 }
3218 
3219 /*
3220  * Path for AH if options are present.
3221  * Returns NULL if the mblk was consumed.
3222  *
3223  * Sometimes AH needs to be done before other IPv6 headers for security
3224  * reasons.  This function (and its ipsec_needs_processing_v6() above)
3225  * indicates if that is so, and fans out to the appropriate IPsec protocol
3226  * for the datagram passed in.
3227  */
3228 mblk_t *
3229 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3230 {
3231 	uint8_t nexthdr;
3232 	ah_t *ah;
3233 	ill_t		*ill = ira->ira_ill;
3234 	ip_stack_t	*ipst = ill->ill_ipst;
3235 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3236 
3237 	switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3238 	case IPSEC_MEMORY_ERROR:
3239 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3240 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
3241 		freemsg(mp);
3242 		return (NULL);
3243 	case IPSEC_HDR_DONT_PROCESS:
3244 		return (mp);
3245 	}
3246 
3247 	/* Default means send it to AH! */
3248 	ASSERT(nexthdr == IPPROTO_AH);
3249 
3250 	if (!ipsec_loaded(ipss)) {
3251 		ip_proto_not_sup(mp, ira);
3252 		return (NULL);
3253 	}
3254 
3255 	mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3256 	if (mp == NULL)
3257 		return (NULL);
3258 	ASSERT(ah != NULL);
3259 	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3260 	ASSERT(ira->ira_ipsec_ah_sa != NULL);
3261 	ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3262 	mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3263 
3264 	if (mp == NULL) {
3265 		/*
3266 		 * Either it failed or is pending. In the former case
3267 		 * ipIfStatsInDiscards was increased.
3268 		 */
3269 		return (NULL);
3270 	}
3271 
3272 	/* we're done with IPsec processing, send it up */
3273 	ip_input_post_ipsec(mp, ira);
3274 	return (NULL);
3275 }
3276 
3277 /*
3278  * Reassemble fragment.
3279  * When it returns a completed message the first mblk will only contain
3280  * the headers prior to the fragment header, with the nexthdr value updated
3281  * to be the header after the fragment header.
3282  */
3283 mblk_t *
3284 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3285     ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3286 {
3287 	uint32_t	ident = ntohl(fraghdr->ip6f_ident);
3288 	uint16_t	offset;
3289 	boolean_t	more_frags;
3290 	uint8_t		nexthdr = fraghdr->ip6f_nxt;
3291 	in6_addr_t	*v6dst_ptr;
3292 	in6_addr_t	*v6src_ptr;
3293 	uint_t		end;
3294 	uint_t		hdr_length;
3295 	size_t		count;
3296 	ipf_t		*ipf;
3297 	ipf_t		**ipfp;
3298 	ipfb_t		*ipfb;
3299 	mblk_t		*mp1;
3300 	uint8_t		ecn_info = 0;
3301 	size_t		msg_len;
3302 	mblk_t		*tail_mp;
3303 	mblk_t		*t_mp;
3304 	boolean_t	pruned = B_FALSE;
3305 	uint32_t	sum_val;
3306 	uint16_t	sum_flags;
3307 	ill_t		*ill = ira->ira_ill;
3308 	ip_stack_t	*ipst = ill->ill_ipst;
3309 	uint_t		prev_nexthdr_offset;
3310 	uint8_t		prev_nexthdr;
3311 	uint8_t		*ptr;
3312 	uint32_t	packet_size;
3313 
3314 	/*
3315 	 * We utilize hardware computed checksum info only for UDP since
3316 	 * IP fragmentation is a normal occurence for the protocol.  In
3317 	 * addition, checksum offload support for IP fragments carrying
3318 	 * UDP payload is commonly implemented across network adapters.
3319 	 */
3320 	ASSERT(ira->ira_rill != NULL);
3321 	if (nexthdr == IPPROTO_UDP && dohwcksum &&
3322 	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3323 	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
3324 		mblk_t *mp1 = mp->b_cont;
3325 		int32_t len;
3326 
3327 		/* Record checksum information from the packet */
3328 		sum_val = (uint32_t)DB_CKSUM16(mp);
3329 		sum_flags = DB_CKSUMFLAGS(mp);
3330 
3331 		/* fragmented payload offset from beginning of mblk */
3332 		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3333 
3334 		if ((sum_flags & HCK_PARTIALCKSUM) &&
3335 		    (mp1 == NULL || mp1->b_cont == NULL) &&
3336 		    offset >= DB_CKSUMSTART(mp) &&
3337 		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3338 			uint32_t adj;
3339 			/*
3340 			 * Partial checksum has been calculated by hardware
3341 			 * and attached to the packet; in addition, any
3342 			 * prepended extraneous data is even byte aligned.
3343 			 * If any such data exists, we adjust the checksum;
3344 			 * this would also handle any postpended data.
3345 			 */
3346 			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3347 			    mp, mp1, len, adj);
3348 
3349 			/* One's complement subtract extraneous checksum */
3350 			if (adj >= sum_val)
3351 				sum_val = ~(adj - sum_val) & 0xFFFF;
3352 			else
3353 				sum_val -= adj;
3354 		}
3355 	} else {
3356 		sum_val = 0;
3357 		sum_flags = 0;
3358 	}
3359 
3360 	/* Clear hardware checksumming flag */
3361 	DB_CKSUMFLAGS(mp) = 0;
3362 
3363 	/*
3364 	 * Determine the offset (from the begining of the IP header)
3365 	 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3366 	 * this when removing the fragment header from the packet.
3367 	 * This packet consists of the IPv6 header, a potential
3368 	 * hop-by-hop options header, a potential pre-routing-header
3369 	 * destination options header, and a potential routing header.
3370 	 */
3371 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3372 	prev_nexthdr = ip6h->ip6_nxt;
3373 	ptr = (uint8_t *)&ip6h[1];
3374 
3375 	if (prev_nexthdr == IPPROTO_HOPOPTS) {
3376 		ip6_hbh_t	*hbh_hdr;
3377 		uint_t		hdr_len;
3378 
3379 		hbh_hdr = (ip6_hbh_t *)ptr;
3380 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3381 		prev_nexthdr = hbh_hdr->ip6h_nxt;
3382 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3383 		    - (uint8_t *)ip6h;
3384 		ptr += hdr_len;
3385 	}
3386 	if (prev_nexthdr == IPPROTO_DSTOPTS) {
3387 		ip6_dest_t	*dest_hdr;
3388 		uint_t		hdr_len;
3389 
3390 		dest_hdr = (ip6_dest_t *)ptr;
3391 		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3392 		prev_nexthdr = dest_hdr->ip6d_nxt;
3393 		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3394 		    - (uint8_t *)ip6h;
3395 		ptr += hdr_len;
3396 	}
3397 	if (prev_nexthdr == IPPROTO_ROUTING) {
3398 		ip6_rthdr_t	*rthdr;
3399 		uint_t		hdr_len;
3400 
3401 		rthdr = (ip6_rthdr_t *)ptr;
3402 		prev_nexthdr = rthdr->ip6r_nxt;
3403 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3404 		    - (uint8_t *)ip6h;
3405 		hdr_len = 8 * (rthdr->ip6r_len + 1);
3406 		ptr += hdr_len;
3407 	}
3408 	if (prev_nexthdr != IPPROTO_FRAGMENT) {
3409 		/* Can't handle other headers before the fragment header */
3410 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3411 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3412 		freemsg(mp);
3413 		return (NULL);
3414 	}
3415 
3416 	/*
3417 	 * Note: Fragment offset in header is in 8-octet units.
3418 	 * Clearing least significant 3 bits not only extracts
3419 	 * it but also gets it in units of octets.
3420 	 */
3421 	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
3422 	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3423 
3424 	/*
3425 	 * Is the more frags flag on and the payload length not a multiple
3426 	 * of eight?
3427 	 */
3428 	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3429 		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3430 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3431 		    (uint32_t)((char *)&ip6h->ip6_plen -
3432 		    (char *)ip6h), B_FALSE, ira);
3433 		return (NULL);
3434 	}
3435 
3436 	v6src_ptr = &ip6h->ip6_src;
3437 	v6dst_ptr = &ip6h->ip6_dst;
3438 	end = remlen;
3439 
3440 	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3441 	end += offset;
3442 
3443 	/*
3444 	 * Would fragment cause reassembled packet to have a payload length
3445 	 * greater than IP_MAXPACKET - the max payload size?
3446 	 */
3447 	if (end > IP_MAXPACKET) {
3448 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3449 		ip_drop_input("Reassembled packet too large", mp, ill);
3450 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3451 		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
3452 		    (char *)ip6h), B_FALSE, ira);
3453 		return (NULL);
3454 	}
3455 
3456 	/*
3457 	 * This packet just has one fragment. Reassembly not
3458 	 * needed.
3459 	 */
3460 	if (!more_frags && offset == 0) {
3461 		goto reass_done;
3462 	}
3463 
3464 	/*
3465 	 * Drop the fragmented as early as possible, if
3466 	 * we don't have resource(s) to re-assemble.
3467 	 */
3468 	if (ipst->ips_ip_reass_queue_bytes == 0) {
3469 		freemsg(mp);
3470 		return (NULL);
3471 	}
3472 
3473 	/* Record the ECN field info. */
3474 	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3475 	/*
3476 	 * If this is not the first fragment, dump the unfragmentable
3477 	 * portion of the packet.
3478 	 */
3479 	if (offset)
3480 		mp->b_rptr = (uchar_t *)&fraghdr[1];
3481 
3482 	/*
3483 	 * Fragmentation reassembly.  Each ILL has a hash table for
3484 	 * queueing packets undergoing reassembly for all IPIFs
3485 	 * associated with the ILL.  The hash is based on the packet
3486 	 * IP ident field.  The ILL frag hash table was allocated
3487 	 * as a timer block at the time the ILL was created.  Whenever
3488 	 * there is anything on the reassembly queue, the timer will
3489 	 * be running.
3490 	 */
3491 	/* Handle vnic loopback of fragments */
3492 	if (mp->b_datap->db_ref > 2)
3493 		msg_len = 0;
3494 	else
3495 		msg_len = MBLKSIZE(mp);
3496 
3497 	tail_mp = mp;
3498 	while (tail_mp->b_cont != NULL) {
3499 		tail_mp = tail_mp->b_cont;
3500 		if (tail_mp->b_datap->db_ref <= 2)
3501 			msg_len += MBLKSIZE(tail_mp);
3502 	}
3503 	/*
3504 	 * If the reassembly list for this ILL will get too big
3505 	 * prune it.
3506 	 */
3507 
3508 	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3509 	    ipst->ips_ip_reass_queue_bytes) {
3510 		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3511 		    uint_t, ill->ill_frag_count,
3512 		    uint_t, ipst->ips_ip_reass_queue_bytes);
3513 		ill_frag_prune(ill,
3514 		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3515 		    (ipst->ips_ip_reass_queue_bytes - msg_len));
3516 		pruned = B_TRUE;
3517 	}
3518 
3519 	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3520 	mutex_enter(&ipfb->ipfb_lock);
3521 
3522 	ipfp = &ipfb->ipfb_ipf;
3523 	/* Try to find an existing fragment queue for this packet. */
3524 	for (;;) {
3525 		ipf = ipfp[0];
3526 		if (ipf) {
3527 			/*
3528 			 * It has to match on ident, source address, and
3529 			 * dest address.
3530 			 */
3531 			if (ipf->ipf_ident == ident &&
3532 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3533 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3534 
3535 				/*
3536 				 * If we have received too many
3537 				 * duplicate fragments for this packet
3538 				 * free it.
3539 				 */
3540 				if (ipf->ipf_num_dups > ip_max_frag_dups) {
3541 					ill_frag_free_pkts(ill, ipfb, ipf, 1);
3542 					freemsg(mp);
3543 					mutex_exit(&ipfb->ipfb_lock);
3544 					return (NULL);
3545 				}
3546 
3547 				break;
3548 			}
3549 			ipfp = &ipf->ipf_hash_next;
3550 			continue;
3551 		}
3552 
3553 
3554 		/*
3555 		 * If we pruned the list, do we want to store this new
3556 		 * fragment?. We apply an optimization here based on the
3557 		 * fact that most fragments will be received in order.
3558 		 * So if the offset of this incoming fragment is zero,
3559 		 * it is the first fragment of a new packet. We will
3560 		 * keep it.  Otherwise drop the fragment, as we have
3561 		 * probably pruned the packet already (since the
3562 		 * packet cannot be found).
3563 		 */
3564 
3565 		if (pruned && offset != 0) {
3566 			mutex_exit(&ipfb->ipfb_lock);
3567 			freemsg(mp);
3568 			return (NULL);
3569 		}
3570 
3571 		/* New guy.  Allocate a frag message. */
3572 		mp1 = allocb(sizeof (*ipf), BPRI_MED);
3573 		if (!mp1) {
3574 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3575 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3576 			freemsg(mp);
3577 	partial_reass_done:
3578 			mutex_exit(&ipfb->ipfb_lock);
3579 			return (NULL);
3580 		}
3581 
3582 		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst))  {
3583 			/*
3584 			 * Too many fragmented packets in this hash bucket.
3585 			 * Free the oldest.
3586 			 */
3587 			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
3588 		}
3589 
3590 		mp1->b_cont = mp;
3591 
3592 		/* Initialize the fragment header. */
3593 		ipf = (ipf_t *)mp1->b_rptr;
3594 		ipf->ipf_mp = mp1;
3595 		ipf->ipf_ptphn = ipfp;
3596 		ipfp[0] = ipf;
3597 		ipf->ipf_hash_next = NULL;
3598 		ipf->ipf_ident = ident;
3599 		ipf->ipf_v6src = *v6src_ptr;
3600 		ipf->ipf_v6dst = *v6dst_ptr;
3601 		/* Record reassembly start time. */
3602 		ipf->ipf_timestamp = gethrestime_sec();
3603 		/* Record ipf generation and account for frag header */
3604 		ipf->ipf_gen = ill->ill_ipf_gen++;
3605 		ipf->ipf_count = MBLKSIZE(mp1);
3606 		ipf->ipf_protocol = nexthdr;
3607 		ipf->ipf_nf_hdr_len = 0;
3608 		ipf->ipf_prev_nexthdr_offset = 0;
3609 		ipf->ipf_last_frag_seen = B_FALSE;
3610 		ipf->ipf_ecn = ecn_info;
3611 		ipf->ipf_num_dups = 0;
3612 		ipfb->ipfb_frag_pkts++;
3613 		ipf->ipf_checksum = 0;
3614 		ipf->ipf_checksum_flags = 0;
3615 
3616 		/* Store checksum value in fragment header */
3617 		if (sum_flags != 0) {
3618 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3619 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3620 			ipf->ipf_checksum = sum_val;
3621 			ipf->ipf_checksum_flags = sum_flags;
3622 		}
3623 
3624 		/*
3625 		 * We handle reassembly two ways.  In the easy case,
3626 		 * where all the fragments show up in order, we do
3627 		 * minimal bookkeeping, and just clip new pieces on
3628 		 * the end.  If we ever see a hole, then we go off
3629 		 * to ip_reassemble which has to mark the pieces and
3630 		 * keep track of the number of holes, etc.  Obviously,
3631 		 * the point of having both mechanisms is so we can
3632 		 * handle the easy case as efficiently as possible.
3633 		 */
3634 		if (offset == 0) {
3635 			/* Easy case, in-order reassembly so far. */
3636 			/* Update the byte count */
3637 			ipf->ipf_count += msg_len;
3638 			ipf->ipf_tail_mp = tail_mp;
3639 			/*
3640 			 * Keep track of next expected offset in
3641 			 * ipf_end.
3642 			 */
3643 			ipf->ipf_end = end;
3644 			ipf->ipf_nf_hdr_len = hdr_length;
3645 			ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
3646 		} else {
3647 			/* Hard case, hole at the beginning. */
3648 			ipf->ipf_tail_mp = NULL;
3649 			/*
3650 			 * ipf_end == 0 means that we have given up
3651 			 * on easy reassembly.
3652 			 */
3653 			ipf->ipf_end = 0;
3654 
3655 			/* Forget checksum offload from now on */
3656 			ipf->ipf_checksum_flags = 0;
3657 
3658 			/*
3659 			 * ipf_hole_cnt is set by ip_reassemble.
3660 			 * ipf_count is updated by ip_reassemble.
3661 			 * No need to check for return value here
3662 			 * as we don't expect reassembly to complete or
3663 			 * fail for the first fragment itself.
3664 			 */
3665 			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
3666 			    msg_len);
3667 		}
3668 		/* Update per ipfb and ill byte counts */
3669 		ipfb->ipfb_count += ipf->ipf_count;
3670 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3671 		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
3672 		/* If the frag timer wasn't already going, start it. */
3673 		mutex_enter(&ill->ill_lock);
3674 		ill_frag_timer_start(ill);
3675 		mutex_exit(&ill->ill_lock);
3676 		goto partial_reass_done;
3677 	}
3678 
3679 	/*
3680 	 * If the packet's flag has changed (it could be coming up
3681 	 * from an interface different than the previous, therefore
3682 	 * possibly different checksum capability), then forget about
3683 	 * any stored checksum states.  Otherwise add the value to
3684 	 * the existing one stored in the fragment header.
3685 	 */
3686 	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
3687 		sum_val += ipf->ipf_checksum;
3688 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3689 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3690 		ipf->ipf_checksum = sum_val;
3691 	} else if (ipf->ipf_checksum_flags != 0) {
3692 		/* Forget checksum offload from now on */
3693 		ipf->ipf_checksum_flags = 0;
3694 	}
3695 
3696 	/*
3697 	 * We have a new piece of a datagram which is already being
3698 	 * reassembled.  Update the ECN info if all IP fragments
3699 	 * are ECN capable.  If there is one which is not, clear
3700 	 * all the info.  If there is at least one which has CE
3701 	 * code point, IP needs to report that up to transport.
3702 	 */
3703 	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
3704 		if (ecn_info == IPH_ECN_CE)
3705 			ipf->ipf_ecn = IPH_ECN_CE;
3706 	} else {
3707 		ipf->ipf_ecn = IPH_ECN_NECT;
3708 	}
3709 
3710 	if (offset && ipf->ipf_end == offset) {
3711 		/* The new fragment fits at the end */
3712 		ipf->ipf_tail_mp->b_cont = mp;
3713 		/* Update the byte count */
3714 		ipf->ipf_count += msg_len;
3715 		/* Update per ipfb and ill byte counts */
3716 		ipfb->ipfb_count += msg_len;
3717 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3718 		atomic_add_32(&ill->ill_frag_count, msg_len);
3719 		if (more_frags) {
3720 			/* More to come. */
3721 			ipf->ipf_end = end;
3722 			ipf->ipf_tail_mp = tail_mp;
3723 			goto partial_reass_done;
3724 		}
3725 	} else {
3726 		/*
3727 		 * Go do the hard cases.
3728 		 * Call ip_reassemble().
3729 		 */
3730 		int ret;
3731 
3732 		if (offset == 0) {
3733 			if (ipf->ipf_prev_nexthdr_offset == 0) {
3734 				ipf->ipf_nf_hdr_len = hdr_length;
3735 				ipf->ipf_prev_nexthdr_offset =
3736 				    prev_nexthdr_offset;
3737 			}
3738 		}
3739 		/* Save current byte count */
3740 		count = ipf->ipf_count;
3741 		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
3742 
3743 		/* Count of bytes added and subtracted (freeb()ed) */
3744 		count = ipf->ipf_count - count;
3745 		if (count) {
3746 			/* Update per ipfb and ill byte counts */
3747 			ipfb->ipfb_count += count;
3748 			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3749 			atomic_add_32(&ill->ill_frag_count, count);
3750 		}
3751 		if (ret == IP_REASS_PARTIAL) {
3752 			goto partial_reass_done;
3753 		} else if (ret == IP_REASS_FAILED) {
3754 			/* Reassembly failed. Free up all resources */
3755 			ill_frag_free_pkts(ill, ipfb, ipf, 1);
3756 			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
3757 				IP_REASS_SET_START(t_mp, 0);
3758 				IP_REASS_SET_END(t_mp, 0);
3759 			}
3760 			freemsg(mp);
3761 			goto partial_reass_done;
3762 		}
3763 
3764 		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3765 	}
3766 	/*
3767 	 * We have completed reassembly.  Unhook the frag header from
3768 	 * the reassembly list.
3769 	 *
3770 	 * Grab the unfragmentable header length next header value out
3771 	 * of the first fragment
3772 	 */
3773 	ASSERT(ipf->ipf_nf_hdr_len != 0);
3774 	hdr_length = ipf->ipf_nf_hdr_len;
3775 
3776 	/*
3777 	 * Before we free the frag header, record the ECN info
3778 	 * to report back to the transport.
3779 	 */
3780 	ecn_info = ipf->ipf_ecn;
3781 
3782 	/*
3783 	 * Store the nextheader field in the header preceding the fragment
3784 	 * header
3785 	 */
3786 	nexthdr = ipf->ipf_protocol;
3787 	prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
3788 	ipfp = ipf->ipf_ptphn;
3789 
3790 	/* We need to supply these to caller */
3791 	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
3792 		sum_val = ipf->ipf_checksum;
3793 	else
3794 		sum_val = 0;
3795 
3796 	mp1 = ipf->ipf_mp;
3797 	count = ipf->ipf_count;
3798 	ipf = ipf->ipf_hash_next;
3799 	if (ipf)
3800 		ipf->ipf_ptphn = ipfp;
3801 	ipfp[0] = ipf;
3802 	atomic_add_32(&ill->ill_frag_count, -count);
3803 	ASSERT(ipfb->ipfb_count >= count);
3804 	ipfb->ipfb_count -= count;
3805 	ipfb->ipfb_frag_pkts--;
3806 	mutex_exit(&ipfb->ipfb_lock);
3807 	/* Ditch the frag header. */
3808 	mp = mp1->b_cont;
3809 	freeb(mp1);
3810 
3811 	/*
3812 	 * Make sure the packet is good by doing some sanity
3813 	 * check. If bad we can silentely drop the packet.
3814 	 */
3815 reass_done:
3816 	if (hdr_length < sizeof (ip6_frag_t)) {
3817 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3818 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3819 		ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3820 		freemsg(mp);
3821 		return (NULL);
3822 	}
3823 
3824 	/*
3825 	 * Remove the fragment header from the initial header by
3826 	 * splitting the mblk into the non-fragmentable header and
3827 	 * everthing after the fragment extension header.  This has the
3828 	 * side effect of putting all the headers that need destination
3829 	 * processing into the b_cont block-- on return this fact is
3830 	 * used in order to avoid having to look at the extensions
3831 	 * already processed.
3832 	 *
3833 	 * Note that this code assumes that the unfragmentable portion
3834 	 * of the header is in the first mblk and increments
3835 	 * the read pointer past it.  If this assumption is broken
3836 	 * this code fails badly.
3837 	 */
3838 	if (mp->b_rptr + hdr_length != mp->b_wptr) {
3839 		mblk_t *nmp;
3840 
3841 		if (!(nmp = dupb(mp))) {
3842 			ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3843 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3844 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3845 			freemsg(mp);
3846 			return (NULL);
3847 		}
3848 		nmp->b_cont = mp->b_cont;
3849 		mp->b_cont = nmp;
3850 		nmp->b_rptr += hdr_length;
3851 	}
3852 	mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
3853 
3854 	ip6h = (ip6_t *)mp->b_rptr;
3855 	((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
3856 
3857 	/* Restore original IP length in header. */
3858 	packet_size = msgdsize(mp);
3859 	ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
3860 	/* Record the ECN info. */
3861 	ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
3862 	ip6h->ip6_vcf |= htonl(ecn_info << 20);
3863 
3864 	/* Update the receive attributes */
3865 	ira->ira_pktlen = packet_size;
3866 	ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
3867 	ira->ira_protocol = nexthdr;
3868 
3869 	/* Reassembly is successful; set checksum information in packet */
3870 	DB_CKSUM16(mp) = (uint16_t)sum_val;
3871 	DB_CKSUMFLAGS(mp) = sum_flags;
3872 	DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
3873 
3874 	return (mp);
3875 }
3876 
3877 /*
3878  * Given an mblk and a ptr, find the destination address in an IPv6 routing
3879  * header.
3880  */
3881 static in6_addr_t
3882 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3883 {
3884 	ip6_rthdr0_t *rt0;
3885 	int segleft, numaddr;
3886 	in6_addr_t *ap, rv = oldrv;
3887 
3888 	rt0 = (ip6_rthdr0_t *)whereptr;
3889 	if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3890 		DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3891 		    uint8_t *, whereptr);
3892 		return (rv);
3893 	}
3894 	segleft = rt0->ip6r0_segleft;
3895 	numaddr = rt0->ip6r0_len / 2;
3896 
3897 	if ((rt0->ip6r0_len & 0x1) ||
3898 	    (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3899 	    (segleft > rt0->ip6r0_len / 2)) {
3900 		/*
3901 		 * Corrupt packet.  Either the routing header length is odd
3902 		 * (can't happen) or mismatched compared to the packet, or the
3903 		 * number of addresses is.  Return what we can.  This will
3904 		 * only be a problem on forwarded packets that get squeezed
3905 		 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3906 		 */
3907 		DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3908 		    whereptr);
3909 		return (rv);
3910 	}
3911 
3912 	if (segleft != 0) {
3913 		ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3914 		rv = ap[numaddr - 1];
3915 	}
3916 
3917 	return (rv);
3918 }
3919 
3920 /*
3921  * Walk through the options to see if there is a routing header.
3922  * If present get the destination which is the last address of
3923  * the option.
3924  * mp needs to be provided in cases when the extension headers might span
3925  * b_cont; mp is never modified by this function.
3926  */
3927 in6_addr_t
3928 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3929 {
3930 	const mblk_t *current_mp = mp;
3931 	uint8_t nexthdr;
3932 	uint8_t *whereptr;
3933 	int ehdrlen;
3934 	in6_addr_t rv;
3935 
3936 	whereptr = (uint8_t *)ip6h;
3937 	ehdrlen = sizeof (ip6_t);
3938 
3939 	/* We assume at least the IPv6 base header is within one mblk. */
3940 	ASSERT(mp == NULL ||
3941 	    (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3942 
3943 	rv = ip6h->ip6_dst;
3944 	nexthdr = ip6h->ip6_nxt;
3945 	if (is_fragment != NULL)
3946 		*is_fragment = B_FALSE;
3947 
3948 	/*
3949 	 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3950 	 * no extension headers will be split across mblks.
3951 	 */
3952 
3953 	while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3954 	    nexthdr == IPPROTO_ROUTING) {
3955 		if (nexthdr == IPPROTO_ROUTING)
3956 			rv = pluck_out_dst(current_mp, whereptr, rv);
3957 
3958 		/*
3959 		 * All IPv6 extension headers have the next-header in byte
3960 		 * 0, and the (length - 8) in 8-byte-words.
3961 		 */
3962 		while (current_mp != NULL &&
3963 		    whereptr + ehdrlen >= current_mp->b_wptr) {
3964 			ehdrlen -= (current_mp->b_wptr - whereptr);
3965 			current_mp = current_mp->b_cont;
3966 			if (current_mp == NULL) {
3967 				/* Bad packet.  Return what we can. */
3968 				DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3969 				    mp, mblk_t *, current_mp, ip6_t *, ip6h);
3970 				goto done;
3971 			}
3972 			whereptr = current_mp->b_rptr;
3973 		}
3974 		whereptr += ehdrlen;
3975 
3976 		nexthdr = *whereptr;
3977 		ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3978 		ehdrlen = (*(whereptr + 1) + 1) * 8;
3979 	}
3980 
3981 done:
3982 	if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3983 		*is_fragment = B_TRUE;
3984 	return (rv);
3985 }
3986 
3987 /*
3988  * ip_source_routed_v6:
3989  * This function is called by redirect code (called from ip_input_v6) to
3990  * know whether this packet is source routed through this node i.e
3991  * whether this node (router) is part of the journey. This
3992  * function is called under two cases :
3993  *
3994  * case 1 : Routing header was processed by this node and
3995  *	    ip_process_rthdr replaced ip6_dst with the next hop
3996  *	    and we are forwarding the packet to the next hop.
3997  *
3998  * case 2 : Routing header was not processed by this node and we
3999  *	    are just forwarding the packet.
4000  *
4001  * For case (1) we don't want to send redirects. For case(2) we
4002  * want to send redirects.
4003  */
4004 static boolean_t
4005 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
4006 {
4007 	uint8_t		nexthdr;
4008 	in6_addr_t	*addrptr;
4009 	ip6_rthdr0_t	*rthdr;
4010 	uint8_t		numaddr;
4011 	ip6_hbh_t	*hbhhdr;
4012 	uint_t		ehdrlen;
4013 	uint8_t		*byteptr;
4014 
4015 	ip2dbg(("ip_source_routed_v6\n"));
4016 	nexthdr = ip6h->ip6_nxt;
4017 	ehdrlen = IPV6_HDR_LEN;
4018 
4019 	/* if a routing hdr is preceeded by HOPOPT or DSTOPT */
4020 	while (nexthdr == IPPROTO_HOPOPTS ||
4021 	    nexthdr == IPPROTO_DSTOPTS) {
4022 		byteptr = (uint8_t *)ip6h + ehdrlen;
4023 		/*
4024 		 * Check if we have already processed
4025 		 * packets or we are just a forwarding
4026 		 * router which only pulled up msgs up
4027 		 * to IPV6HDR and  one HBH ext header
4028 		 */
4029 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4030 			ip2dbg(("ip_source_routed_v6: Extension"
4031 			    " headers not processed\n"));
4032 			return (B_FALSE);
4033 		}
4034 		hbhhdr = (ip6_hbh_t *)byteptr;
4035 		nexthdr = hbhhdr->ip6h_nxt;
4036 		ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
4037 	}
4038 	switch (nexthdr) {
4039 	case IPPROTO_ROUTING:
4040 		byteptr = (uint8_t *)ip6h + ehdrlen;
4041 		/*
4042 		 * If for some reason, we haven't pulled up
4043 		 * the routing hdr data mblk, then we must
4044 		 * not have processed it at all. So for sure
4045 		 * we are not part of the source routed journey.
4046 		 */
4047 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4048 			ip2dbg(("ip_source_routed_v6: Routing"
4049 			    " header not processed\n"));
4050 			return (B_FALSE);
4051 		}
4052 		rthdr = (ip6_rthdr0_t *)byteptr;
4053 		/*
4054 		 * Either we are an intermediate router or the
4055 		 * last hop before destination and we have
4056 		 * already processed the routing header.
4057 		 * If segment_left is greater than or equal to zero,
4058 		 * then we must be the (numaddr - segleft) entry
4059 		 * of the routing header. Although ip6r0_segleft
4060 		 * is a unit8_t variable, we still check for zero
4061 		 * or greater value, if in case the data type
4062 		 * is changed someday in future.
4063 		 */
4064 		if (rthdr->ip6r0_segleft > 0 ||
4065 		    rthdr->ip6r0_segleft == 0) {
4066 			numaddr = rthdr->ip6r0_len / 2;
4067 			addrptr = (in6_addr_t *)((char *)rthdr +
4068 			    sizeof (*rthdr));
4069 			addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
4070 			if (addrptr != NULL) {
4071 				if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
4072 					return (B_TRUE);
4073 				ip1dbg(("ip_source_routed_v6: Not local\n"));
4074 			}
4075 		}
4076 	/* FALLTHROUGH */
4077 	default:
4078 		ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4079 		return (B_FALSE);
4080 	}
4081 }
4082 
4083 /*
4084  * IPv6 fragmentation.  Essentially the same as IPv4 fragmentation.
4085  * We have not optimized this in terms of number of mblks
4086  * allocated. For instance, for each fragment sent we always allocate a
4087  * mblk to hold the IPv6 header and fragment header.
4088  *
4089  * Assumes that all the extension headers are contained in the first mblk
4090  * and that the fragment header has has already been added by calling
4091  * ip_fraghdr_add_v6.
4092  */
4093 int
4094 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4095     uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4096     pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4097 {
4098 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
4099 	ip6_t		*fip6h;
4100 	mblk_t		*hmp;
4101 	mblk_t		*hmp0;
4102 	mblk_t		*dmp;
4103 	ip6_frag_t	*fraghdr;
4104 	size_t		unfragmentable_len;
4105 	size_t		mlen;
4106 	size_t		max_chunk;
4107 	uint16_t	off_flags;
4108 	uint16_t	offset = 0;
4109 	ill_t		*ill = nce->nce_ill;
4110 	uint8_t		nexthdr;
4111 	uint8_t		*ptr;
4112 	ip_stack_t	*ipst = ill->ill_ipst;
4113 	uint_t		priority = mp->b_band;
4114 	int		error = 0;
4115 
4116 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4117 	if (max_frag == 0) {
4118 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4119 		ip_drop_output("FragFails: zero max_frag", mp, ill);
4120 		freemsg(mp);
4121 		return (EINVAL);
4122 	}
4123 
4124 	/*
4125 	 * Caller should have added fraghdr_t to pkt_len, and also
4126 	 * updated ip6_plen.
4127 	 */
4128 	ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4129 	ASSERT(msgdsize(mp) == pkt_len);
4130 
4131 	/*
4132 	 * Determine the length of the unfragmentable portion of this
4133 	 * datagram.  This consists of the IPv6 header, a potential
4134 	 * hop-by-hop options header, a potential pre-routing-header
4135 	 * destination options header, and a potential routing header.
4136 	 */
4137 	nexthdr = ip6h->ip6_nxt;
4138 	ptr = (uint8_t *)&ip6h[1];
4139 
4140 	if (nexthdr == IPPROTO_HOPOPTS) {
4141 		ip6_hbh_t	*hbh_hdr;
4142 		uint_t		hdr_len;
4143 
4144 		hbh_hdr = (ip6_hbh_t *)ptr;
4145 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4146 		nexthdr = hbh_hdr->ip6h_nxt;
4147 		ptr += hdr_len;
4148 	}
4149 	if (nexthdr == IPPROTO_DSTOPTS) {
4150 		ip6_dest_t	*dest_hdr;
4151 		uint_t		hdr_len;
4152 
4153 		dest_hdr = (ip6_dest_t *)ptr;
4154 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4155 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4156 			nexthdr = dest_hdr->ip6d_nxt;
4157 			ptr += hdr_len;
4158 		}
4159 	}
4160 	if (nexthdr == IPPROTO_ROUTING) {
4161 		ip6_rthdr_t	*rthdr;
4162 		uint_t		hdr_len;
4163 
4164 		rthdr = (ip6_rthdr_t *)ptr;
4165 		nexthdr = rthdr->ip6r_nxt;
4166 		hdr_len = 8 * (rthdr->ip6r_len + 1);
4167 		ptr += hdr_len;
4168 	}
4169 	if (nexthdr != IPPROTO_FRAGMENT) {
4170 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4171 		ip_drop_output("FragFails: bad nexthdr", mp, ill);
4172 		freemsg(mp);
4173 		return (EINVAL);
4174 	}
4175 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4176 	unfragmentable_len += sizeof (ip6_frag_t);
4177 
4178 	max_chunk = (max_frag - unfragmentable_len) & ~7;
4179 
4180 	/*
4181 	 * Allocate an mblk with enough room for the link-layer
4182 	 * header and the unfragmentable part of the datagram, which includes
4183 	 * the fragment header.  This (or a copy) will be used as the
4184 	 * first mblk for each fragment we send.
4185 	 */
4186 	hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4187 	if (hmp == NULL) {
4188 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4189 		ip_drop_output("FragFails: no hmp", mp, ill);
4190 		freemsg(mp);
4191 		return (ENOBUFS);
4192 	}
4193 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
4194 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4195 
4196 	fip6h = (ip6_t *)hmp->b_rptr;
4197 	bcopy(ip6h, fip6h, unfragmentable_len);
4198 
4199 	/*
4200 	 * pkt_len is set to the total length of the fragmentable data in this
4201 	 * datagram.  For each fragment sent, we will decrement pkt_len
4202 	 * by the amount of fragmentable data sent in that fragment
4203 	 * until len reaches zero.
4204 	 */
4205 	pkt_len -= unfragmentable_len;
4206 
4207 	/*
4208 	 * Move read ptr past unfragmentable portion, we don't want this part
4209 	 * of the data in our fragments.
4210 	 */
4211 	mp->b_rptr += unfragmentable_len;
4212 	if (mp->b_rptr == mp->b_wptr) {
4213 		mblk_t *mp1 = mp->b_cont;
4214 		freeb(mp);
4215 		mp = mp1;
4216 	}
4217 
4218 	while (pkt_len != 0) {
4219 		mlen = MIN(pkt_len, max_chunk);
4220 		pkt_len -= mlen;
4221 		if (pkt_len != 0) {
4222 			/* Not last */
4223 			hmp0 = copyb(hmp);
4224 			if (hmp0 == NULL) {
4225 				BUMP_MIB(ill->ill_ip_mib,
4226 				    ipIfStatsOutFragFails);
4227 				ip_drop_output("FragFails: copyb failed",
4228 				    mp, ill);
4229 				freeb(hmp);
4230 				freemsg(mp);
4231 				ip1dbg(("ip_fragment_v6: copyb failed\n"));
4232 				return (ENOBUFS);
4233 			}
4234 			off_flags = IP6F_MORE_FRAG;
4235 		} else {
4236 			/* Last fragment */
4237 			hmp0 = hmp;
4238 			hmp = NULL;
4239 			off_flags = 0;
4240 		}
4241 		fip6h = (ip6_t *)(hmp0->b_rptr);
4242 		fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4243 		    sizeof (ip6_frag_t));
4244 
4245 		fip6h->ip6_plen = htons((uint16_t)(mlen +
4246 		    unfragmentable_len - IPV6_HDR_LEN));
4247 		/*
4248 		 * Note: Optimization alert.
4249 		 * In IPv6 (and IPv4) protocol header, Fragment Offset
4250 		 * ("offset") is 13 bits wide and in 8-octet units.
4251 		 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4252 		 * it occupies the most significant 13 bits.
4253 		 * (least significant 13 bits in IPv4).
4254 		 * We do not do any shifts here. Not shifting is same effect
4255 		 * as taking offset value in octet units, dividing by 8 and
4256 		 * then shifting 3 bits left to line it up in place in proper
4257 		 * place protocol header.
4258 		 */
4259 		fraghdr->ip6f_offlg = htons(offset) | off_flags;
4260 
4261 		if (!(dmp = ip_carve_mp(&mp, mlen))) {
4262 			/* mp has already been freed by ip_carve_mp() */
4263 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4264 			ip_drop_output("FragFails: could not carve mp",
4265 			    hmp0, ill);
4266 			if (hmp != NULL)
4267 				freeb(hmp);
4268 			freeb(hmp0);
4269 			ip1dbg(("ip_carve_mp: failed\n"));
4270 			return (ENOBUFS);
4271 		}
4272 		hmp0->b_cont = dmp;
4273 		/* Get the priority marking, if any */
4274 		hmp0->b_band = priority;
4275 
4276 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4277 
4278 		error = postfragfn(hmp0, nce, ixaflags,
4279 		    mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4280 		    ixa_cookie);
4281 		if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4282 			/* No point in sending the other fragments */
4283 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4284 			ip_drop_output("FragFails: postfragfn failed",
4285 			    hmp, ill);
4286 			freeb(hmp);
4287 			freemsg(mp);
4288 			return (error);
4289 		}
4290 		/* No need to redo state machine in loop */
4291 		ixaflags &= ~IXAF_REACH_CONF;
4292 
4293 		offset += mlen;
4294 	}
4295 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4296 	return (error);
4297 }
4298 
4299 /*
4300  * Add a fragment header to an IPv6 packet.
4301  * Assumes that all the extension headers are contained in the first mblk.
4302  *
4303  * The fragment header is inserted after an hop-by-hop options header
4304  * and after [an optional destinations header followed by] a routing header.
4305  */
4306 mblk_t *
4307 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4308 {
4309 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
4310 	ip6_t		*fip6h;
4311 	mblk_t		*hmp;
4312 	ip6_frag_t	*fraghdr;
4313 	size_t		unfragmentable_len;
4314 	uint8_t		nexthdr;
4315 	uint_t		prev_nexthdr_offset;
4316 	uint8_t		*ptr;
4317 	uint_t		priority = mp->b_band;
4318 	ip_stack_t	*ipst = ixa->ixa_ipst;
4319 
4320 	/*
4321 	 * Determine the length of the unfragmentable portion of this
4322 	 * datagram.  This consists of the IPv6 header, a potential
4323 	 * hop-by-hop options header, a potential pre-routing-header
4324 	 * destination options header, and a potential routing header.
4325 	 */
4326 	nexthdr = ip6h->ip6_nxt;
4327 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4328 	ptr = (uint8_t *)&ip6h[1];
4329 
4330 	if (nexthdr == IPPROTO_HOPOPTS) {
4331 		ip6_hbh_t	*hbh_hdr;
4332 		uint_t		hdr_len;
4333 
4334 		hbh_hdr = (ip6_hbh_t *)ptr;
4335 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4336 		nexthdr = hbh_hdr->ip6h_nxt;
4337 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4338 		    - (uint8_t *)ip6h;
4339 		ptr += hdr_len;
4340 	}
4341 	if (nexthdr == IPPROTO_DSTOPTS) {
4342 		ip6_dest_t	*dest_hdr;
4343 		uint_t		hdr_len;
4344 
4345 		dest_hdr = (ip6_dest_t *)ptr;
4346 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4347 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4348 			nexthdr = dest_hdr->ip6d_nxt;
4349 			prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4350 			    - (uint8_t *)ip6h;
4351 			ptr += hdr_len;
4352 		}
4353 	}
4354 	if (nexthdr == IPPROTO_ROUTING) {
4355 		ip6_rthdr_t	*rthdr;
4356 		uint_t		hdr_len;
4357 
4358 		rthdr = (ip6_rthdr_t *)ptr;
4359 		nexthdr = rthdr->ip6r_nxt;
4360 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4361 		    - (uint8_t *)ip6h;
4362 		hdr_len = 8 * (rthdr->ip6r_len + 1);
4363 		ptr += hdr_len;
4364 	}
4365 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4366 
4367 	/*
4368 	 * Allocate an mblk with enough room for the link-layer
4369 	 * header, the unfragmentable part of the datagram, and the
4370 	 * fragment header.
4371 	 */
4372 	hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4373 	    ipst->ips_ip_wroff_extra, mp);
4374 	if (hmp == NULL) {
4375 		ill_t *ill = ixa->ixa_nce->nce_ill;
4376 
4377 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4378 		ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4379 		freemsg(mp);
4380 		return (NULL);
4381 	}
4382 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
4383 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4384 
4385 	fip6h = (ip6_t *)hmp->b_rptr;
4386 	fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4387 
4388 	bcopy(ip6h, fip6h, unfragmentable_len);
4389 	fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4390 	hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4391 
4392 	fraghdr->ip6f_nxt = nexthdr;
4393 	fraghdr->ip6f_reserved = 0;
4394 	fraghdr->ip6f_offlg = 0;
4395 	fraghdr->ip6f_ident = htonl(ident);
4396 
4397 	/* Get the priority marking, if any */
4398 	hmp->b_band = priority;
4399 
4400 	/*
4401 	 * Move read ptr past unfragmentable portion, we don't want this part
4402 	 * of the data in our fragments.
4403 	 */
4404 	mp->b_rptr += unfragmentable_len;
4405 	hmp->b_cont = mp;
4406 	return (hmp);
4407 }
4408 
4409 /*
4410  * Determine if the ill and multicast aspects of that packets
4411  * "matches" the conn.
4412  */
4413 boolean_t
4414 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4415 {
4416 	ill_t		*ill = ira->ira_rill;
4417 	zoneid_t	zoneid = ira->ira_zoneid;
4418 	uint_t		in_ifindex;
4419 	in6_addr_t	*v6dst_ptr = &ip6h->ip6_dst;
4420 	in6_addr_t	*v6src_ptr = &ip6h->ip6_src;
4421 
4422 	/*
4423 	 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4424 	 * scopeid. This is used to limit
4425 	 * unicast and multicast reception to conn_incoming_ifindex.
4426 	 * conn_wantpacket_v6 is called both for unicast and
4427 	 * multicast packets.
4428 	 */
4429 	in_ifindex = connp->conn_incoming_ifindex;
4430 
4431 	/* mpathd can bind to the under IPMP interface, which we allow */
4432 	if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4433 		if (!IS_UNDER_IPMP(ill))
4434 			return (B_FALSE);
4435 
4436 		if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4437 			return (B_FALSE);
4438 	}
4439 
4440 	if (!IPCL_ZONE_MATCH(connp, zoneid))
4441 		return (B_FALSE);
4442 
4443 	if (!(ira->ira_flags & IRAF_MULTICAST))
4444 		return (B_TRUE);
4445 
4446 	if (connp->conn_multi_router)
4447 		return (B_TRUE);
4448 
4449 	if (ira->ira_protocol == IPPROTO_RSVP)
4450 		return (B_TRUE);
4451 
4452 	return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4453 	    ira->ira_ill));
4454 }
4455 
4456 /*
4457  * pr_addr_dbg function provides the needed buffer space to call
4458  * inet_ntop() function's 3rd argument. This function should be
4459  * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4460  * stack buffer space in it's own stack frame. This function uses
4461  * a buffer from it's own stack and prints the information.
4462  * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4463  *
4464  * Note:    This function can call inet_ntop() once.
4465  */
4466 void
4467 pr_addr_dbg(char *fmt1, int af, const void *addr)
4468 {
4469 	char	buf[INET6_ADDRSTRLEN];
4470 
4471 	if (fmt1 == NULL) {
4472 		ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4473 		return;
4474 	}
4475 
4476 	/*
4477 	 * This does not compare debug level and just prints
4478 	 * out. Thus it is the responsibility of the caller
4479 	 * to check the appropriate debug-level before calling
4480 	 * this function.
4481 	 */
4482 	if (ip_debug > 0) {
4483 		printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4484 	}
4485 
4486 
4487 }
4488 
4489 
4490 /*
4491  * Return the length in bytes of the IPv6 headers (base header
4492  * extension headers) that will be needed based on the
4493  * ip_pkt_t structure passed by the caller.
4494  *
4495  * The returned length does not include the length of the upper level
4496  * protocol (ULP) header.
4497  */
4498 int
4499 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4500 {
4501 	int len;
4502 
4503 	len = IPV6_HDR_LEN;
4504 
4505 	/*
4506 	 * If there's a security label here, then we ignore any hop-by-hop
4507 	 * options the user may try to set.
4508 	 */
4509 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
4510 		uint_t hopoptslen;
4511 		/*
4512 		 * Note that ipp_label_len_v6 is just the option - not
4513 		 * the hopopts extension header. It also needs to be padded
4514 		 * to a multiple of 8 bytes.
4515 		 */
4516 		ASSERT(ipp->ipp_label_len_v6 != 0);
4517 		hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4518 		hopoptslen = (hopoptslen + 7)/8 * 8;
4519 		len += hopoptslen;
4520 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4521 		ASSERT(ipp->ipp_hopoptslen != 0);
4522 		len += ipp->ipp_hopoptslen;
4523 	}
4524 
4525 	/*
4526 	 * En-route destination options
4527 	 * Only do them if there's a routing header as well
4528 	 */
4529 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4530 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4531 		ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4532 		len += ipp->ipp_rthdrdstoptslen;
4533 	}
4534 	if (ipp->ipp_fields & IPPF_RTHDR) {
4535 		ASSERT(ipp->ipp_rthdrlen != 0);
4536 		len += ipp->ipp_rthdrlen;
4537 	}
4538 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
4539 		ASSERT(ipp->ipp_dstoptslen != 0);
4540 		len += ipp->ipp_dstoptslen;
4541 	}
4542 	return (len);
4543 }
4544 
4545 /*
4546  * All-purpose routine to build a header chain of an IPv6 header
4547  * followed by any required extension headers and a proto header.
4548  *
4549  * The caller has to set the source and destination address as well as
4550  * ip6_plen. The caller has to massage any routing header and compensate
4551  * for the ULP pseudo-header checksum due to the source route.
4552  *
4553  * The extension headers will all be fully filled in.
4554  */
4555 void
4556 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4557     uint8_t protocol, uint32_t flowinfo)
4558 {
4559 	uint8_t *nxthdr_ptr;
4560 	uint8_t *cp;
4561 	ip6_t	*ip6h = (ip6_t *)buf;
4562 
4563 	/* Initialize IPv6 header */
4564 	ip6h->ip6_vcf =
4565 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4566 	    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4567 
4568 	if (ipp->ipp_fields & IPPF_TCLASS) {
4569 		/* Overrides the class part of flowinfo */
4570 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4571 		    ipp->ipp_tclass);
4572 	}
4573 
4574 	if (ipp->ipp_fields & IPPF_HOPLIMIT)
4575 		ip6h->ip6_hops = ipp->ipp_hoplimit;
4576 	else
4577 		ip6h->ip6_hops = ipp->ipp_unicast_hops;
4578 
4579 	if ((ipp->ipp_fields & IPPF_ADDR) &&
4580 	    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4581 		ip6h->ip6_src = ipp->ipp_addr;
4582 
4583 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4584 	cp = (uint8_t *)&ip6h[1];
4585 	/*
4586 	 * Here's where we have to start stringing together
4587 	 * any extension headers in the right order:
4588 	 * Hop-by-hop, destination, routing, and final destination opts.
4589 	 */
4590 	/*
4591 	 * If there's a security label here, then we ignore any hop-by-hop
4592 	 * options the user may try to set.
4593 	 */
4594 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
4595 		/*
4596 		 * Hop-by-hop options with the label.
4597 		 * Note that ipp_label_v6 is just the option - not
4598 		 * the hopopts extension header. It also needs to be padded
4599 		 * to a multiple of 8 bytes.
4600 		 */
4601 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4602 		uint_t hopoptslen;
4603 		uint_t padlen;
4604 
4605 		padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4606 		hopoptslen = (padlen + 7)/8 * 8;
4607 		padlen = hopoptslen - padlen;
4608 
4609 		*nxthdr_ptr = IPPROTO_HOPOPTS;
4610 		nxthdr_ptr = &hbh->ip6h_nxt;
4611 		hbh->ip6h_len = hopoptslen/8 - 1;
4612 		cp += sizeof (ip6_hbh_t);
4613 		bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4614 		cp += ipp->ipp_label_len_v6;
4615 
4616 		ASSERT(padlen <= 7);
4617 		switch (padlen) {
4618 		case 0:
4619 			break;
4620 		case 1:
4621 			cp[0] = IP6OPT_PAD1;
4622 			break;
4623 		default:
4624 			cp[0] = IP6OPT_PADN;
4625 			cp[1] = padlen - 2;
4626 			bzero(&cp[2], padlen - 2);
4627 			break;
4628 		}
4629 		cp += padlen;
4630 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4631 		/* Hop-by-hop options */
4632 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4633 
4634 		*nxthdr_ptr = IPPROTO_HOPOPTS;
4635 		nxthdr_ptr = &hbh->ip6h_nxt;
4636 
4637 		bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4638 		cp += ipp->ipp_hopoptslen;
4639 	}
4640 	/*
4641 	 * En-route destination options
4642 	 * Only do them if there's a routing header as well
4643 	 */
4644 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4645 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4646 		ip6_dest_t *dst = (ip6_dest_t *)cp;
4647 
4648 		*nxthdr_ptr = IPPROTO_DSTOPTS;
4649 		nxthdr_ptr = &dst->ip6d_nxt;
4650 
4651 		bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4652 		cp += ipp->ipp_rthdrdstoptslen;
4653 	}
4654 	/*
4655 	 * Routing header next
4656 	 */
4657 	if (ipp->ipp_fields & IPPF_RTHDR) {
4658 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4659 
4660 		*nxthdr_ptr = IPPROTO_ROUTING;
4661 		nxthdr_ptr = &rt->ip6r_nxt;
4662 
4663 		bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4664 		cp += ipp->ipp_rthdrlen;
4665 	}
4666 	/*
4667 	 * Do ultimate destination options
4668 	 */
4669 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
4670 		ip6_dest_t *dest = (ip6_dest_t *)cp;
4671 
4672 		*nxthdr_ptr = IPPROTO_DSTOPTS;
4673 		nxthdr_ptr = &dest->ip6d_nxt;
4674 
4675 		bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4676 		cp += ipp->ipp_dstoptslen;
4677 	}
4678 	/*
4679 	 * Now set the last header pointer to the proto passed in
4680 	 */
4681 	*nxthdr_ptr = protocol;
4682 	ASSERT((int)(cp - buf) == buf_len);
4683 }
4684 
4685 /*
4686  * Return a pointer to the routing header extension header
4687  * in the IPv6 header(s) chain passed in.
4688  * If none found, return NULL
4689  * Assumes that all extension headers are in same mblk as the v6 header
4690  */
4691 ip6_rthdr_t *
4692 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4693 {
4694 	ip6_dest_t	*desthdr;
4695 	ip6_frag_t	*fraghdr;
4696 	uint_t		hdrlen;
4697 	uint8_t		nexthdr;
4698 	uint8_t		*ptr = (uint8_t *)&ip6h[1];
4699 
4700 	if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4701 		return ((ip6_rthdr_t *)ptr);
4702 
4703 	/*
4704 	 * The routing header will precede all extension headers
4705 	 * other than the hop-by-hop and destination options
4706 	 * extension headers, so if we see anything other than those,
4707 	 * we're done and didn't find it.
4708 	 * We could see a destination options header alone but no
4709 	 * routing header, in which case we'll return NULL as soon as
4710 	 * we see anything after that.
4711 	 * Hop-by-hop and destination option headers are identical,
4712 	 * so we can use either one we want as a template.
4713 	 */
4714 	nexthdr = ip6h->ip6_nxt;
4715 	while (ptr < endptr) {
4716 		/* Is there enough left for len + nexthdr? */
4717 		if (ptr + MIN_EHDR_LEN > endptr)
4718 			return (NULL);
4719 
4720 		switch (nexthdr) {
4721 		case IPPROTO_HOPOPTS:
4722 		case IPPROTO_DSTOPTS:
4723 			/* Assumes the headers are identical for hbh and dst */
4724 			desthdr = (ip6_dest_t *)ptr;
4725 			hdrlen = 8 * (desthdr->ip6d_len + 1);
4726 			nexthdr = desthdr->ip6d_nxt;
4727 			break;
4728 
4729 		case IPPROTO_ROUTING:
4730 			return ((ip6_rthdr_t *)ptr);
4731 
4732 		case IPPROTO_FRAGMENT:
4733 			fraghdr = (ip6_frag_t *)ptr;
4734 			hdrlen = sizeof (ip6_frag_t);
4735 			nexthdr = fraghdr->ip6f_nxt;
4736 			break;
4737 
4738 		default:
4739 			return (NULL);
4740 		}
4741 		ptr += hdrlen;
4742 	}
4743 	return (NULL);
4744 }
4745 
4746 /*
4747  * Called for source-routed packets originating on this node.
4748  * Manipulates the original routing header by moving every entry up
4749  * one slot, placing the first entry in the v6 header's v6_dst field,
4750  * and placing the ultimate destination in the routing header's last
4751  * slot.
4752  *
4753  * Returns the checksum diference between the ultimate destination
4754  * (last hop in the routing header when the packet is sent) and
4755  * the first hop (ip6_dst when the packet is sent)
4756  */
4757 /* ARGSUSED2 */
4758 uint32_t
4759 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4760 {
4761 	uint_t		numaddr;
4762 	uint_t		i;
4763 	in6_addr_t	*addrptr;
4764 	in6_addr_t	tmp;
4765 	ip6_rthdr0_t	*rthdr = (ip6_rthdr0_t *)rth;
4766 	uint32_t	cksm;
4767 	uint32_t	addrsum = 0;
4768 	uint16_t	*ptr;
4769 
4770 	/*
4771 	 * Perform any processing needed for source routing.
4772 	 * We know that all extension headers will be in the same mblk
4773 	 * as the IPv6 header.
4774 	 */
4775 
4776 	/*
4777 	 * If no segments left in header, or the header length field is zero,
4778 	 * don't move hop addresses around;
4779 	 * Checksum difference is zero.
4780 	 */
4781 	if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4782 		return (0);
4783 
4784 	ptr = (uint16_t *)&ip6h->ip6_dst;
4785 	cksm = 0;
4786 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4787 		cksm += ptr[i];
4788 	}
4789 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
4790 
4791 	/*
4792 	 * Here's where the fun begins - we have to
4793 	 * move all addresses up one spot, take the
4794 	 * first hop and make it our first ip6_dst,
4795 	 * and place the ultimate destination in the
4796 	 * newly-opened last slot.
4797 	 */
4798 	addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4799 	numaddr = rthdr->ip6r0_len / 2;
4800 	tmp = *addrptr;
4801 	for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4802 		*addrptr = addrptr[1];
4803 	}
4804 	*addrptr = ip6h->ip6_dst;
4805 	ip6h->ip6_dst = tmp;
4806 
4807 	/*
4808 	 * From the checksummed ultimate destination subtract the checksummed
4809 	 * current ip6_dst (the first hop address). Return that number.
4810 	 * (In the v4 case, the second part of this is done in each routine
4811 	 *  that calls ip_massage_options(). We do it all in this one place
4812 	 *  for v6).
4813 	 */
4814 	ptr = (uint16_t *)&ip6h->ip6_dst;
4815 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4816 		addrsum += ptr[i];
4817 	}
4818 	cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4819 	if ((int)cksm < 0)
4820 		cksm--;
4821 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
4822 
4823 	return (cksm);
4824 }
4825 
4826 void
4827 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4828 {
4829 	kstat_t *ksp;
4830 
4831 	ip6_stat_t template = {
4832 		{ "ip6_udp_fannorm",	KSTAT_DATA_UINT64 },
4833 		{ "ip6_udp_fanmb",	KSTAT_DATA_UINT64 },
4834 		{ "ip6_recv_pullup",		KSTAT_DATA_UINT64 },
4835 		{ "ip6_db_ref",			KSTAT_DATA_UINT64 },
4836 		{ "ip6_notaligned",		KSTAT_DATA_UINT64 },
4837 		{ "ip6_multimblk",		KSTAT_DATA_UINT64 },
4838 		{ "ipsec_proto_ahesp",		KSTAT_DATA_UINT64 },
4839 		{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
4840 		{ "ip6_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
4841 		{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
4842 		{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
4843 		{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
4844 		{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
4845 		{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
4846 		{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
4847 		{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
4848 	};
4849 	ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4850 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4851 	    KSTAT_FLAG_VIRTUAL, stackid);
4852 
4853 	if (ksp == NULL)
4854 		return (NULL);
4855 
4856 	bcopy(&template, ip6_statisticsp, sizeof (template));
4857 	ksp->ks_data = (void *)ip6_statisticsp;
4858 	ksp->ks_private = (void *)(uintptr_t)stackid;
4859 
4860 	kstat_install(ksp);
4861 	return (ksp);
4862 }
4863 
4864 void
4865 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4866 {
4867 	if (ksp != NULL) {
4868 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4869 		kstat_delete_netstack(ksp, stackid);
4870 	}
4871 }
4872 
4873 /*
4874  * The following two functions set and get the value for the
4875  * IPV6_SRC_PREFERENCES socket option.
4876  */
4877 int
4878 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4879 {
4880 	/*
4881 	 * We only support preferences that are covered by
4882 	 * IPV6_PREFER_SRC_MASK.
4883 	 */
4884 	if (prefs & ~IPV6_PREFER_SRC_MASK)
4885 		return (EINVAL);
4886 
4887 	/*
4888 	 * Look for conflicting preferences or default preferences.  If
4889 	 * both bits of a related pair are clear, the application wants the
4890 	 * system's default value for that pair.  Both bits in a pair can't
4891 	 * be set.
4892 	 */
4893 	if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4894 		prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4895 	} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4896 	    IPV6_PREFER_SRC_MIPMASK) {
4897 		return (EINVAL);
4898 	}
4899 	if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4900 		prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4901 	} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4902 	    IPV6_PREFER_SRC_TMPMASK) {
4903 		return (EINVAL);
4904 	}
4905 	if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4906 		prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4907 	} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4908 	    IPV6_PREFER_SRC_CGAMASK) {
4909 		return (EINVAL);
4910 	}
4911 
4912 	ixa->ixa_src_preferences = prefs;
4913 	return (0);
4914 }
4915 
4916 size_t
4917 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4918 {
4919 	*val = ixa->ixa_src_preferences;
4920 	return (sizeof (ixa->ixa_src_preferences));
4921 }
4922 
4923 /*
4924  * Get the size of the IP options (including the IP headers size)
4925  * without including the AH header's size. If till_ah is B_FALSE,
4926  * and if AH header is present, dest options beyond AH header will
4927  * also be included in the returned size.
4928  */
4929 int
4930 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4931 {
4932 	ip6_t *ip6h;
4933 	uint8_t nexthdr;
4934 	uint8_t *whereptr;
4935 	ip6_hbh_t *hbhhdr;
4936 	ip6_dest_t *dsthdr;
4937 	ip6_rthdr_t *rthdr;
4938 	int ehdrlen;
4939 	int size;
4940 	ah_t *ah;
4941 
4942 	ip6h = (ip6_t *)mp->b_rptr;
4943 	size = IPV6_HDR_LEN;
4944 	nexthdr = ip6h->ip6_nxt;
4945 	whereptr = (uint8_t *)&ip6h[1];
4946 	for (;;) {
4947 		/* Assume IP has already stripped it */
4948 		ASSERT(nexthdr != IPPROTO_FRAGMENT);
4949 		switch (nexthdr) {
4950 		case IPPROTO_HOPOPTS:
4951 			hbhhdr = (ip6_hbh_t *)whereptr;
4952 			nexthdr = hbhhdr->ip6h_nxt;
4953 			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4954 			break;
4955 		case IPPROTO_DSTOPTS:
4956 			dsthdr = (ip6_dest_t *)whereptr;
4957 			nexthdr = dsthdr->ip6d_nxt;
4958 			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4959 			break;
4960 		case IPPROTO_ROUTING:
4961 			rthdr = (ip6_rthdr_t *)whereptr;
4962 			nexthdr = rthdr->ip6r_nxt;
4963 			ehdrlen = 8 * (rthdr->ip6r_len + 1);
4964 			break;
4965 		default :
4966 			if (till_ah) {
4967 				ASSERT(nexthdr == IPPROTO_AH);
4968 				return (size);
4969 			}
4970 			/*
4971 			 * If we don't have a AH header to traverse,
4972 			 * return now. This happens normally for
4973 			 * outbound datagrams where we have not inserted
4974 			 * the AH header.
4975 			 */
4976 			if (nexthdr != IPPROTO_AH) {
4977 				return (size);
4978 			}
4979 
4980 			/*
4981 			 * We don't include the AH header's size
4982 			 * to be symmetrical with other cases where
4983 			 * we either don't have a AH header (outbound)
4984 			 * or peek into the AH header yet (inbound and
4985 			 * not pulled up yet).
4986 			 */
4987 			ah = (ah_t *)whereptr;
4988 			nexthdr = ah->ah_nexthdr;
4989 			ehdrlen = (ah->ah_length << 2) + 8;
4990 
4991 			if (nexthdr == IPPROTO_DSTOPTS) {
4992 				if (whereptr + ehdrlen >= mp->b_wptr) {
4993 					/*
4994 					 * The destination options header
4995 					 * is not part of the first mblk.
4996 					 */
4997 					whereptr = mp->b_cont->b_rptr;
4998 				} else {
4999 					whereptr += ehdrlen;
5000 				}
5001 
5002 				dsthdr = (ip6_dest_t *)whereptr;
5003 				ehdrlen = 8 * (dsthdr->ip6d_len + 1);
5004 				size += ehdrlen;
5005 			}
5006 			return (size);
5007 		}
5008 		whereptr += ehdrlen;
5009 		size += ehdrlen;
5010 	}
5011 }
5012 
5013 /*
5014  * Utility routine that checks if `v6srcp' is a valid address on underlying
5015  * interface `ill'.  If `ipifp' is non-NULL, it's set to a held ipif
5016  * associated with `v6srcp' on success.  NOTE: if this is not called from
5017  * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
5018  * group during or after this lookup.
5019  */
5020 boolean_t
5021 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
5022 {
5023 	ipif_t *ipif;
5024 
5025 
5026 	ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
5027 	if (ipif != NULL) {
5028 		if (ipifp != NULL)
5029 			*ipifp = ipif;
5030 		else
5031 			ipif_refrele(ipif);
5032 		return (B_TRUE);
5033 	}
5034 
5035 	if (ip_debug > 2) {
5036 		pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
5037 		    "src %s\n", AF_INET6, v6srcp);
5038 	}
5039 	return (B_FALSE);
5040 }
5041