xref: /titanic_52/usr/src/uts/common/inet/ip/ip6.c (revision 269e59f9a28bf47e0f463e64fc5af4a408b73b21)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 1990 Mentat Inc.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/dlpi.h>
29 #include <sys/stropts.h>
30 #include <sys/sysmacros.h>
31 #include <sys/strsun.h>
32 #include <sys/strlog.h>
33 #include <sys/strsubr.h>
34 #define	_SUN_TPI_VERSION	2
35 #include <sys/tihdr.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/sdt.h>
41 #include <sys/kobj.h>
42 #include <sys/zone.h>
43 #include <sys/neti.h>
44 #include <sys/hook.h>
45 
46 #include <sys/kmem.h>
47 #include <sys/systm.h>
48 #include <sys/param.h>
49 #include <sys/socket.h>
50 #include <sys/vtrace.h>
51 #include <sys/isa_defs.h>
52 #include <sys/atomic.h>
53 #include <sys/policy.h>
54 #include <sys/mac.h>
55 #include <net/if.h>
56 #include <net/if_types.h>
57 #include <net/route.h>
58 #include <net/if_dl.h>
59 #include <sys/sockio.h>
60 #include <netinet/in.h>
61 #include <netinet/ip6.h>
62 #include <netinet/icmp6.h>
63 #include <netinet/sctp.h>
64 
65 #include <inet/common.h>
66 #include <inet/mi.h>
67 #include <inet/optcom.h>
68 #include <inet/mib2.h>
69 #include <inet/nd.h>
70 #include <inet/arp.h>
71 
72 #include <inet/ip.h>
73 #include <inet/ip_impl.h>
74 #include <inet/ip6.h>
75 #include <inet/ip6_asp.h>
76 #include <inet/tcp.h>
77 #include <inet/tcp_impl.h>
78 #include <inet/udp_impl.h>
79 #include <inet/ipp_common.h>
80 
81 #include <inet/ip_multi.h>
82 #include <inet/ip_if.h>
83 #include <inet/ip_ire.h>
84 #include <inet/ip_rts.h>
85 #include <inet/ip_ndp.h>
86 #include <net/pfkeyv2.h>
87 #include <inet/sadb.h>
88 #include <inet/ipsec_impl.h>
89 #include <inet/iptun/iptun_impl.h>
90 #include <inet/sctp_ip.h>
91 #include <sys/pattr.h>
92 #include <inet/ipclassifier.h>
93 #include <inet/ipsecah.h>
94 #include <inet/rawip_impl.h>
95 #include <inet/rts_impl.h>
96 #include <sys/squeue_impl.h>
97 #include <sys/squeue.h>
98 
99 #include <sys/tsol/label.h>
100 #include <sys/tsol/tnet.h>
101 
102 /* Temporary; for CR 6451644 work-around */
103 #include <sys/ethernet.h>
104 
/*
 * Naming conventions:
 *      These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */
121 
122 /*
123  * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
124  * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
125  * from IANA. This mechanism will remain in effect until an official
126  * number is obtained.
127  */
128 uchar_t ip6opt_ls;
129 
130 const in6_addr_t ipv6_all_ones =
131 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
132 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
133 
134 #ifdef	_BIG_ENDIAN
135 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
136 #else	/* _BIG_ENDIAN */
137 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
138 #endif	/* _BIG_ENDIAN */
139 
140 #ifdef	_BIG_ENDIAN
141 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
142 #else  /* _BIG_ENDIAN */
143 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
144 #endif /* _BIG_ENDIAN */
145 
146 #ifdef _BIG_ENDIAN
147 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
148 #else  /* _BIG_ENDIAN */
149 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
150 #endif /* _BIG_ENDIAN */
151 
152 #ifdef _BIG_ENDIAN
153 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
154 #else  /* _BIG_ENDIAN */
155 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
156 #endif /* _BIG_ENDIAN */
157 
158 #ifdef _BIG_ENDIAN
159 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
160 #else  /* _BIG_ENDIAN */
161 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
162 #endif /* _BIG_ENDIAN */
163 
164 #ifdef _BIG_ENDIAN
165 const in6_addr_t ipv6_solicited_node_mcast =
166 			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
167 #else  /* _BIG_ENDIAN */
168 const in6_addr_t ipv6_solicited_node_mcast =
169 			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
170 #endif /* _BIG_ENDIAN */
171 
172 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
173 static void	icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
174 static void	icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
175     ip_recv_attr_t *);
176 static void	icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
177     ip_recv_attr_t *);
178 static void	icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
179     in6_addr_t *, ip_recv_attr_t *);
180 static void	icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
181     ip_recv_attr_t *);
182 static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
183 
184 /*
185  * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
186  * If the ICMP message is consumed by IP, i.e., it should not be delivered
187  * to any IPPROTO_ICMP raw sockets, then it returns NULL.
188  * Likewise, if the ICMP error is misformed (too short, etc), then it
189  * returns NULL. The caller uses this to determine whether or not to send
190  * to raw sockets.
191  *
192  * All error messages are passed to the matching transport stream.
193  *
194  * See comment for icmp_inbound_v4() on how IPsec is handled.
195  */
196 mblk_t *
197 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
198 {
199 	icmp6_t		*icmp6;
200 	ip6_t		*ip6h;		/* Outer header */
201 	int		ip_hdr_length;	/* Outer header length */
202 	boolean_t	interested;
203 	ill_t		*ill = ira->ira_ill;
204 	ip_stack_t	*ipst = ill->ill_ipst;
205 	mblk_t		*mp_ret = NULL;
206 
207 	ip6h = (ip6_t *)mp->b_rptr;
208 
209 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
210 
211 	/* Check for Martian packets  */
212 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
213 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
214 		ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
215 		freemsg(mp);
216 		return (NULL);
217 	}
218 
219 	/* Make sure ira_l2src is set for ndp_input */
220 	if (!(ira->ira_flags & IRAF_L2SRC_SET))
221 		ip_setl2src(mp, ira, ira->ira_rill);
222 
223 	ip_hdr_length = ira->ira_ip_hdr_length;
224 	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
225 		if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
226 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
227 			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
228 			freemsg(mp);
229 			return (NULL);
230 		}
231 		ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
232 		if (ip6h == NULL) {
233 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
234 			freemsg(mp);
235 			return (NULL);
236 		}
237 	}
238 
239 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
240 	DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
241 	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
242 	    icmp6->icmp6_code));
243 
244 	/*
245 	 * We will set "interested" to "true" if we should pass a copy to
246 	 * the transport i.e., if it is an error message.
247 	 */
248 	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
249 
250 	switch (icmp6->icmp6_type) {
251 	case ICMP6_DST_UNREACH:
252 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
253 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
254 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
255 		break;
256 
257 	case ICMP6_TIME_EXCEEDED:
258 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
259 		break;
260 
261 	case ICMP6_PARAM_PROB:
262 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
263 		break;
264 
265 	case ICMP6_PACKET_TOO_BIG:
266 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
267 		break;
268 
269 	case ICMP6_ECHO_REQUEST:
270 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
271 		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
272 		    !ipst->ips_ipv6_resp_echo_mcast)
273 			break;
274 
275 		/*
276 		 * We must have exclusive use of the mblk to convert it to
277 		 * a response.
278 		 * If not, we copy it.
279 		 */
280 		if (mp->b_datap->db_ref > 1) {
281 			mblk_t	*mp1;
282 
283 			mp1 = copymsg(mp);
284 			if (mp1 == NULL) {
285 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
286 				ip_drop_input("ipIfStatsInDiscards - copymsg",
287 				    mp, ill);
288 				freemsg(mp);
289 				return (NULL);
290 			}
291 			freemsg(mp);
292 			mp = mp1;
293 			ip6h = (ip6_t *)mp->b_rptr;
294 			icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
295 		}
296 
297 		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
298 		icmp_send_reply_v6(mp, ip6h, icmp6, ira);
299 		return (NULL);
300 
301 	case ICMP6_ECHO_REPLY:
302 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
303 		break;
304 
305 	case ND_ROUTER_SOLICIT:
306 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
307 		break;
308 
309 	case ND_ROUTER_ADVERT:
310 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
311 		break;
312 
313 	case ND_NEIGHBOR_SOLICIT:
314 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
315 		ndp_input(mp, ira);
316 		return (NULL);
317 
318 	case ND_NEIGHBOR_ADVERT:
319 		BUMP_MIB(ill->ill_icmp6_mib,
320 		    ipv6IfIcmpInNeighborAdvertisements);
321 		ndp_input(mp, ira);
322 		return (NULL);
323 
324 	case ND_REDIRECT:
325 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
326 
327 		if (ipst->ips_ipv6_ignore_redirect)
328 			break;
329 
330 		/* We now allow a RAW socket to receive this. */
331 		interested = B_TRUE;
332 		break;
333 
334 	/*
335 	 * The next three icmp messages will be handled by MLD.
336 	 * Pass all valid MLD packets up to any process(es)
337 	 * listening on a raw ICMP socket.
338 	 */
339 	case MLD_LISTENER_QUERY:
340 	case MLD_LISTENER_REPORT:
341 	case MLD_LISTENER_REDUCTION:
342 		mp = mld_input(mp, ira);
343 		return (mp);
344 	default:
345 		break;
346 	}
347 	/*
348 	 * See if there is an ICMP client to avoid an extra copymsg/freemsg
349 	 * if there isn't one.
350 	 */
351 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
352 		/* If there is an ICMP client and we want one too, copy it. */
353 
354 		if (!interested) {
355 			/* Caller will deliver to RAW sockets */
356 			return (mp);
357 		}
358 		mp_ret = copymsg(mp);
359 		if (mp_ret == NULL) {
360 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
361 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
362 		}
363 	} else if (!interested) {
364 		/* Neither we nor raw sockets are interested. Drop packet now */
365 		freemsg(mp);
366 		return (NULL);
367 	}
368 
369 	/*
370 	 * ICMP error or redirect packet. Make sure we have enough of
371 	 * the header and that db_ref == 1 since we might end up modifying
372 	 * the packet.
373 	 */
374 	if (mp->b_cont != NULL) {
375 		if (ip_pullup(mp, -1, ira) == NULL) {
376 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
377 			ip_drop_input("ipIfStatsInDiscards - ip_pullup",
378 			    mp, ill);
379 			freemsg(mp);
380 			return (mp_ret);
381 		}
382 	}
383 
384 	if (mp->b_datap->db_ref > 1) {
385 		mblk_t	*mp1;
386 
387 		mp1 = copymsg(mp);
388 		if (mp1 == NULL) {
389 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
390 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
391 			freemsg(mp);
392 			return (mp_ret);
393 		}
394 		freemsg(mp);
395 		mp = mp1;
396 	}
397 
398 	/*
399 	 * In case mp has changed, verify the message before any further
400 	 * processes.
401 	 */
402 	ip6h = (ip6_t *)mp->b_rptr;
403 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
404 	if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
405 		freemsg(mp);
406 		return (mp_ret);
407 	}
408 
409 	switch (icmp6->icmp6_type) {
410 	case ND_REDIRECT:
411 		icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
412 		break;
413 	case ICMP6_PACKET_TOO_BIG:
414 		/* Update DCE and adjust MTU is icmp header if needed */
415 		icmp_inbound_too_big_v6(icmp6, ira);
416 		/* FALLTHRU */
417 	default:
418 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
419 		break;
420 	}
421 
422 	return (mp_ret);
423 }
424 
425 /*
426  * Send an ICMP echo reply.
427  * The caller has already updated the payload part of the packet.
428  * We handle the ICMP checksum, IP source address selection and feed
429  * the packet into ip_output_simple.
430  */
431 static void
432 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
433     ip_recv_attr_t *ira)
434 {
435 	uint_t		ip_hdr_length = ira->ira_ip_hdr_length;
436 	ill_t		*ill = ira->ira_ill;
437 	ip_stack_t	*ipst = ill->ill_ipst;
438 	ip_xmit_attr_t	ixas;
439 	in6_addr_t	origsrc;
440 
441 	/*
442 	 * Remove any extension headers (do not reverse a source route)
443 	 * and clear the flow id (keep traffic class for now).
444 	 */
445 	if (ip_hdr_length != IPV6_HDR_LEN) {
446 		int	i;
447 
448 		for (i = 0; i < IPV6_HDR_LEN; i++) {
449 			mp->b_rptr[ip_hdr_length - i - 1] =
450 			    mp->b_rptr[IPV6_HDR_LEN - i - 1];
451 		}
452 		mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
453 		ip6h = (ip6_t *)mp->b_rptr;
454 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
455 		i = ntohs(ip6h->ip6_plen);
456 		i -= (ip_hdr_length - IPV6_HDR_LEN);
457 		ip6h->ip6_plen = htons(i);
458 		ip_hdr_length = IPV6_HDR_LEN;
459 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
460 	}
461 	ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
462 
463 	/* Reverse the source and destination addresses. */
464 	origsrc = ip6h->ip6_src;
465 	ip6h->ip6_src = ip6h->ip6_dst;
466 	ip6h->ip6_dst = origsrc;
467 
468 	/* set the hop limit */
469 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
470 
471 	/*
472 	 * Prepare for checksum by putting icmp length in the icmp
473 	 * checksum field. The checksum is calculated in ip_output
474 	 */
475 	icmp6->icmp6_cksum = ip6h->ip6_plen;
476 
477 	bzero(&ixas, sizeof (ixas));
478 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
479 	ixas.ixa_zoneid = ira->ira_zoneid;
480 	ixas.ixa_cred = kcred;
481 	ixas.ixa_cpid = NOPID;
482 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
483 	ixas.ixa_ifindex = 0;
484 	ixas.ixa_ipst = ipst;
485 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
486 
487 	if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
488 		/*
489 		 * This packet should go out the same way as it
490 		 * came in i.e in clear, independent of the IPsec
491 		 * policy for transmitting packets.
492 		 */
493 		ixas.ixa_flags |= IXAF_NO_IPSEC;
494 	} else {
495 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
496 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
497 			/* Note: mp already consumed and ip_drop_packet done */
498 			return;
499 		}
500 	}
501 
502 	/* Was the destination (now source) link-local? Send out same group */
503 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
504 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
505 		if (IS_UNDER_IPMP(ill))
506 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
507 		else
508 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
509 	}
510 
511 	if (ira->ira_flags & IRAF_MULTIBROADCAST) {
512 		/*
513 		 * Not one or our addresses (IRE_LOCALs), thus we let
514 		 * ip_output_simple pick the source.
515 		 */
516 		ip6h->ip6_src = ipv6_all_zeros;
517 		ixas.ixa_flags |= IXAF_SET_SOURCE;
518 	}
519 
520 	/* Should we send using dce_pmtu? */
521 	if (ipst->ips_ipv6_icmp_return_pmtu)
522 		ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
523 
524 	(void) ip_output_simple(mp, &ixas);
525 	ixa_cleanup(&ixas);
526 
527 }
528 
529 /*
530  * Verify the ICMP messages for either for ICMP error or redirect packet.
531  * The caller should have fully pulled up the message. If it's a redirect
532  * packet, only basic checks on IP header will be done; otherwise, verify
533  * the packet by looking at the included ULP header.
534  *
535  * Called before icmp_inbound_error_fanout_v6 is called.
536  */
537 static boolean_t
538 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
539 {
540 	ill_t		*ill = ira->ira_ill;
541 	uint16_t	hdr_length;
542 	uint8_t		*nexthdrp;
543 	uint8_t		nexthdr;
544 	ip_stack_t	*ipst = ill->ill_ipst;
545 	conn_t		*connp;
546 	ip6_t		*ip6h;	/* Inner header */
547 
548 	ip6h = (ip6_t *)&icmp6[1];
549 	if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
550 		goto truncated;
551 
552 	if (icmp6->icmp6_type == ND_REDIRECT) {
553 		hdr_length = sizeof (nd_redirect_t);
554 	} else {
555 		if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
556 			goto discard_pkt;
557 		hdr_length = IPV6_HDR_LEN;
558 	}
559 
560 	if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
561 		goto truncated;
562 
563 	/*
564 	 * Stop here for ICMP_REDIRECT.
565 	 */
566 	if (icmp6->icmp6_type == ND_REDIRECT)
567 		return (B_TRUE);
568 
569 	/*
570 	 * ICMP errors only.
571 	 */
572 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
573 		goto discard_pkt;
574 	nexthdr = *nexthdrp;
575 
576 	/* Try to pass the ICMP message to clients who need it */
577 	switch (nexthdr) {
578 	case IPPROTO_UDP:
579 		/*
580 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
581 		 * transport header.
582 		 */
583 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
584 		    mp->b_wptr)
585 			goto truncated;
586 		break;
587 	case IPPROTO_TCP: {
588 		tcpha_t		*tcpha;
589 
590 		/*
591 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
592 		 * transport header.
593 		 */
594 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
595 		    mp->b_wptr)
596 			goto truncated;
597 
598 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
599 		/*
600 		 * With IPMP we need to match across group, which we do
601 		 * since we have the upper ill from ira_ill.
602 		 */
603 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
604 		    ill->ill_phyint->phyint_ifindex, ipst);
605 		if (connp == NULL)
606 			goto discard_pkt;
607 
608 		if ((connp->conn_verifyicmp != NULL) &&
609 		    !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
610 			CONN_DEC_REF(connp);
611 			goto discard_pkt;
612 		}
613 		CONN_DEC_REF(connp);
614 		break;
615 	}
616 	case IPPROTO_SCTP:
617 		/*
618 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
619 		 * transport header.
620 		 */
621 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
622 		    mp->b_wptr)
623 			goto truncated;
624 		break;
625 	case IPPROTO_ESP:
626 	case IPPROTO_AH:
627 		break;
628 	case IPPROTO_ENCAP:
629 	case IPPROTO_IPV6: {
630 		/* Look for self-encapsulated packets that caused an error */
631 		ip6_t *in_ip6h;
632 
633 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
634 		if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
635 		    sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
636 			goto truncated;
637 		break;
638 	}
639 	default:
640 		break;
641 	}
642 
643 	return (B_TRUE);
644 
645 discard_pkt:
646 	/* Bogus ICMP error. */
647 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
648 	return (B_FALSE);
649 
650 truncated:
651 	/* We pulled up everthing already. Must be truncated */
652 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
653 	return (B_FALSE);
654 }
655 
656 /*
657  * Process received IPv6 ICMP Packet too big.
658  * The caller is responsible for validating the packet before passing it in
659  * and also to fanout the ICMP error to any matching transport conns. Assumes
660  * the message has been fully pulled up.
661  *
662  * Before getting here, the caller has called icmp_inbound_verify_v6()
663  * that should have verified with ULP to prevent undoing the changes we're
664  * going to make to DCE. For example, TCP might have verified that the packet
665  * which generated error is in the send window.
666  *
667  * In some cases modified this MTU in the ICMP header packet; the caller
668  * should pass to the matching ULP after this returns.
669  */
670 static void
671 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
672 {
673 	uint32_t	mtu;
674 	dce_t		*dce;
675 	ill_t		*ill = ira->ira_ill;	/* Upper ill if IPMP */
676 	ip_stack_t	*ipst = ill->ill_ipst;
677 	int		old_max_frag;
678 	in6_addr_t	final_dst;
679 	ip6_t		*ip6h;	/* Inner IP header */
680 
681 	/* Caller has already pulled up everything. */
682 	ip6h = (ip6_t *)&icmp6[1];
683 	final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
684 
685 	/*
686 	 * For link local destinations matching simply on address is not
687 	 * sufficient. Same link local addresses for different ILL's is
688 	 * possible.
689 	 */
690 	if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
691 		dce = dce_lookup_and_add_v6(&final_dst,
692 		    ill->ill_phyint->phyint_ifindex, ipst);
693 	} else {
694 		dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
695 	}
696 	if (dce == NULL) {
697 		/* Couldn't add a unique one - ENOMEM */
698 		if (ip_debug > 2) {
699 			/* ip1dbg */
700 			pr_addr_dbg("icmp_inbound_too_big_v6:"
701 			    "no dce for dst %s\n", AF_INET6,
702 			    &final_dst);
703 		}
704 		return;
705 	}
706 
707 	mtu = ntohl(icmp6->icmp6_mtu);
708 
709 	mutex_enter(&dce->dce_lock);
710 	if (dce->dce_flags & DCEF_PMTU)
711 		old_max_frag = dce->dce_pmtu;
712 	else
713 		old_max_frag = ill->ill_mtu;
714 
715 	if (mtu < IPV6_MIN_MTU) {
716 		ip1dbg(("Received mtu less than IPv6 "
717 		    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
718 		mtu = IPV6_MIN_MTU;
719 		/*
720 		 * If an mtu less than IPv6 min mtu is received,
721 		 * we must include a fragment header in
722 		 * subsequent packets.
723 		 */
724 		dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
725 	} else {
726 		dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
727 	}
728 	ip1dbg(("Received mtu from router: %d\n", mtu));
729 	dce->dce_pmtu = MIN(old_max_frag, mtu);
730 
731 	/* Prepare to send the new max frag size for the ULP. */
732 	if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) {
733 		/*
734 		 * If we need a fragment header in every packet
735 		 * (above case or multirouting), make sure the
736 		 * ULP takes it into account when computing the
737 		 * payload size.
738 		 */
739 		icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
740 	} else {
741 		icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
742 	}
743 	/* We now have a PMTU for sure */
744 	dce->dce_flags |= DCEF_PMTU;
745 	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
746 	mutex_exit(&dce->dce_lock);
747 	/*
748 	 * After dropping the lock the new value is visible to everyone.
749 	 * Then we bump the generation number so any cached values reinspect
750 	 * the dce_t.
751 	 */
752 	dce_increment_generation(dce);
753 	dce_refrele(dce);
754 }
755 
756 /*
757  * Fanout received ICMPv6 error packets to the transports.
758  * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
759  *
760  * The caller must have called icmp_inbound_verify_v6.
761  */
762 void
763 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
764 {
765 	uint16_t	*up;	/* Pointer to ports in ULP header */
766 	uint32_t	ports;	/* reversed ports for fanout */
767 	ip6_t		rip6h;	/* With reversed addresses */
768 	ip6_t		*ip6h;	/* Inner IP header */
769 	uint16_t	hdr_length; /* Inner IP header length */
770 	uint8_t		*nexthdrp;
771 	uint8_t		nexthdr;
772 	tcpha_t		*tcpha;
773 	conn_t		*connp;
774 	ill_t		*ill = ira->ira_ill;	/* Upper in the case of IPMP */
775 	ip_stack_t	*ipst = ill->ill_ipst;
776 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
777 
778 	/* Caller has already pulled up everything. */
779 	ip6h = (ip6_t *)&icmp6[1];
780 	ASSERT(mp->b_cont == NULL);
781 	ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
782 
783 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
784 		goto drop_pkt;
785 	nexthdr = *nexthdrp;
786 	ira->ira_protocol = nexthdr;
787 
788 	/*
789 	 * We need a separate IP header with the source and destination
790 	 * addresses reversed to do fanout/classification because the ip6h in
791 	 * the ICMPv6 error is in the form we sent it out.
792 	 */
793 	rip6h.ip6_src = ip6h->ip6_dst;
794 	rip6h.ip6_dst = ip6h->ip6_src;
795 	rip6h.ip6_nxt = nexthdr;
796 
797 	/* Try to pass the ICMP message to clients who need it */
798 	switch (nexthdr) {
799 	case IPPROTO_UDP: {
800 		/* Attempt to find a client stream based on port. */
801 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
802 
803 		/* Note that we send error to all matches. */
804 		ira->ira_flags |= IRAF_ICMP_ERROR;
805 		ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
806 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
807 		return;
808 	}
809 	case IPPROTO_TCP: {
810 		/*
811 		 * Attempt to find a client stream based on port.
812 		 * Note that we do a reverse lookup since the header is
813 		 * in the form we sent it out.
814 		 */
815 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
816 		/*
817 		 * With IPMP we need to match across group, which we do
818 		 * since we have the upper ill from ira_ill.
819 		 */
820 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
821 		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
822 		if (connp == NULL) {
823 			goto drop_pkt;
824 		}
825 
826 		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
827 		    (ira->ira_flags & IRAF_IPSEC_SECURE)) {
828 			mp = ipsec_check_inbound_policy(mp, connp,
829 			    NULL, ip6h, ira);
830 			if (mp == NULL) {
831 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
832 				/* Note that mp is NULL */
833 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
834 				CONN_DEC_REF(connp);
835 				return;
836 			}
837 		}
838 
839 		ira->ira_flags |= IRAF_ICMP_ERROR;
840 		if (IPCL_IS_TCP(connp)) {
841 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
842 			    connp->conn_recvicmp, connp, ira, SQ_FILL,
843 			    SQTAG_TCP6_INPUT_ICMP_ERR);
844 		} else {
845 			/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
846 			ill_t *rill = ira->ira_rill;
847 
848 			ira->ira_ill = ira->ira_rill = NULL;
849 			(connp->conn_recv)(connp, mp, NULL, ira);
850 			CONN_DEC_REF(connp);
851 			ira->ira_ill = ill;
852 			ira->ira_rill = rill;
853 		}
854 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
855 		return;
856 
857 	}
858 	case IPPROTO_SCTP:
859 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
860 		/* Find a SCTP client stream for this packet. */
861 		((uint16_t *)&ports)[0] = up[1];
862 		((uint16_t *)&ports)[1] = up[0];
863 
864 		ira->ira_flags |= IRAF_ICMP_ERROR;
865 		ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
866 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
867 		return;
868 
869 	case IPPROTO_ESP:
870 	case IPPROTO_AH:
871 		if (!ipsec_loaded(ipss)) {
872 			ip_proto_not_sup(mp, ira);
873 			return;
874 		}
875 
876 		if (nexthdr == IPPROTO_ESP)
877 			mp = ipsecesp_icmp_error(mp, ira);
878 		else
879 			mp = ipsecah_icmp_error(mp, ira);
880 		if (mp == NULL)
881 			return;
882 
883 		/* Just in case ipsec didn't preserve the NULL b_cont */
884 		if (mp->b_cont != NULL) {
885 			if (!pullupmsg(mp, -1))
886 				goto drop_pkt;
887 		}
888 
889 		/*
890 		 * If succesful, the mp has been modified to not include
891 		 * the ESP/AH header so we can fanout to the ULP's icmp
892 		 * error handler.
893 		 */
894 		if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
895 			goto drop_pkt;
896 
897 		ip6h = (ip6_t *)mp->b_rptr;
898 		/* Don't call hdr_length_v6() unless you have to. */
899 		if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
900 			hdr_length = ip_hdr_length_v6(mp, ip6h);
901 		else
902 			hdr_length = IPV6_HDR_LEN;
903 
904 		/* Verify the modified message before any further processes. */
905 		icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
906 		if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
907 			freemsg(mp);
908 			return;
909 		}
910 
911 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
912 		return;
913 
914 	case IPPROTO_IPV6: {
915 		/* Look for self-encapsulated packets that caused an error */
916 		ip6_t *in_ip6h;
917 
918 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
919 
920 		if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
921 		    IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
922 			/*
923 			 * Self-encapsulated case. As in the ipv4 case,
924 			 * we need to strip the 2nd IP header. Since mp
925 			 * is already pulled-up, we can simply bcopy
926 			 * the 3rd header + data over the 2nd header.
927 			 */
928 			uint16_t unused_len;
929 
930 			/*
931 			 * Make sure we don't do recursion more than once.
932 			 */
933 			if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
934 			    &unused_len, &nexthdrp) ||
935 			    *nexthdrp == IPPROTO_IPV6) {
936 				goto drop_pkt;
937 			}
938 
939 			/*
940 			 * Copy the 3rd header + remaining data on top
941 			 * of the 2nd header.
942 			 */
943 			bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
944 
945 			/*
946 			 * Subtract length of the 2nd header.
947 			 */
948 			mp->b_wptr -= hdr_length;
949 
950 			ip6h = (ip6_t *)mp->b_rptr;
951 			/* Don't call hdr_length_v6() unless you have to. */
952 			if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
953 				hdr_length = ip_hdr_length_v6(mp, ip6h);
954 			else
955 				hdr_length = IPV6_HDR_LEN;
956 
957 			/*
958 			 * Verify the modified message before any further
959 			 * processes.
960 			 */
961 			icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
962 			if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
963 				freemsg(mp);
964 				return;
965 			}
966 
967 			/*
968 			 * Now recurse, and see what I _really_ should be
969 			 * doing here.
970 			 */
971 			icmp_inbound_error_fanout_v6(mp, icmp6, ira);
972 			return;
973 		}
974 		/* FALLTHRU */
975 	}
976 	case IPPROTO_ENCAP:
977 		if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
978 		    &rip6h.ip6_dst, ipst)) != NULL) {
979 			ira->ira_flags |= IRAF_ICMP_ERROR;
980 			connp->conn_recvicmp(connp, mp, NULL, ira);
981 			CONN_DEC_REF(connp);
982 			ira->ira_flags &= ~IRAF_ICMP_ERROR;
983 			return;
984 		}
985 		/*
986 		 * No IP tunnel is interested, fallthrough and see
987 		 * if a raw socket will want it.
988 		 */
989 		/* FALLTHRU */
990 	default:
991 		ira->ira_flags |= IRAF_ICMP_ERROR;
992 		ASSERT(ira->ira_protocol == nexthdr);
993 		ip_fanout_proto_v6(mp, &rip6h, ira);
994 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
995 		return;
996 	}
997 	/* NOTREACHED */
998 drop_pkt:
999 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1000 	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
1001 	freemsg(mp);
1002 }
1003 
1004 /*
1005  * Process received IPv6 ICMP Redirect messages.
1006  * Assumes the caller has verified that the headers are in the pulled up mblk.
1007  * Consumes mp.
1008  */
1009 /* ARGSUSED */
1010 static void
1011 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1012     ip_recv_attr_t *ira)
1013 {
1014 	ire_t		*ire, *nire;
1015 	ire_t		*prev_ire = NULL;
1016 	ire_t		*redir_ire;
1017 	in6_addr_t	*src, *dst, *gateway;
1018 	nd_opt_hdr_t	*opt;
1019 	nce_t		*nce;
1020 	int		ncec_flags = 0;
1021 	int		err = 0;
1022 	boolean_t	redirect_to_router = B_FALSE;
1023 	int		len;
1024 	int		optlen;
1025 	ill_t		*ill = ira->ira_rill;
1026 	ill_t		*rill = ira->ira_rill;
1027 	ip_stack_t	*ipst = ill->ill_ipst;
1028 
1029 	/*
1030 	 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1031 	 * and make it be the IPMP upper so avoid being confused by a packet
1032 	 * addressed to a unicast address on a different ill.
1033 	 */
1034 	if (IS_UNDER_IPMP(rill)) {
1035 		rill = ipmp_ill_hold_ipmp_ill(rill);
1036 		if (rill == NULL) {
1037 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1038 			ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1039 			    mp, ill);
1040 			freemsg(mp);
1041 			return;
1042 		}
1043 		ASSERT(rill != ira->ira_rill);
1044 	}
1045 
1046 	len = mp->b_wptr - (uchar_t *)rd;
1047 	src = &ip6h->ip6_src;
1048 	dst = &rd->nd_rd_dst;
1049 	gateway = &rd->nd_rd_target;
1050 
1051 	/* Verify if it is a valid redirect */
1052 	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1053 	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1054 	    (rd->nd_rd_code != 0) ||
1055 	    (len < sizeof (nd_redirect_t)) ||
1056 	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
1057 	    (IN6_IS_ADDR_MULTICAST(dst))) {
1058 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1059 		ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1060 		goto fail_redirect;
1061 	}
1062 
1063 	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1064 	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1065 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1066 		ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1067 		    mp, ill);
1068 		goto fail_redirect;
1069 	}
1070 
1071 	optlen = len - sizeof (nd_redirect_t);
1072 	if (optlen != 0) {
1073 		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1074 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1075 			ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1076 			    mp, ill);
1077 			goto fail_redirect;
1078 		}
1079 	}
1080 
1081 	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1082 		redirect_to_router = B_TRUE;
1083 		ncec_flags |= NCE_F_ISROUTER;
1084 	} else {
1085 		gateway = dst;	/* Add nce for dst */
1086 	}
1087 
1088 
1089 	/*
1090 	 * Verify that the IP source address of the redirect is
1091 	 * the same as the current first-hop router for the specified
1092 	 * ICMP destination address.
1093 	 * Also, Make sure we had a route for the dest in question and
1094 	 * that route was pointing to the old gateway (the source of the
1095 	 * redirect packet.)
1096 	 * We do longest match and then compare ire_gateway_addr_v6 below.
1097 	 */
1098 	prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1099 	    ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1100 
1101 	/*
1102 	 * Check that
1103 	 *	the redirect was not from ourselves
1104 	 *	old gateway is still directly reachable
1105 	 */
1106 	if (prev_ire == NULL ||
1107 	    (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1108 	    (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1109 	    !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1110 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1111 		ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1112 		goto fail_redirect;
1113 	}
1114 
1115 	ASSERT(prev_ire->ire_ill != NULL);
1116 	if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1117 		ncec_flags |= NCE_F_NONUD;
1118 
1119 	opt = (nd_opt_hdr_t *)&rd[1];
1120 	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1121 	if (opt != NULL) {
1122 		err = nce_lookup_then_add_v6(rill,
1123 		    (uchar_t *)&opt[1],		/* Link layer address */
1124 		    rill->ill_phys_addr_length,
1125 		    gateway, ncec_flags, ND_STALE, &nce);
1126 		switch (err) {
1127 		case 0:
1128 			nce_refrele(nce);
1129 			break;
1130 		case EEXIST:
1131 			/*
1132 			 * Check to see if link layer address has changed and
1133 			 * process the ncec_state accordingly.
1134 			 */
1135 			nce_process(nce->nce_common,
1136 			    (uchar_t *)&opt[1], 0, B_FALSE);
1137 			nce_refrele(nce);
1138 			break;
1139 		default:
1140 			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1141 			    err));
1142 			goto fail_redirect;
1143 		}
1144 	}
1145 	if (redirect_to_router) {
1146 		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1147 
1148 		/*
1149 		 * Create a Route Association.  This will allow us to remember
1150 		 * a router told us to use the particular gateway.
1151 		 */
1152 		ire = ire_create_v6(
1153 		    dst,
1154 		    &ipv6_all_ones,		/* mask */
1155 		    gateway,			/* gateway addr */
1156 		    IRE_HOST,
1157 		    prev_ire->ire_ill,
1158 		    ALL_ZONES,
1159 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1160 		    NULL,
1161 		    ipst);
1162 	} else {
1163 		ipif_t *ipif;
1164 		in6_addr_t gw;
1165 
1166 		/*
1167 		 * Just create an on link entry, i.e. interface route.
1168 		 * The gateway field is our link-local on the ill.
1169 		 */
1170 		mutex_enter(&rill->ill_lock);
1171 		for (ipif = rill->ill_ipif; ipif != NULL;
1172 		    ipif = ipif->ipif_next) {
1173 			if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1174 			    IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1175 				break;
1176 		}
1177 		if (ipif == NULL) {
1178 			/* We have no link-local address! */
1179 			mutex_exit(&rill->ill_lock);
1180 			goto fail_redirect;
1181 		}
1182 		gw = ipif->ipif_v6lcl_addr;
1183 		mutex_exit(&rill->ill_lock);
1184 
1185 		ire = ire_create_v6(
1186 		    dst,				/* gateway == dst */
1187 		    &ipv6_all_ones,			/* mask */
1188 		    &gw,				/* gateway addr */
1189 		    rill->ill_net_type,			/* IF_[NO]RESOLVER */
1190 		    prev_ire->ire_ill,
1191 		    ALL_ZONES,
1192 		    (RTF_DYNAMIC | RTF_HOST),
1193 		    NULL,
1194 		    ipst);
1195 	}
1196 
1197 	if (ire == NULL)
1198 		goto fail_redirect;
1199 
1200 	nire = ire_add(ire);
1201 	/* Check if it was a duplicate entry */
1202 	if (nire != NULL && nire != ire) {
1203 		ASSERT(nire->ire_identical_ref > 1);
1204 		ire_delete(nire);
1205 		ire_refrele(nire);
1206 		nire = NULL;
1207 	}
1208 	ire = nire;
1209 	if (ire != NULL) {
1210 		ire_refrele(ire);		/* Held in ire_add */
1211 
1212 		/* tell routing sockets that we received a redirect */
1213 		ip_rts_change_v6(RTM_REDIRECT,
1214 		    &rd->nd_rd_dst,
1215 		    &rd->nd_rd_target,
1216 		    &ipv6_all_ones, 0, src,
1217 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1218 		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1219 
1220 		/*
1221 		 * Delete any existing IRE_HOST type ires for this destination.
1222 		 * This together with the added IRE has the effect of
1223 		 * modifying an existing redirect.
1224 		 */
1225 		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1226 		    prev_ire->ire_ill, ALL_ZONES, NULL,
1227 		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1228 		    NULL);
1229 
1230 		if (redir_ire != NULL) {
1231 			if (redir_ire->ire_flags & RTF_DYNAMIC)
1232 				ire_delete(redir_ire);
1233 			ire_refrele(redir_ire);
1234 		}
1235 	}
1236 
1237 	ire_refrele(prev_ire);
1238 	prev_ire = NULL;
1239 
1240 fail_redirect:
1241 	if (prev_ire != NULL)
1242 		ire_refrele(prev_ire);
1243 	freemsg(mp);
1244 	if (rill != ira->ira_rill)
1245 		ill_refrele(rill);
1246 }
1247 
1248 /*
1249  * Build and ship an IPv6 ICMP message using the packet data in mp,
1250  * and the ICMP header pointed to by "stuff".  (May be called as
1251  * writer.)
1252  * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1253  * verify that an icmp error packet can be sent.
1254  *
1255  * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1256  * source address (see above function).
1257  */
1258 static void
1259 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1260     const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1261 {
1262 	ip6_t		*ip6h;
1263 	in6_addr_t	v6dst;
1264 	size_t		len_needed;
1265 	size_t		msg_len;
1266 	mblk_t		*mp1;
1267 	icmp6_t		*icmp6;
1268 	in6_addr_t	v6src;
1269 	ill_t		*ill = ira->ira_ill;
1270 	ip_stack_t	*ipst = ill->ill_ipst;
1271 	ip_xmit_attr_t	ixas;
1272 
1273 	ip6h = (ip6_t *)mp->b_rptr;
1274 
1275 	bzero(&ixas, sizeof (ixas));
1276 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1277 	ixas.ixa_zoneid = ira->ira_zoneid;
1278 	ixas.ixa_ifindex = 0;
1279 	ixas.ixa_ipst = ipst;
1280 	ixas.ixa_cred = kcred;
1281 	ixas.ixa_cpid = NOPID;
1282 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
1283 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1284 
1285 	/*
1286 	 * If the source of the original packet was link-local, then
1287 	 * make sure we send on the same ill (group) as we received it on.
1288 	 */
1289 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1290 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
1291 		if (IS_UNDER_IPMP(ill))
1292 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1293 		else
1294 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1295 	}
1296 
1297 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1298 		/*
1299 		 * Apply IPsec based on how IPsec was applied to
1300 		 * the packet that had the error.
1301 		 *
1302 		 * If it was an outbound packet that caused the ICMP
1303 		 * error, then the caller will have setup the IRA
1304 		 * appropriately.
1305 		 */
1306 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1307 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1308 			/* Note: mp already consumed and ip_drop_packet done */
1309 			return;
1310 		}
1311 	} else {
1312 		/*
1313 		 * This is in clear. The icmp message we are building
1314 		 * here should go out in clear, independent of our policy.
1315 		 */
1316 		ixas.ixa_flags |= IXAF_NO_IPSEC;
1317 	}
1318 
1319 	/*
1320 	 * If the caller specified the source we use that.
1321 	 * Otherwise, if the packet was for one of our unicast addresses, make
1322 	 * sure we respond with that as the source. Otherwise
1323 	 * have ip_output_simple pick the source address.
1324 	 */
1325 	if (v6src_ptr != NULL) {
1326 		v6src = *v6src_ptr;
1327 	} else {
1328 		ire_t *ire;
1329 		uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1330 
1331 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1332 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1333 			match_flags |= MATCH_IRE_ILL;
1334 
1335 		ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1336 		    (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1337 		    match_flags, 0, ipst, NULL);
1338 		if (ire != NULL) {
1339 			v6src = ip6h->ip6_dst;
1340 			ire_refrele(ire);
1341 		} else {
1342 			v6src = ipv6_all_zeros;
1343 			ixas.ixa_flags |= IXAF_SET_SOURCE;
1344 		}
1345 	}
1346 	v6dst = ip6h->ip6_src;
1347 	len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1348 	msg_len = msgdsize(mp);
1349 	if (msg_len > len_needed) {
1350 		if (!adjmsg(mp, len_needed - msg_len)) {
1351 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1352 			freemsg(mp);
1353 			return;
1354 		}
1355 		msg_len = len_needed;
1356 	}
1357 	mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1358 	if (mp1 == NULL) {
1359 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1360 		freemsg(mp);
1361 		return;
1362 	}
1363 	mp1->b_cont = mp;
1364 	mp = mp1;
1365 
1366 	/*
1367 	 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1368 	 * node generates be accepted in peace by all on-host destinations.
1369 	 * If we do NOT assume that all on-host destinations trust
1370 	 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1371 	 * (Look for IXAF_TRUSTED_ICMP).
1372 	 */
1373 	ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1374 
1375 	ip6h = (ip6_t *)mp->b_rptr;
1376 	mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1377 
1378 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1379 	ip6h->ip6_nxt = IPPROTO_ICMPV6;
1380 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1381 	ip6h->ip6_dst = v6dst;
1382 	ip6h->ip6_src = v6src;
1383 	msg_len += IPV6_HDR_LEN + len;
1384 	if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1385 		(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1386 		msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1387 	}
1388 	ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1389 	icmp6 = (icmp6_t *)&ip6h[1];
1390 	bcopy(stuff, (char *)icmp6, len);
1391 	/*
1392 	 * Prepare for checksum by putting icmp length in the icmp
1393 	 * checksum field. The checksum is calculated in ip_output_wire_v6.
1394 	 */
1395 	icmp6->icmp6_cksum = ip6h->ip6_plen;
1396 	if (icmp6->icmp6_type == ND_REDIRECT) {
1397 		ip6h->ip6_hops = IPV6_MAX_HOPS;
1398 	}
1399 
1400 	(void) ip_output_simple(mp, &ixas);
1401 	ixa_cleanup(&ixas);
1402 }
1403 
1404 /*
1405  * Update the output mib when ICMPv6 packets are sent.
1406  */
1407 void
1408 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1409 {
1410 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1411 
1412 	switch (icmp6->icmp6_type) {
1413 	case ICMP6_DST_UNREACH:
1414 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1415 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1416 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1417 		break;
1418 
1419 	case ICMP6_TIME_EXCEEDED:
1420 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1421 		break;
1422 
1423 	case ICMP6_PARAM_PROB:
1424 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1425 		break;
1426 
1427 	case ICMP6_PACKET_TOO_BIG:
1428 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1429 		break;
1430 
1431 	case ICMP6_ECHO_REQUEST:
1432 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1433 		break;
1434 
1435 	case ICMP6_ECHO_REPLY:
1436 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1437 		break;
1438 
1439 	case ND_ROUTER_SOLICIT:
1440 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1441 		break;
1442 
1443 	case ND_ROUTER_ADVERT:
1444 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1445 		break;
1446 
1447 	case ND_NEIGHBOR_SOLICIT:
1448 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1449 		break;
1450 
1451 	case ND_NEIGHBOR_ADVERT:
1452 		BUMP_MIB(ill->ill_icmp6_mib,
1453 		    ipv6IfIcmpOutNeighborAdvertisements);
1454 		break;
1455 
1456 	case ND_REDIRECT:
1457 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1458 		break;
1459 
1460 	case MLD_LISTENER_QUERY:
1461 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1462 		break;
1463 
1464 	case MLD_LISTENER_REPORT:
1465 	case MLD_V2_LISTENER_REPORT:
1466 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1467 		break;
1468 
1469 	case MLD_LISTENER_REDUCTION:
1470 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1471 		break;
1472 	}
1473 }
1474 
1475 /*
1476  * Check if it is ok to send an ICMPv6 error packet in
1477  * response to the IP packet in mp.
1478  * Free the message and return null if no
1479  * ICMP error packet should be sent.
1480  */
1481 static mblk_t *
1482 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1483 {
1484 	ill_t		*ill = ira->ira_ill;
1485 	ip_stack_t	*ipst = ill->ill_ipst;
1486 	boolean_t	llbcast;
1487 	ip6_t		*ip6h;
1488 
1489 	if (!mp)
1490 		return (NULL);
1491 
1492 	/* We view multicast and broadcast as the same.. */
1493 	llbcast = (ira->ira_flags &
1494 	    (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1495 	ip6h = (ip6_t *)mp->b_rptr;
1496 
1497 	/* Check if source address uniquely identifies the host */
1498 
1499 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1500 	    IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1501 	    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1502 		freemsg(mp);
1503 		return (NULL);
1504 	}
1505 
1506 	if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1507 		size_t	len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1508 		icmp6_t		*icmp6;
1509 
1510 		if (mp->b_wptr - mp->b_rptr < len_needed) {
1511 			if (!pullupmsg(mp, len_needed)) {
1512 				BUMP_MIB(ill->ill_icmp6_mib,
1513 				    ipv6IfIcmpInErrors);
1514 				freemsg(mp);
1515 				return (NULL);
1516 			}
1517 			ip6h = (ip6_t *)mp->b_rptr;
1518 		}
1519 		icmp6 = (icmp6_t *)&ip6h[1];
1520 		/* Explicitly do not generate errors in response to redirects */
1521 		if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1522 		    icmp6->icmp6_type == ND_REDIRECT) {
1523 			freemsg(mp);
1524 			return (NULL);
1525 		}
1526 	}
1527 	/*
1528 	 * Check that the destination is not multicast and that the packet
1529 	 * was not sent on link layer broadcast or multicast.  (Exception
1530 	 * is Packet too big message as per the draft - when mcast_ok is set.)
1531 	 */
1532 	if (!mcast_ok &&
1533 	    (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1534 		freemsg(mp);
1535 		return (NULL);
1536 	}
1537 	/*
1538 	 * If this is a labeled system, then check to see if we're allowed to
1539 	 * send a response to this particular sender.  If not, then just drop.
1540 	 */
1541 	if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1542 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1543 		freemsg(mp);
1544 		return (NULL);
1545 	}
1546 
1547 	if (icmp_err_rate_limit(ipst)) {
1548 		/*
1549 		 * Only send ICMP error packets every so often.
1550 		 * This should be done on a per port/source basis,
1551 		 * but for now this will suffice.
1552 		 */
1553 		freemsg(mp);
1554 		return (NULL);
1555 	}
1556 	return (mp);
1557 }
1558 
1559 /*
1560  * Called when a packet was sent out the same link that it arrived on.
1561  * Check if it is ok to send a redirect and then send it.
1562  */
1563 void
1564 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1565     ip_recv_attr_t *ira)
1566 {
1567 	ill_t		*ill = ira->ira_ill;
1568 	ip_stack_t	*ipst = ill->ill_ipst;
1569 	in6_addr_t	*v6targ;
1570 	ire_t		*src_ire_v6 = NULL;
1571 	mblk_t		*mp1;
1572 	ire_t		*nhop_ire = NULL;
1573 
1574 	/*
1575 	 * Don't send a redirect when forwarding a source
1576 	 * routed packet.
1577 	 */
1578 	if (ip_source_routed_v6(ip6h, mp, ipst))
1579 		return;
1580 
1581 	if (ire->ire_type & IRE_ONLINK) {
1582 		/* Target is directly connected */
1583 		v6targ = &ip6h->ip6_dst;
1584 	} else {
1585 		/* Determine the most specific IRE used to send the packets */
1586 		nhop_ire = ire_nexthop(ire);
1587 		if (nhop_ire == NULL)
1588 			return;
1589 
1590 		/*
1591 		 * We won't send redirects to a router
1592 		 * that doesn't have a link local
1593 		 * address, but will forward.
1594 		 */
1595 		if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1596 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1597 			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1598 			ire_refrele(nhop_ire);
1599 			return;
1600 		}
1601 		v6targ = &nhop_ire->ire_addr_v6;
1602 	}
1603 	src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1604 	    NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1605 	    MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1606 
1607 	if (src_ire_v6 == NULL) {
1608 		if (nhop_ire != NULL)
1609 			ire_refrele(nhop_ire);
1610 		return;
1611 	}
1612 
1613 	/*
1614 	 * The source is directly connected.
1615 	 */
1616 	mp1 = copymsg(mp);
1617 	if (mp1 != NULL)
1618 		icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1619 
1620 	if (nhop_ire != NULL)
1621 		ire_refrele(nhop_ire);
1622 	ire_refrele(src_ire_v6);
1623 }
1624 
1625 /*
1626  * Generate an ICMPv6 redirect message.
1627  * Include target link layer address option if it exits.
1628  * Always include redirect header.
1629  */
1630 static void
1631 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1632     ip_recv_attr_t *ira)
1633 {
1634 	nd_redirect_t	*rd;
1635 	nd_opt_rd_hdr_t	*rdh;
1636 	uchar_t		*buf;
1637 	ncec_t		*ncec = NULL;
1638 	nd_opt_hdr_t	*opt;
1639 	int		len;
1640 	int		ll_opt_len = 0;
1641 	int		max_redir_hdr_data_len;
1642 	int		pkt_len;
1643 	in6_addr_t	*srcp;
1644 	ill_t		*ill;
1645 	boolean_t	need_refrele;
1646 	ip_stack_t	*ipst = ira->ira_ill->ill_ipst;
1647 
1648 	mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1649 	if (mp == NULL)
1650 		return;
1651 
1652 	if (IS_UNDER_IPMP(ira->ira_ill)) {
1653 		ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1654 		if (ill == NULL) {
1655 			ill = ira->ira_ill;
1656 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1657 			ip_drop_output("no IPMP ill for sending redirect",
1658 			    mp, ill);
1659 			freemsg(mp);
1660 			return;
1661 		}
1662 		need_refrele = B_TRUE;
1663 	} else {
1664 		ill = ira->ira_ill;
1665 		need_refrele = B_FALSE;
1666 	}
1667 
1668 	ncec = ncec_lookup_illgrp_v6(ill, targetp);
1669 	if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1670 	    ncec->ncec_lladdr != NULL) {
1671 		ll_opt_len = (sizeof (nd_opt_hdr_t) +
1672 		    ill->ill_phys_addr_length + 7)/8 * 8;
1673 	}
1674 	len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1675 	ASSERT(len % 4 == 0);
1676 	buf = kmem_alloc(len, KM_NOSLEEP);
1677 	if (buf == NULL) {
1678 		if (ncec != NULL)
1679 			ncec_refrele(ncec);
1680 		if (need_refrele)
1681 			ill_refrele(ill);
1682 		freemsg(mp);
1683 		return;
1684 	}
1685 
1686 	rd = (nd_redirect_t *)buf;
1687 	rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1688 	rd->nd_rd_code = 0;
1689 	rd->nd_rd_reserved = 0;
1690 	rd->nd_rd_target = *targetp;
1691 	rd->nd_rd_dst = *dest;
1692 
1693 	opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1694 	if (ncec != NULL && ll_opt_len != 0) {
1695 		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1696 		opt->nd_opt_len = ll_opt_len/8;
1697 		bcopy((char *)ncec->ncec_lladdr, &opt[1],
1698 		    ill->ill_phys_addr_length);
1699 	}
1700 	if (ncec != NULL)
1701 		ncec_refrele(ncec);
1702 	rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1703 	rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1704 	/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1705 	max_redir_hdr_data_len =
1706 	    (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1707 	pkt_len = msgdsize(mp);
1708 	/* Make sure mp is 8 byte aligned */
1709 	if (pkt_len > max_redir_hdr_data_len) {
1710 		rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1711 		    sizeof (nd_opt_rd_hdr_t))/8;
1712 		(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1713 	} else {
1714 		rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1715 		(void) adjmsg(mp, -(pkt_len % 8));
1716 	}
1717 	rdh->nd_opt_rh_reserved1 = 0;
1718 	rdh->nd_opt_rh_reserved2 = 0;
1719 	/* ipif_v6lcl_addr contains the link-local source address */
1720 	srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1721 
1722 	/* Redirects sent by router, and router is global zone */
1723 	ASSERT(ira->ira_zoneid == ALL_ZONES);
1724 	ira->ira_zoneid = GLOBAL_ZONEID;
1725 	icmp_pkt_v6(mp, buf, len, srcp, ira);
1726 	kmem_free(buf, len);
1727 	if (need_refrele)
1728 		ill_refrele(ill);
1729 }
1730 
1731 
1732 /* Generate an ICMP time exceeded message.  (May be called as writer.) */
1733 void
1734 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1735     ip_recv_attr_t *ira)
1736 {
1737 	icmp6_t	icmp6;
1738 
1739 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1740 	if (mp == NULL)
1741 		return;
1742 
1743 	bzero(&icmp6, sizeof (icmp6_t));
1744 	icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1745 	icmp6.icmp6_code = code;
1746 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1747 }
1748 
1749 /*
1750  * Generate an ICMP unreachable message.
1751  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1752  * constructed by the caller.
1753  */
1754 void
1755 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1756     ip_recv_attr_t *ira)
1757 {
1758 	icmp6_t	icmp6;
1759 
1760 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1761 	if (mp == NULL)
1762 		return;
1763 
1764 	bzero(&icmp6, sizeof (icmp6_t));
1765 	icmp6.icmp6_type = ICMP6_DST_UNREACH;
1766 	icmp6.icmp6_code = code;
1767 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1768 }
1769 
1770 /*
1771  * Generate an ICMP pkt too big message.
1772  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1773  * constructed by the caller.
1774  */
1775 void
1776 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1777     ip_recv_attr_t *ira)
1778 {
1779 	icmp6_t	icmp6;
1780 
1781 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1782 	if (mp == NULL)
1783 		return;
1784 
1785 	bzero(&icmp6, sizeof (icmp6_t));
1786 	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1787 	icmp6.icmp6_code = 0;
1788 	icmp6.icmp6_mtu = htonl(mtu);
1789 
1790 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1791 }
1792 
1793 /*
1794  * Generate an ICMP parameter problem message. (May be called as writer.)
1795  * 'offset' is the offset from the beginning of the packet in error.
1796  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1797  * constructed by the caller.
1798  */
1799 static void
1800 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1801     boolean_t mcast_ok, ip_recv_attr_t *ira)
1802 {
1803 	icmp6_t	icmp6;
1804 
1805 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1806 	if (mp == NULL)
1807 		return;
1808 
1809 	bzero((char *)&icmp6, sizeof (icmp6_t));
1810 	icmp6.icmp6_type = ICMP6_PARAM_PROB;
1811 	icmp6.icmp6_code = code;
1812 	icmp6.icmp6_pptr = htonl(offset);
1813 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1814 }
1815 
1816 void
1817 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1818     ip_recv_attr_t *ira)
1819 {
1820 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
1821 	uint16_t	hdr_length;
1822 	uint8_t		*nexthdrp;
1823 	uint32_t	offset;
1824 	ill_t		*ill = ira->ira_ill;
1825 
1826 	/* Determine the offset of the bad nexthdr value */
1827 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h,	&hdr_length, &nexthdrp)) {
1828 		/* Malformed packet */
1829 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1830 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
1831 		freemsg(mp);
1832 		return;
1833 	}
1834 
1835 	offset = nexthdrp - mp->b_rptr;
1836 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1837 	    mcast_ok, ira);
1838 }
1839 
1840 /*
1841  * Verify whether or not the IP address is a valid local address.
1842  * Could be a unicast, including one for a down interface.
1843  * If allow_mcbc then a multicast or broadcast address is also
1844  * acceptable.
1845  *
1846  * In the case of a multicast address, however, the
1847  * upper protocol is expected to reset the src address
1848  * to zero when we return IPVL_MCAST so that
1849  * no packets are emitted with multicast address as
1850  * source address.
1851  * The addresses valid for bind are:
1852  *	(1) - in6addr_any
1853  *	(2) - IP address of an UP interface
1854  *	(3) - IP address of a DOWN interface
1855  *	(4) - a multicast address. In this case
1856  *	the conn will only receive packets destined to
1857  *	the specified multicast address. Note: the
1858  *	application still has to issue an
1859  *	IPV6_JOIN_GROUP socket option.
1860  *
1861  * In all the above cases, the bound address must be valid in the current zone.
1862  * When the address is loopback or multicast, there might be many matching IREs
1863  * so bind has to look up based on the zone.
1864  */
1865 ip_laddr_t
1866 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1867     ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1868 {
1869 	ire_t		*src_ire;
1870 	uint_t		match_flags;
1871 	ill_t		*ill = NULL;
1872 
1873 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1874 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1875 
1876 	match_flags = MATCH_IRE_ZONEONLY;
1877 	if (scopeid != 0) {
1878 		ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1879 		if (ill == NULL)
1880 			return (IPVL_BAD);
1881 		match_flags |= MATCH_IRE_ILL;
1882 	}
1883 
1884 	src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1885 	    ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1886 	if (ill != NULL)
1887 		ill_refrele(ill);
1888 
1889 	/*
1890 	 * If an address other than in6addr_any is requested,
1891 	 * we verify that it is a valid address for bind
1892 	 * Note: Following code is in if-else-if form for
1893 	 * readability compared to a condition check.
1894 	 */
1895 	if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1896 		/*
1897 		 * (2) Bind to address of local UP interface
1898 		 */
1899 		ire_refrele(src_ire);
1900 		return (IPVL_UNICAST_UP);
1901 	} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1902 		/* (4) bind to multicast address. */
1903 		if (src_ire != NULL)
1904 			ire_refrele(src_ire);
1905 
1906 		/*
1907 		 * Note: caller should take IPV6_MULTICAST_IF
1908 		 * into account when selecting a real source address.
1909 		 */
1910 		if (allow_mcbc)
1911 			return (IPVL_MCAST);
1912 		else
1913 			return (IPVL_BAD);
1914 	} else {
1915 		ipif_t *ipif;
1916 
1917 		/*
1918 		 * (3) Bind to address of local DOWN interface?
1919 		 * (ipif_lookup_addr() looks up all interfaces
1920 		 * but we do not get here for UP interfaces
1921 		 * - case (2) above)
1922 		 */
1923 		if (src_ire != NULL)
1924 			ire_refrele(src_ire);
1925 
1926 		ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1927 		if (ipif == NULL)
1928 			return (IPVL_BAD);
1929 
1930 		/* Not a useful source? */
1931 		if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1932 			ipif_refrele(ipif);
1933 			return (IPVL_BAD);
1934 		}
1935 		ipif_refrele(ipif);
1936 		return (IPVL_UNICAST_DOWN);
1937 	}
1938 }
1939 
1940 /*
1941  * Verify that both the source and destination addresses are valid.  If
1942  * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1943  * i.e. have no route to it.  Protocols like TCP want to verify destination
1944  * reachability, while tunnels do not.
1945  *
1946  * Determine the route, the interface, and (optionally) the source address
1947  * to use to reach a given destination.
1948  * Note that we allow connect to broadcast and multicast addresses when
1949  * IPDF_ALLOW_MCBC is set.
1950  * firsthop and dst_addr are normally the same, but with source routing
1951  * they will differ; in that case firsthop is what we'll use for the
1952  * routing lookup but the dce and label checks will be done on dst_addr.
1953  *
1954  * If uinfo is set, then we fill in the best available information
1955  * we have for the destination. This is based on (in priority order) any
1956  * metrics and path MTU stored in a dce_t, route metrics, and finally the
1957  * ill_mtu.
1958  *
1959  * Tsol note: If we have a source route then dst_addr != firsthop. But we
1960  * always do the label check on dst_addr.
1961  *
1962  * Assumes that the caller has set ixa_scopeid for link-local communication.
1963  */
1964 int
1965 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1966     const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1967     uint32_t flags, uint_t mac_mode)
1968 {
1969 	ire_t		*ire;
1970 	int		error = 0;
1971 	in6_addr_t	setsrc;				/* RTF_SETSRC */
1972 	zoneid_t	zoneid = ixa->ixa_zoneid;	/* Honors SO_ALLZONES */
1973 	ip_stack_t	*ipst = ixa->ixa_ipst;
1974 	dce_t		*dce;
1975 	uint_t		pmtu;
1976 	uint_t		ifindex;
1977 	uint_t		generation;
1978 	nce_t		*nce;
1979 	ill_t		*ill = NULL;	/* only assigned under IPDF_SELECT_SRC */
1980 	boolean_t	multirt = B_FALSE;
1981 
	/*
	 * Overview of the steps below, in order:
	 *  1. optional label check on dst_addr (labeled systems only);
	 *  2. route selection on firsthop, result cached in ixa_ire;
	 *  3. dce lookup on dst_addr, result cached in ixa_dce;
	 *  4. post-fragmentation function and nce cached in ixa;
	 *  5. loopback source sanity checks;
	 *  6. optional source address selection (IPDF_SELECT_SRC);
	 *  7. fragment size / path MTU and, if requested, uinfo.
	 *
	 * Returns 0 or an errno.  Note that ENETUNREACH/EHOSTUNREACH can be
	 * returned from the normal exit with ixa_ire and ixa_dce filled in.
	 */
1982 	ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1983 
1984 	ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1985 
1986 	/*
1987 	 * We never send to zero; the ULPs map it to the loopback address.
1988 	 * We can't allow it since we use zero to mean uninitialized in some
1989 	 * places.
1990 	 */
1991 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1992 
1993 	if (is_system_labeled()) {
1994 		ts_label_t *tsl = NULL;
1995 
		/* The label check always uses dst_addr, never firsthop. */
1996 		error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
1997 		    mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
1998 		if (error != 0)
1999 			return (error);
2000 		if (tsl != NULL) {
2001 			/* Update the label */
2002 			ip_xmit_attr_replace_tsl(ixa, tsl);
2003 		}
2004 	}
2005 
2006 	setsrc = ipv6_all_zeros;
2007 	/*
2008 	 * Select a route; For IPMP interfaces, we would only select
2009 	 * a "hidden" route (i.e., going through a specific under_ill)
2010 	 * if ixa_ifindex has been specified.
2011 	 */
2012 	ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
2013 	    &setsrc, &error, &multirt);
2014 	ASSERT(ire != NULL);	/* IRE_NOROUTE if none found */
2015 	if (error != 0)
2016 		goto bad_addr;
2017 
2018 	/*
2019 	 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
2020 	 * If IPDF_VERIFY_DST is set, the destination must be reachable.
2021 	 * Otherwise the destination needn't be reachable.
2022 	 *
2023 	 * If we match on a reject or black hole, then we've got a
2024 	 * local failure.  May as well fail out the connect() attempt,
2025 	 * since it's never going to succeed.
2026 	 */
2027 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2028 		/*
2029 		 * If we're verifying destination reachability, we always want
2030 		 * to complain here.
2031 		 *
2032 		 * If we're not verifying destination reachability but the
2033 		 * destination has a route, we still want to fail on the
2034 		 * temporary address and broadcast address tests.
2035 		 *
2036 		 * In both cases we let the code continue so some reasonable
2037 		 * information is returned to the caller. That enables the
2038 		 * caller to use (and even cache) the IRE. conn_ip_output will
2039 		 * use the generation mismatch path to check for the unreachable
2040 		 * case thereby avoiding any specific check in the main path.
2041 		 */
2042 		ASSERT(generation == IRE_GENERATION_VERIFY);
2043 		if (flags & IPDF_VERIFY_DST) {
2044 			/*
2045 			 * Set errno but continue to set up ixa_ire to be
2046 			 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2047 			 * That allows callers to use ip_output to get an
2048 			 * ICMP error back.
2049 			 */
2050 			if (!(ire->ire_type & IRE_HOST))
2051 				error = ENETUNREACH;
2052 			else
2053 				error = EHOSTUNREACH;
2054 		}
2055 	}
2056 
	/*
	 * Multicast/broadcast route without permission: substitute the
	 * reject ire so ixa still ends up with a usable ixa_ire, and report
	 * ENETUNREACH from the normal exit below.
	 */
2057 	if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2058 	    !(flags & IPDF_ALLOW_MCBC)) {
2059 		ire_refrele(ire);
2060 		ire = ire_reject(ipst, B_FALSE);
2061 		generation = IRE_GENERATION_VERIFY;
2062 		error = ENETUNREACH;
2063 	}
2064 
2065 	/* Cache things */
2066 	if (ixa->ixa_ire != NULL)
2067 		ire_refrele_notr(ixa->ixa_ire);
	/*
	 * NOTE(review): on DEBUG kernels this presumably converts the hold
	 * we got from route selection into the "_notr" flavor that
	 * ixa_ire is released with above - confirm against ire_refhold_notr.
	 */
2068 #ifdef DEBUG
2069 	ire_refhold_notr(ire);
2070 	ire_refrele(ire);
2071 #endif
2072 	ixa->ixa_ire = ire;
2073 	ixa->ixa_ire_generation = generation;
2074 
2075 	/*
2076 	 * Ensure that ixa_dce is always set any time that ixa_ire is set,
2077 	 * since some callers will send a packet to conn_ip_output() even if
2078 	 * there's an error.
2079 	 */
2080 	ifindex = 0;
2081 	if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2082 		/* If we are creating a DCE we'd better have an ifindex */
		/*
		 * NOTE(review): ill is initialized to NULL and is not
		 * assigned anywhere before this point in this function, so
		 * as written the else arm is always taken: ifindex stays 0
		 * and IPDF_UNIQUE_DCE is dropped for link-scope
		 * destinations.  Confirm whether that is intended.
		 */
2083 		if (ill != NULL)
2084 			ifindex = ill->ill_phyint->phyint_ifindex;
2085 		else
2086 			flags &= ~IPDF_UNIQUE_DCE;
2087 	}
2088 
2089 	if (flags & IPDF_UNIQUE_DCE) {
2090 		/* Fallback to the default dce if allocation fails */
2091 		dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2092 		if (dce != NULL) {
2093 			generation = dce->dce_generation;
2094 		} else {
2095 			dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2096 			    &generation);
2097 		}
2098 	} else {
2099 		dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2100 	}
2101 	ASSERT(dce != NULL);
2102 	if (ixa->ixa_dce != NULL)
2103 		dce_refrele_notr(ixa->ixa_dce);
	/* Same DEBUG-only hold/release pairing as for ixa_ire above. */
2104 #ifdef DEBUG
2105 	dce_refhold_notr(dce);
2106 	dce_refrele(dce);
2107 #endif
2108 	ixa->ixa_dce = dce;
2109 	ixa->ixa_dce_generation = generation;
2110 
2111 
2112 	/*
2113 	 * For multicast with multirt we have a flag passed back from
2114 	 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2115 	 * possible multicast address.
2116 	 * We also need a flag for multicast since we can't check
2117 	 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2118 	 */
2119 	if (multirt) {
2120 		ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2121 		ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2122 	} else {
2123 		ixa->ixa_postfragfn = ire->ire_postfragfn;
2124 		ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2125 	}
2126 	if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2127 		/* Get an nce to cache. */
2128 		nce = ire_to_nce(ire, NULL, firsthop);
2129 		if (nce == NULL) {
2130 			/* Allocation failure? */
			/* Keep any old ixa_nce; force re-verification later. */
2131 			ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2132 		} else {
2133 			if (ixa->ixa_nce != NULL)
2134 				nce_refrele(ixa->ixa_nce);
2135 			ixa->ixa_nce = nce;
2136 		}
2137 	}
2138 
2139 	/*
2140 	 * If the source address is a loopback address, the
2141 	 * destination had best be local or multicast.
2142 	 * If we are sending to an IRE_LOCAL using a loopback source then
2143 	 * it had better be the same zoneid.
2144 	 */
2145 	if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2146 		if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
			/*
			 * The reference now belongs to ixa_ire, so clear the
			 * local pointer to keep bad_addr from releasing it.
			 */
2147 			ire = NULL;	/* Stored in ixa_ire */
2148 			error = EADDRNOTAVAIL;
2149 			goto bad_addr;
2150 		}
2151 		if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2152 			ire = NULL;	/* Stored in ixa_ire */
2153 			error = EADDRNOTAVAIL;
2154 			goto bad_addr;
2155 		}
2156 	}
2157 
2158 	/*
2159 	 * Does the caller want us to pick a source address?
2160 	 */
2161 	if (flags & IPDF_SELECT_SRC) {
2162 		in6_addr_t	src_addr;
2163 
2164 		/*
2165 		 * We use ire_nexthop_ill to avoid the under ipmp
2166 		 * interface for source address selection. Note that for ipmp
2167 		 * probe packets, ixa_ifindex would have been specified, and
2168 		 * the ip_select_route() invocation would have picked an ire
2169 		 * with ire_ill pointing at an under interface.
2170 		 */
		/* A non-NULL result is released with ill_refrele() on exit. */
2171 		ill = ire_nexthop_ill(ire);
2172 
2173 		/* If unreachable we have no ill but need some source */
2174 		if (ill == NULL) {
2175 			src_addr = ipv6_loopback;
2176 			/* Make sure we look for a better source address */
2177 			generation = SRC_GENERATION_VERIFY;
2178 		} else {
2179 			error = ip_select_source_v6(ill, &setsrc, dst_addr,
2180 			    zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2181 			    &src_addr, &generation, NULL);
2182 			if (error != 0) {
2183 				ire = NULL;	/* Stored in ixa_ire */
2184 				goto bad_addr;
2185 			}
2186 		}
2187 
2188 		/*
2189 		 * We allow the source address to be down.
2190 		 * However, we check that we don't use the loopback address
2191 		 * as a source when sending out on the wire.
2192 		 */
2193 		if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2194 		    !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2195 		    !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2196 			ire = NULL;	/* Stored in ixa_ire */
2197 			error = EADDRNOTAVAIL;
2198 			goto bad_addr;
2199 		}
2200 
		/* Hand the chosen source back to the caller. */
2201 		*src_addrp = src_addr;
2202 		ixa->ixa_src_generation = generation;
2203 	}
2204 
2205 	/*
2206 	 * Make sure we don't leave an unreachable ixa_nce in place
2207 	 * since ip_select_route is used when we unplumb i.e., remove
2208 	 * references on ixa_ire, ixa_nce, and ixa_dce.
2209 	 */
2210 	nce = ixa->ixa_nce;
2211 	if (nce != NULL && nce->nce_is_condemned) {
2212 		nce_refrele(nce);
2213 		ixa->ixa_nce = NULL;
2214 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2215 	}
2216 
2217 	/*
2218 	 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2219 	 * multicast. But pmtu discovery is only enabled for connected
2220 	 * sockets in general.
2221 	 */
2222 
2223 	/*
2224 	 * Set initial value for fragmentation limit.  Either conn_ip_output
2225 	 * or ULP might update it when there are routing changes.
2226 	 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2227 	 */
2228 	pmtu = ip_get_pmtu(ixa);
2229 	ixa->ixa_fragsize = pmtu;
2230 	/* Make sure ixa_fragsize and ixa_pmtu remain identical */
2231 	if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2232 		ixa->ixa_pmtu = pmtu;
2233 
2234 	/*
2235 	 * Extract information useful for some transports.
2236 	 * First we look for DCE metrics. Then we take what we have in
2237 	 * the metrics in the route, where the offlink is used if we have
2238 	 * one.
2239 	 */
2240 	if (uinfo != NULL) {
2241 		bzero(uinfo, sizeof (*uinfo));
2242 
2243 		if (dce->dce_flags & DCEF_UINFO)
2244 			*uinfo = dce->dce_uinfo;
2245 
2246 		rts_merge_metrics(uinfo, &ire->ire_metrics);
2247 
2248 		/* Allow ire_metrics to decrease the path MTU from above */
2249 		if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2250 			uinfo->iulp_mtu = pmtu;
2251 
2252 		uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2253 		uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2254 		uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2255 	}
2256 
2257 	if (ill != NULL)
2258 		ill_refrele(ill);
2259 
	/* May be non-zero (unreachable destination) with ixa populated. */
2260 	return (error);
2261 
	/*
	 * Error exit.  ire is non-NULL here only when its reference was not
	 * handed over to ixa_ire; in that case we drop it ourselves.
	 */
2262 bad_addr:
2263 	if (ire != NULL)
2264 		ire_refrele(ire);
2265 
2266 	if (ill != NULL)
2267 		ill_refrele(ill);
2268 
2269 	/*
2270 	 * Make sure we don't leave an unreachable ixa_nce in place
2271 	 * since ip_select_route is used when we unplumb i.e., remove
2272 	 * references on ixa_ire, ixa_nce, and ixa_dce.
2273 	 */
2274 	nce = ixa->ixa_nce;
2275 	if (nce != NULL && nce->nce_is_condemned) {
2276 		nce_refrele(nce);
2277 		ixa->ixa_nce = NULL;
2278 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2279 	}
2280 
2281 	return (error);
2282 }
2283 
2284 /*
2285  * Handle protocols with which IP is less intimate.  There
2286  * can be more than one stream bound to a particular
2287  * protocol.  When this is the case, normally each one gets a copy
2288  * of any incoming packets.
2289  *
2290  * Zones notes:
2291  * Packets will be distributed to conns in all zones. This is really only
2292  * useful for ICMPv6 as only applications in the global zone can create raw
2293  * sockets for other protocols.
2294  */
2295 void
2296 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2297 {
2298 	mblk_t		*mp1;
2299 	in6_addr_t	laddr = ip6h->ip6_dst;
2300 	conn_t		*connp, *first_connp, *next_connp;
2301 	connf_t		*connfp;
2302 	ill_t		*ill = ira->ira_ill;
2303 	ip_stack_t	*ipst = ill->ill_ipst;
2304 
2305 	connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2306 	mutex_enter(&connfp->connf_lock);
2307 	connp = connfp->connf_head;
2308 	for (connp = connfp->connf_head; connp != NULL;
2309 	    connp = connp->conn_next) {
2310 		/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2311 		if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2312 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2313 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2314 			break;
2315 	}
2316 
2317 	if (connp == NULL) {
2318 		/*
2319 		 * No one bound to this port.  Is
2320 		 * there a client that wants all
2321 		 * unclaimed datagrams?
2322 		 */
2323 		mutex_exit(&connfp->connf_lock);
2324 		ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2325 		    ICMP6_PARAMPROB_NEXTHEADER, ira);
2326 		return;
2327 	}
2328 
2329 	ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2330 
2331 	CONN_INC_REF(connp);
2332 	first_connp = connp;
2333 
2334 	/*
2335 	 * XXX: Fix the multiple protocol listeners case. We should not
2336 	 * be walking the conn->conn_next list here.
2337 	 */
2338 	connp = connp->conn_next;
2339 	for (;;) {
2340 		while (connp != NULL) {
2341 			/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2342 			if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2343 			    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2344 			    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2345 			    ira, connp)))
2346 				break;
2347 			connp = connp->conn_next;
2348 		}
2349 
2350 		if (connp == NULL) {
2351 			/* No more interested clients */
2352 			connp = first_connp;
2353 			break;
2354 		}
2355 		if (((mp1 = dupmsg(mp)) == NULL) &&
2356 		    ((mp1 = copymsg(mp)) == NULL)) {
2357 			/* Memory allocation failed */
2358 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2359 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
2360 			connp = first_connp;
2361 			break;
2362 		}
2363 
2364 		CONN_INC_REF(connp);
2365 		mutex_exit(&connfp->connf_lock);
2366 
2367 		ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2368 		    ira);
2369 
2370 		mutex_enter(&connfp->connf_lock);
2371 		/* Follow the next pointer before releasing the conn. */
2372 		next_connp = connp->conn_next;
2373 		CONN_DEC_REF(connp);
2374 		connp = next_connp;
2375 	}
2376 
2377 	/* Last one.  Send it upstream. */
2378 	mutex_exit(&connfp->connf_lock);
2379 
2380 	ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2381 
2382 	CONN_DEC_REF(connp);
2383 }
2384 
2385 /*
2386  * Called when it is conceptually a ULP that would sent the packet
2387  * e.g., port unreachable and nexthdr unknown. Check that the packet
2388  * would have passed the IPsec global policy before sending the error.
2389  *
2390  * Send an ICMP error after patching up the packet appropriately.
2391  * Uses ip_drop_input and bumps the appropriate MIB.
2392  * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2393  */
2394 void
2395 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2396     ip_recv_attr_t *ira)
2397 {
2398 	ip6_t		*ip6h;
2399 	boolean_t	secure;
2400 	ill_t		*ill = ira->ira_ill;
2401 	ip_stack_t	*ipst = ill->ill_ipst;
2402 	netstack_t	*ns = ipst->ips_netstack;
2403 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2404 
2405 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2406 
2407 	/*
2408 	 * We are generating an icmp error for some inbound packet.
2409 	 * Called from all ip_fanout_(udp, tcp, proto) functions.
2410 	 * Before we generate an error, check with global policy
2411 	 * to see whether this is allowed to enter the system. As
2412 	 * there is no "conn", we are checking with global policy.
2413 	 */
2414 	ip6h = (ip6_t *)mp->b_rptr;
2415 	if (secure || ipss->ipsec_inbound_v6_policy_present) {
2416 		mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2417 		if (mp == NULL)
2418 			return;
2419 	}
2420 
2421 	/* We never send errors for protocols that we do implement */
2422 	if (ira->ira_protocol == IPPROTO_ICMPV6) {
2423 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2424 		ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2425 		freemsg(mp);
2426 		return;
2427 	}
2428 
2429 	switch (icmp_type) {
2430 	case ICMP6_DST_UNREACH:
2431 		ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2432 
2433 		BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2434 		ip_drop_input("ipIfStatsNoPorts", mp, ill);
2435 
2436 		icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2437 		break;
2438 	case ICMP6_PARAM_PROB:
2439 		ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2440 
2441 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2442 		ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2443 
2444 		/* Let the system determine the offset for this one */
2445 		icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2446 		break;
2447 	default:
2448 #ifdef DEBUG
2449 		panic("ip_fanout_send_icmp_v6: wrong type");
2450 		/*NOTREACHED*/
2451 #else
2452 		freemsg(mp);
2453 		break;
2454 #endif
2455 	}
2456 }
2457 
2458 /*
2459  * Fanout for UDP packets that are multicast or ICMP errors.
2460  * (Unicast fanout is handled in ip_input_v6.)
2461  *
2462  * If SO_REUSEADDR is set all multicast packets
2463  * will be delivered to all conns bound to the same port.
2464  *
2465  * Fanout for UDP packets.
2466  * The caller puts <fport, lport> in the ports parameter.
2467  * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2468  *
2469  * If SO_REUSEADDR is set all multicast and broadcast packets
2470  * will be delivered to all conns bound to the same port.
2471  *
2472  * Zones notes:
2473  * Earlier in ip_input on a system with multiple shared-IP zones we
2474  * duplicate the multicast and broadcast packets and send them up
2475  * with each explicit zoneid that exists on that ill.
2476  * This means that here we can match the zoneid with SO_ALLZONES being special.
2477  */
2478 void
2479 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2480     ip_recv_attr_t *ira)
2481 {
2482 	in6_addr_t	laddr;
2483 	conn_t		*connp;
2484 	connf_t		*connfp;
2485 	in6_addr_t	faddr;
2486 	ill_t		*ill = ira->ira_ill;
2487 	ip_stack_t	*ipst = ill->ill_ipst;
2488 
2489 	ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2490 
2491 	laddr = ip6h->ip6_dst;
2492 	faddr = ip6h->ip6_src;
2493 
2494 	/* Attempt to find a client stream based on destination port. */
2495 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2496 	mutex_enter(&connfp->connf_lock);
2497 	connp = connfp->connf_head;
2498 	while (connp != NULL) {
2499 		if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2500 		    conn_wantpacket_v6(connp, ira, ip6h) &&
2501 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2502 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2503 			break;
2504 		connp = connp->conn_next;
2505 	}
2506 
2507 	if (connp == NULL)
2508 		goto notfound;
2509 
2510 	CONN_INC_REF(connp);
2511 
2512 	if (connp->conn_reuseaddr) {
2513 		conn_t		*first_connp = connp;
2514 		conn_t		*next_connp;
2515 		mblk_t		*mp1;
2516 
2517 		connp = connp->conn_next;
2518 		for (;;) {
2519 			while (connp != NULL) {
2520 				if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2521 				    fport, faddr) &&
2522 				    conn_wantpacket_v6(connp, ira, ip6h) &&
2523 				    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2524 				    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2525 				    ira, connp)))
2526 					break;
2527 				connp = connp->conn_next;
2528 			}
2529 			if (connp == NULL) {
2530 				/* No more interested clients */
2531 				connp = first_connp;
2532 				break;
2533 			}
2534 			if (((mp1 = dupmsg(mp)) == NULL) &&
2535 			    ((mp1 = copymsg(mp)) == NULL)) {
2536 				/* Memory allocation failed */
2537 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2538 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
2539 				connp = first_connp;
2540 				break;
2541 			}
2542 
2543 			CONN_INC_REF(connp);
2544 			mutex_exit(&connfp->connf_lock);
2545 
2546 			IP6_STAT(ipst, ip6_udp_fanmb);
2547 			ip_fanout_udp_conn(connp, mp1, NULL,
2548 			    (ip6_t *)mp1->b_rptr, ira);
2549 
2550 			mutex_enter(&connfp->connf_lock);
2551 			/* Follow the next pointer before releasing the conn. */
2552 			next_connp = connp->conn_next;
2553 			IP6_STAT(ipst, ip6_udp_fanmb);
2554 			CONN_DEC_REF(connp);
2555 			connp = next_connp;
2556 		}
2557 	}
2558 
2559 	/* Last one.  Send it upstream. */
2560 	mutex_exit(&connfp->connf_lock);
2561 
2562 	IP6_STAT(ipst, ip6_udp_fanmb);
2563 	ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2564 	CONN_DEC_REF(connp);
2565 	return;
2566 
2567 notfound:
2568 	mutex_exit(&connfp->connf_lock);
2569 	/*
2570 	 * No one bound to this port.  Is
2571 	 * there a client that wants all
2572 	 * unclaimed datagrams?
2573 	 */
2574 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2575 		ASSERT(ira->ira_protocol == IPPROTO_UDP);
2576 		ip_fanout_proto_v6(mp, ip6h, ira);
2577 	} else {
2578 		ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2579 		    ICMP6_DST_UNREACH_NOPORT, ira);
2580 	}
2581 }
2582 
2583 /*
2584  * int ip_find_hdr_v6()
2585  *
2586  * This routine is used by the upper layer protocols, iptun, and IPsec:
2587  * - Set extension header pointers to appropriate locations
2588  * - Determine IPv6 header length and return it
2589  * - Return a pointer to the last nexthdr value
2590  *
2591  * The caller must initialize ipp_fields.
2592  * The upper layer protocols normally set label_separate which makes the
2593  * routine put the TX label in ipp_label_v6. If this is not set then
2594  * the hop-by-hop options including the label are placed in ipp_hopopts.
2595  *
2596  * NOTE: If multiple extension headers of the same type are present,
2597  * ip_find_hdr_v6() will set the respective extension header pointers
2598  * to the first one that it encounters in the IPv6 header.  It also
2599  * skips fragment headers.  This routine deals with malformed packets
2600  * of various sorts in which case the returned length is up to the
2601  * malformed part.
 *
 * Parameters:
 *   mp		- only mp->b_wptr is used, as the end of the parseable data.
 *   ip6h	- the IPv6 header; extension headers are expected to follow
 *		  it directly in memory.
 *   ipp	- receives hop limit, traffic class and destination address
 *		  plus pointers/lengths for each extension header found.
 *   nexthdrp	- may be NULL; otherwise receives the next header value at
 *		  the point where parsing stopped.
 *
 * Returns IPV6_HDR_LEN plus the lengths of all extension headers that
 * were walked completely.
2602  */
2603 int
2604 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2605     uint8_t *nexthdrp)
2606 {
2607 	uint_t	length, ehdrlen;
2608 	uint8_t nexthdr;
2609 	uint8_t *whereptr, *endptr;
2610 	ip6_dest_t *tmpdstopts;
2611 	ip6_rthdr_t *tmprthdr;
2612 	ip6_hbh_t *tmphopopts;
2613 	ip6_frag_t *tmpfraghdr;
2614 
	/* These three come straight from the fixed header. */
2615 	ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2616 	ipp->ipp_hoplimit = ip6h->ip6_hops;
2617 	ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2618 	ipp->ipp_addr = ip6h->ip6_dst;
2619 
2620 	length = IPV6_HDR_LEN;
2621 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2622 	endptr = mp->b_wptr;
2623 
2624 	nexthdr = ip6h->ip6_nxt;
2625 	while (whereptr < endptr) {
2626 		/* Is there enough left for len + nexthdr? */
2627 		if (whereptr + MIN_EHDR_LEN > endptr)
2628 			goto done;
2629 
2630 		switch (nexthdr) {
2631 		case IPPROTO_HOPOPTS: {
2632 			/* We check for any CIPSO */
2633 			uchar_t *secopt;
2634 			boolean_t hbh_needed;
2635 			uchar_t *after_secopt;
2636 
2637 			tmphopopts = (ip6_hbh_t *)whereptr;
			/* Length field counts 8-octet units past the first. */
2638 			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2639 			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
2640 				goto done;
2641 			nexthdr = tmphopopts->ip6h_nxt;
2642 
2643 			if (!label_separate) {
				/*
				 * hbh_needed is left unset here; the else arm
				 * of the secopt test below assigns it before
				 * it is read.
				 */
2644 				secopt = NULL;
2645 				after_secopt = whereptr;
2646 			} else {
2647 				/*
2648 				 * We have dropped packets with bad options in
2649 				 * ip6_input. No need to check return value
2650 				 * here.
2651 				 */
				/*
				 * NOTE(review): this relies on the callee
				 * storing all three outputs - confirm.
				 */
2652 				(void) tsol_find_secopt_v6(whereptr, ehdrlen,
2653 				    &secopt, &after_secopt, &hbh_needed);
2654 			}
2655 			if (secopt != NULL && after_secopt - whereptr > 0) {
2656 				ipp->ipp_fields |= IPPF_LABEL_V6;
2657 				ipp->ipp_label_v6 = secopt;
2658 				ipp->ipp_label_len_v6 = after_secopt - whereptr;
2659 			} else {
				/* No label: the whole header is plain hbh. */
2660 				ipp->ipp_label_len_v6 = 0;
2661 				after_secopt = whereptr;
2662 				hbh_needed = B_TRUE;
2663 			}
2664 			/* return only 1st hbh */
2665 			if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2666 				ipp->ipp_fields |= IPPF_HOPOPTS;
2667 				ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2668 				ipp->ipp_hopoptslen = ehdrlen -
2669 				    ipp->ipp_label_len_v6;
2670 			}
2671 			break;
2672 		}
2673 		case IPPROTO_DSTOPTS:
2674 			tmpdstopts = (ip6_dest_t *)whereptr;
2675 			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2676 			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
2677 				goto done;
2678 			nexthdr = tmpdstopts->ip6d_nxt;
2679 			/*
2680 			 * ipp_dstopts is set to the destination header after a
2681 			 * routing header.
2682 			 * Assume it is a post-rthdr destination header
2683 			 * and adjust when we find an rthdr.
2684 			 */
2685 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2686 				ipp->ipp_fields |= IPPF_DSTOPTS;
2687 				ipp->ipp_dstopts = tmpdstopts;
2688 				ipp->ipp_dstoptslen = ehdrlen;
2689 			}
2690 			break;
2691 		case IPPROTO_ROUTING:
2692 			tmprthdr = (ip6_rthdr_t *)whereptr;
2693 			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2694 			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
2695 				goto done;
2696 			nexthdr = tmprthdr->ip6r_nxt;
2697 			/* return only 1st rthdr */
2698 			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2699 				ipp->ipp_fields |= IPPF_RTHDR;
2700 				ipp->ipp_rthdr = tmprthdr;
2701 				ipp->ipp_rthdrlen = ehdrlen;
2702 			}
2703 			/*
2704 			 * Make any destination header we've seen be a
2705 			 * pre-rthdr destination header.
2706 			 */
2707 			if (ipp->ipp_fields & IPPF_DSTOPTS) {
2708 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2709 				ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2710 				ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2711 				ipp->ipp_dstopts = NULL;
2712 				ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2713 				ipp->ipp_dstoptslen = 0;
2714 			}
2715 			break;
2716 		case IPPROTO_FRAGMENT:
2717 			tmpfraghdr = (ip6_frag_t *)whereptr;
			/* Fragment headers have a fixed size, no length field. */
2718 			ehdrlen = sizeof (ip6_frag_t);
2719 			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2720 				goto done;
2721 			nexthdr = tmpfraghdr->ip6f_nxt;
			/* Remember only the first one, then keep walking. */
2722 			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2723 				ipp->ipp_fields |= IPPF_FRAGHDR;
2724 				ipp->ipp_fraghdr = tmpfraghdr;
2725 				ipp->ipp_fraghdrlen = ehdrlen;
2726 			}
2727 			break;
2728 		case IPPROTO_NONE:
2729 		default:
			/* Anything else ends the extension header chain. */
2730 			goto done;
2731 		}
		/* Only headers that fit entirely are added to the length. */
2732 		length += ehdrlen;
2733 		whereptr += ehdrlen;
2734 	}
2735 done:
2736 	if (nexthdrp != NULL)
2737 		*nexthdrp = nexthdr;
2738 	return (length);
2739 }
2740 
2741 /*
2742  * Try to determine where and what are the IPv6 header length and
2743  * pointer to nexthdr value for the upper layer protocol (or an
2744  * unknown next hdr).
2745  *
2746  * Parameters returns a pointer to the nexthdr value;
2747  * Must handle malformed packets of various sorts.
2748  * Function returns failure for malformed cases.
2749  */
2750 boolean_t
2751 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2752     uint8_t **nexthdrpp)
2753 {
2754 	uint16_t length;
2755 	uint_t	ehdrlen;
2756 	uint8_t	*nexthdrp;
2757 	uint8_t *whereptr;
2758 	uint8_t *endptr;
2759 	ip6_dest_t *desthdr;
2760 	ip6_rthdr_t *rthdr;
2761 	ip6_frag_t *fraghdr;
2762 
2763 	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2764 	length = IPV6_HDR_LEN;
2765 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2766 	endptr = mp->b_wptr;
2767 
2768 	nexthdrp = &ip6h->ip6_nxt;
2769 	while (whereptr < endptr) {
2770 		/* Is there enough left for len + nexthdr? */
2771 		if (whereptr + MIN_EHDR_LEN > endptr)
2772 			break;
2773 
2774 		switch (*nexthdrp) {
2775 		case IPPROTO_HOPOPTS:
2776 		case IPPROTO_DSTOPTS:
2777 			/* Assumes the headers are identical for hbh and dst */
2778 			desthdr = (ip6_dest_t *)whereptr;
2779 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
2780 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
2781 				return (B_FALSE);
2782 			nexthdrp = &desthdr->ip6d_nxt;
2783 			break;
2784 		case IPPROTO_ROUTING:
2785 			rthdr = (ip6_rthdr_t *)whereptr;
2786 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
2787 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
2788 				return (B_FALSE);
2789 			nexthdrp = &rthdr->ip6r_nxt;
2790 			break;
2791 		case IPPROTO_FRAGMENT:
2792 			fraghdr = (ip6_frag_t *)whereptr;
2793 			ehdrlen = sizeof (ip6_frag_t);
2794 			if ((uchar_t *)&fraghdr[1] > endptr)
2795 				return (B_FALSE);
2796 			nexthdrp = &fraghdr->ip6f_nxt;
2797 			break;
2798 		case IPPROTO_NONE:
2799 			/* No next header means we're finished */
2800 		default:
2801 			*hdr_length_ptr = length;
2802 			*nexthdrpp = nexthdrp;
2803 			return (B_TRUE);
2804 		}
2805 		length += ehdrlen;
2806 		whereptr += ehdrlen;
2807 		*hdr_length_ptr = length;
2808 		*nexthdrpp = nexthdrp;
2809 	}
2810 	switch (*nexthdrp) {
2811 	case IPPROTO_HOPOPTS:
2812 	case IPPROTO_DSTOPTS:
2813 	case IPPROTO_ROUTING:
2814 	case IPPROTO_FRAGMENT:
2815 		/*
2816 		 * If any know extension headers are still to be processed,
2817 		 * the packet's malformed (or at least all the IP header(s) are
2818 		 * not in the same mblk - and that should never happen.
2819 		 */
2820 		return (B_FALSE);
2821 
2822 	default:
2823 		/*
2824 		 * If we get here, we know that all of the IP headers were in
2825 		 * the same mblk, even if the ULP header is in the next mblk.
2826 		 */
2827 		*hdr_length_ptr = length;
2828 		*nexthdrpp = nexthdrp;
2829 		return (B_TRUE);
2830 	}
2831 }
2832 
2833 /*
2834  * Return the length of the IPv6 related headers (including extension headers)
2835  * Returns a length even if the packet is malformed.
2836  */
2837 int
2838 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2839 {
2840 	uint16_t hdr_len;
2841 	uint8_t	*nexthdrp;
2842 
2843 	(void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2844 	return (hdr_len);
2845 }
2846 
2847 /*
2848  * Parse and process any hop-by-hop or destination options.
2849  *
2850  * Assumes that q is an ill read queue so that ICMP errors for link-local
2851  * destinations are sent out the correct interface.
2852  *
2853  * Returns -1 if there was an error and mp has been consumed.
2854  * Returns 0 if no special action is needed.
2855  * Returns 1 if the packet contained a router alert option for this node
2856  * which is verified to be "interesting/known" for our implementation.
2857  *
2858  * XXX Note: In future as more hbh or dest options are defined,
2859  * it may be better to have different routines for hbh and dest
2860  * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2861  * may have same value in different namespaces. Or is it same namespace ??
2862  * Current code checks for each opt_type (other than pads) if it is in
2863  * the expected  nexthdr (hbh or dest)
2864  */
2865 int
2866 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2867     uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2868 {
2869 	uint8_t opt_type;
2870 	uint_t optused;
2871 	int ret = 0;
2872 	const char *errtype;
2873 	ill_t		*ill = ira->ira_ill;
2874 	ip_stack_t	*ipst = ill->ill_ipst;
2875 
2876 	while (optlen != 0) {
2877 		opt_type = *optptr;
2878 		if (opt_type == IP6OPT_PAD1) {
2879 			optused = 1;
2880 		} else {
2881 			if (optlen < 2)
2882 				goto bad_opt;
2883 			errtype = "malformed";
2884 			if (opt_type == ip6opt_ls) {
2885 				optused = 2 + optptr[1];
2886 				if (optused > optlen)
2887 					goto bad_opt;
2888 			} else switch (opt_type) {
2889 			case IP6OPT_PADN:
2890 				/*
2891 				 * Note:We don't verify that (N-2) pad octets
2892 				 * are zero as required by spec. Adhere to
2893 				 * "be liberal in what you accept..." part of
2894 				 * implementation philosophy (RFC791,RFC1122)
2895 				 */
2896 				optused = 2 + optptr[1];
2897 				if (optused > optlen)
2898 					goto bad_opt;
2899 				break;
2900 
2901 			case IP6OPT_JUMBO:
2902 				if (hdr_type != IPPROTO_HOPOPTS)
2903 					goto opt_error;
2904 				goto opt_error; /* XXX Not implemented! */
2905 
2906 			case IP6OPT_ROUTER_ALERT: {
2907 				struct ip6_opt_router *or;
2908 
2909 				if (hdr_type != IPPROTO_HOPOPTS)
2910 					goto opt_error;
2911 				optused = 2 + optptr[1];
2912 				if (optused > optlen)
2913 					goto bad_opt;
2914 				or = (struct ip6_opt_router *)optptr;
2915 				/* Check total length and alignment */
2916 				if (optused != sizeof (*or) ||
2917 				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
2918 					goto opt_error;
2919 				/* Check value */
2920 				switch (*((uint16_t *)or->ip6or_value)) {
2921 				case IP6_ALERT_MLD:
2922 				case IP6_ALERT_RSVP:
2923 					ret = 1;
2924 				}
2925 				break;
2926 			}
2927 			case IP6OPT_HOME_ADDRESS: {
2928 				/*
2929 				 * Minimal support for the home address option
2930 				 * (which is required by all IPv6 nodes).
2931 				 * Implement by just swapping the home address
2932 				 * and source address.
2933 				 * XXX Note: this has IPsec implications since
2934 				 * AH needs to take this into account.
2935 				 * Also, when IPsec is used we need to ensure
2936 				 * that this is only processed once
2937 				 * in the received packet (to avoid swapping
2938 				 * back and forth).
2939 				 * NOTE:This option processing is considered
2940 				 * to be unsafe and prone to a denial of
2941 				 * service attack.
2942 				 * The current processing is not safe even with
2943 				 * IPsec secured IP packets. Since the home
2944 				 * address option processing requirement still
2945 				 * is in the IETF draft and in the process of
2946 				 * being redefined for its usage, it has been
2947 				 * decided to turn off the option by default.
2948 				 * If this section of code needs to be executed,
2949 				 * ndd variable ip6_ignore_home_address_opt
2950 				 * should be set to 0 at the user's own risk.
2951 				 */
2952 				struct ip6_opt_home_address *oh;
2953 				in6_addr_t tmp;
2954 
2955 				if (ipst->ips_ipv6_ignore_home_address_opt)
2956 					goto opt_error;
2957 
2958 				if (hdr_type != IPPROTO_DSTOPTS)
2959 					goto opt_error;
2960 				optused = 2 + optptr[1];
2961 				if (optused > optlen)
2962 					goto bad_opt;
2963 
2964 				/*
2965 				 * We did this dest. opt the first time
2966 				 * around (i.e. before AH processing).
2967 				 * If we've done AH... stop now.
2968 				 */
2969 				if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2970 				    ira->ira_ipsec_ah_sa != NULL)
2971 					break;
2972 
2973 				oh = (struct ip6_opt_home_address *)optptr;
2974 				/* Check total length and alignment */
2975 				if (optused < sizeof (*oh) ||
2976 				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2977 					goto opt_error;
2978 				/* Swap ip6_src and the home address */
2979 				tmp = ip6h->ip6_src;
2980 				/* XXX Note: only 8 byte alignment option */
2981 				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2982 				*(in6_addr_t *)oh->ip6oh_addr = tmp;
2983 				break;
2984 			}
2985 
2986 			case IP6OPT_TUNNEL_LIMIT:
2987 				if (hdr_type != IPPROTO_DSTOPTS) {
2988 					goto opt_error;
2989 				}
2990 				optused = 2 + optptr[1];
2991 				if (optused > optlen) {
2992 					goto bad_opt;
2993 				}
2994 				if (optused != 3) {
2995 					goto opt_error;
2996 				}
2997 				break;
2998 
2999 			default:
3000 				errtype = "unknown";
3001 				/* FALLTHROUGH */
3002 			opt_error:
3003 				/* Determine which zone should send error */
3004 				switch (IP6OPT_TYPE(opt_type)) {
3005 				case IP6OPT_TYPE_SKIP:
3006 					optused = 2 + optptr[1];
3007 					if (optused > optlen)
3008 						goto bad_opt;
3009 					ip1dbg(("ip_process_options_v6: %s "
3010 					    "opt 0x%x skipped\n",
3011 					    errtype, opt_type));
3012 					break;
3013 				case IP6OPT_TYPE_DISCARD:
3014 					ip1dbg(("ip_process_options_v6: %s "
3015 					    "opt 0x%x; packet dropped\n",
3016 					    errtype, opt_type));
3017 					BUMP_MIB(ill->ill_ip_mib,
3018 					    ipIfStatsInHdrErrors);
3019 					ip_drop_input("ipIfStatsInHdrErrors",
3020 					    mp, ill);
3021 					freemsg(mp);
3022 					return (-1);
3023 				case IP6OPT_TYPE_ICMP:
3024 					BUMP_MIB(ill->ill_ip_mib,
3025 					    ipIfStatsInHdrErrors);
3026 					ip_drop_input("ipIfStatsInHdrErrors",
3027 					    mp, ill);
3028 					icmp_param_problem_v6(mp,
3029 					    ICMP6_PARAMPROB_OPTION,
3030 					    (uint32_t)(optptr -
3031 					    (uint8_t *)ip6h),
3032 					    B_FALSE, ira);
3033 					return (-1);
3034 				case IP6OPT_TYPE_FORCEICMP:
3035 					BUMP_MIB(ill->ill_ip_mib,
3036 					    ipIfStatsInHdrErrors);
3037 					ip_drop_input("ipIfStatsInHdrErrors",
3038 					    mp, ill);
3039 					icmp_param_problem_v6(mp,
3040 					    ICMP6_PARAMPROB_OPTION,
3041 					    (uint32_t)(optptr -
3042 					    (uint8_t *)ip6h),
3043 					    B_TRUE, ira);
3044 					return (-1);
3045 				default:
3046 					ASSERT(0);
3047 				}
3048 			}
3049 		}
3050 		optlen -= optused;
3051 		optptr += optused;
3052 	}
3053 	return (ret);
3054 
3055 bad_opt:
3056 	/* Determine which zone should send error */
3057 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3058 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
3059 	    (uint32_t)(optptr - (uint8_t *)ip6h),
3060 	    B_FALSE, ira);
3061 	return (-1);
3062 }
3063 
3064 /*
3065  * Process a routing header that is not yet empty.
3066  * Because of RFC 5095, we now reject all route headers.
3067  */
3068 void
3069 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
3070     ip_recv_attr_t *ira)
3071 {
3072 	ill_t		*ill = ira->ira_ill;
3073 	ip_stack_t	*ipst = ill->ill_ipst;
3074 
3075 	ASSERT(rth->ip6r_segleft != 0);
3076 
3077 	if (!ipst->ips_ipv6_forward_src_routed) {
3078 		/* XXX Check for source routed out same interface? */
3079 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
3080 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
3081 		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
3082 		freemsg(mp);
3083 		return;
3084 	}
3085 
3086 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3087 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3088 	    (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3089 	    B_FALSE, ira);
3090 }
3091 
3092 /*
3093  * Read side put procedure for IPv6 module.
3094  */
3095 void
3096 ip_rput_v6(queue_t *q, mblk_t *mp)
3097 {
3098 	ill_t		*ill;
3099 
3100 	ill = (ill_t *)q->q_ptr;
3101 	if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3102 		union DL_primitives *dl;
3103 
3104 		dl = (union DL_primitives *)mp->b_rptr;
3105 		/*
3106 		 * Things are opening or closing - only accept DLPI
3107 		 * ack messages. If the stream is closing and ip_wsrv
3108 		 * has completed, ip_close is out of the qwait, but has
3109 		 * not yet completed qprocsoff. Don't proceed any further
3110 		 * because the ill has been cleaned up and things hanging
3111 		 * off the ill have been freed.
3112 		 */
3113 		if ((mp->b_datap->db_type != M_PCPROTO) ||
3114 		    (dl->dl_primitive == DL_UNITDATA_IND)) {
3115 			inet_freemsg(mp);
3116 			return;
3117 		}
3118 	}
3119 	if (DB_TYPE(mp) == M_DATA) {
3120 		struct mac_header_info_s mhi;
3121 
3122 		ip_mdata_to_mhi(ill, mp, &mhi);
3123 		ip_input_v6(ill, NULL, mp, &mhi);
3124 	} else {
3125 		ip_rput_notdata(ill, mp);
3126 	}
3127 }
3128 
3129 /*
3130  * Walk through the IPv6 packet in mp and see if there's an AH header
3131  * in it.  See if the AH header needs to get done before other headers in
3132  * the packet.  (Worker function for ipsec_early_ah_v6().)
3133  */
3134 #define	IPSEC_HDR_DONT_PROCESS	0
3135 #define	IPSEC_HDR_PROCESS	1
3136 #define	IPSEC_MEMORY_ERROR	2 /* or malformed packet */
3137 static int
3138 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3139 {
3140 	uint_t	length;
3141 	uint_t	ehdrlen;
3142 	uint8_t *whereptr;
3143 	uint8_t *endptr;
3144 	uint8_t *nexthdrp;
3145 	ip6_dest_t *desthdr;
3146 	ip6_rthdr_t *rthdr;
3147 	ip6_t	*ip6h;
3148 
3149 	/*
3150 	 * For now just pullup everything.  In general, the less pullups,
3151 	 * the better, but there's so much squirrelling through anyway,
3152 	 * it's just easier this way.
3153 	 */
3154 	if (!pullupmsg(mp, -1)) {
3155 		return (IPSEC_MEMORY_ERROR);
3156 	}
3157 
3158 	ip6h = (ip6_t *)mp->b_rptr;
3159 	length = IPV6_HDR_LEN;
3160 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3161 	endptr = mp->b_wptr;
3162 
3163 	/*
3164 	 * We can't just use the argument nexthdr in the place
3165 	 * of nexthdrp becaue we don't dereference nexthdrp
3166 	 * till we confirm whether it is a valid address.
3167 	 */
3168 	nexthdrp = &ip6h->ip6_nxt;
3169 	while (whereptr < endptr) {
3170 		/* Is there enough left for len + nexthdr? */
3171 		if (whereptr + MIN_EHDR_LEN > endptr)
3172 			return (IPSEC_MEMORY_ERROR);
3173 
3174 		switch (*nexthdrp) {
3175 		case IPPROTO_HOPOPTS:
3176 		case IPPROTO_DSTOPTS:
3177 			/* Assumes the headers are identical for hbh and dst */
3178 			desthdr = (ip6_dest_t *)whereptr;
3179 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
3180 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
3181 				return (IPSEC_MEMORY_ERROR);
3182 			/*
3183 			 * Return DONT_PROCESS because the destination
3184 			 * options header may be for each hop in a
3185 			 * routing-header, and we only want AH if we're
3186 			 * finished with routing headers.
3187 			 */
3188 			if (*nexthdrp == IPPROTO_DSTOPTS)
3189 				return (IPSEC_HDR_DONT_PROCESS);
3190 			nexthdrp = &desthdr->ip6d_nxt;
3191 			break;
3192 		case IPPROTO_ROUTING:
3193 			rthdr = (ip6_rthdr_t *)whereptr;
3194 
3195 			/*
3196 			 * If there's more hops left on the routing header,
3197 			 * return now with DON'T PROCESS.
3198 			 */
3199 			if (rthdr->ip6r_segleft > 0)
3200 				return (IPSEC_HDR_DONT_PROCESS);
3201 
3202 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
3203 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
3204 				return (IPSEC_MEMORY_ERROR);
3205 			nexthdrp = &rthdr->ip6r_nxt;
3206 			break;
3207 		case IPPROTO_FRAGMENT:
3208 			/* Wait for reassembly */
3209 			return (IPSEC_HDR_DONT_PROCESS);
3210 		case IPPROTO_AH:
3211 			*nexthdr = IPPROTO_AH;
3212 			return (IPSEC_HDR_PROCESS);
3213 		case IPPROTO_NONE:
3214 			/* No next header means we're finished */
3215 		default:
3216 			return (IPSEC_HDR_DONT_PROCESS);
3217 		}
3218 		length += ehdrlen;
3219 		whereptr += ehdrlen;
3220 	}
3221 	/*
3222 	 * Malformed/truncated packet.
3223 	 */
3224 	return (IPSEC_MEMORY_ERROR);
3225 }
3226 
3227 /*
3228  * Path for AH if options are present.
3229  * Returns NULL if the mblk was consumed.
3230  *
3231  * Sometimes AH needs to be done before other IPv6 headers for security
3232  * reasons.  This function (and its ipsec_needs_processing_v6() above)
3233  * indicates if that is so, and fans out to the appropriate IPsec protocol
3234  * for the datagram passed in.
3235  */
3236 mblk_t *
3237 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3238 {
3239 	uint8_t nexthdr;
3240 	ah_t *ah;
3241 	ill_t		*ill = ira->ira_ill;
3242 	ip_stack_t	*ipst = ill->ill_ipst;
3243 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3244 
3245 	switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3246 	case IPSEC_MEMORY_ERROR:
3247 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3248 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
3249 		freemsg(mp);
3250 		return (NULL);
3251 	case IPSEC_HDR_DONT_PROCESS:
3252 		return (mp);
3253 	}
3254 
3255 	/* Default means send it to AH! */
3256 	ASSERT(nexthdr == IPPROTO_AH);
3257 
3258 	if (!ipsec_loaded(ipss)) {
3259 		ip_proto_not_sup(mp, ira);
3260 		return (NULL);
3261 	}
3262 
3263 	mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3264 	if (mp == NULL)
3265 		return (NULL);
3266 	ASSERT(ah != NULL);
3267 	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3268 	ASSERT(ira->ira_ipsec_ah_sa != NULL);
3269 	ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3270 	mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3271 
3272 	if (mp == NULL) {
3273 		/*
3274 		 * Either it failed or is pending. In the former case
3275 		 * ipIfStatsInDiscards was increased.
3276 		 */
3277 		return (NULL);
3278 	}
3279 
3280 	/* we're done with IPsec processing, send it up */
3281 	ip_input_post_ipsec(mp, ira);
3282 	return (NULL);
3283 }
3284 
3285 /*
3286  * Reassemble fragment.
3287  * When it returns a completed message the first mblk will only contain
3288  * the headers prior to the fragment header, with the nexthdr value updated
3289  * to be the header after the fragment header.
3290  */
3291 mblk_t *
3292 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3293     ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3294 {
3295 	uint32_t	ident = ntohl(fraghdr->ip6f_ident);
3296 	uint16_t	offset;
3297 	boolean_t	more_frags;
3298 	uint8_t		nexthdr = fraghdr->ip6f_nxt;
3299 	in6_addr_t	*v6dst_ptr;
3300 	in6_addr_t	*v6src_ptr;
3301 	uint_t		end;
3302 	uint_t		hdr_length;
3303 	size_t		count;
3304 	ipf_t		*ipf;
3305 	ipf_t		**ipfp;
3306 	ipfb_t		*ipfb;
3307 	mblk_t		*mp1;
3308 	uint8_t		ecn_info = 0;
3309 	size_t		msg_len;
3310 	mblk_t		*tail_mp;
3311 	mblk_t		*t_mp;
3312 	boolean_t	pruned = B_FALSE;
3313 	uint32_t	sum_val;
3314 	uint16_t	sum_flags;
3315 	ill_t		*ill = ira->ira_ill;
3316 	ip_stack_t	*ipst = ill->ill_ipst;
3317 	uint_t		prev_nexthdr_offset;
3318 	uint8_t		prev_nexthdr;
3319 	uint8_t		*ptr;
3320 	uint32_t	packet_size;
3321 
3322 	/*
3323 	 * We utilize hardware computed checksum info only for UDP since
3324 	 * IP fragmentation is a normal occurence for the protocol.  In
3325 	 * addition, checksum offload support for IP fragments carrying
3326 	 * UDP payload is commonly implemented across network adapters.
3327 	 */
3328 	ASSERT(ira->ira_rill != NULL);
3329 	if (nexthdr == IPPROTO_UDP && dohwcksum &&
3330 	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3331 	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
3332 		mblk_t *mp1 = mp->b_cont;
3333 		int32_t len;
3334 
3335 		/* Record checksum information from the packet */
3336 		sum_val = (uint32_t)DB_CKSUM16(mp);
3337 		sum_flags = DB_CKSUMFLAGS(mp);
3338 
3339 		/* fragmented payload offset from beginning of mblk */
3340 		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3341 
3342 		if ((sum_flags & HCK_PARTIALCKSUM) &&
3343 		    (mp1 == NULL || mp1->b_cont == NULL) &&
3344 		    offset >= DB_CKSUMSTART(mp) &&
3345 		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3346 			uint32_t adj;
3347 			/*
3348 			 * Partial checksum has been calculated by hardware
3349 			 * and attached to the packet; in addition, any
3350 			 * prepended extraneous data is even byte aligned.
3351 			 * If any such data exists, we adjust the checksum;
3352 			 * this would also handle any postpended data.
3353 			 */
3354 			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3355 			    mp, mp1, len, adj);
3356 
3357 			/* One's complement subtract extraneous checksum */
3358 			if (adj >= sum_val)
3359 				sum_val = ~(adj - sum_val) & 0xFFFF;
3360 			else
3361 				sum_val -= adj;
3362 		}
3363 	} else {
3364 		sum_val = 0;
3365 		sum_flags = 0;
3366 	}
3367 
3368 	/* Clear hardware checksumming flag */
3369 	DB_CKSUMFLAGS(mp) = 0;
3370 
3371 	/*
3372 	 * Determine the offset (from the begining of the IP header)
3373 	 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3374 	 * this when removing the fragment header from the packet.
3375 	 * This packet consists of the IPv6 header, a potential
3376 	 * hop-by-hop options header, a potential pre-routing-header
3377 	 * destination options header, and a potential routing header.
3378 	 */
3379 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3380 	prev_nexthdr = ip6h->ip6_nxt;
3381 	ptr = (uint8_t *)&ip6h[1];
3382 
3383 	if (prev_nexthdr == IPPROTO_HOPOPTS) {
3384 		ip6_hbh_t	*hbh_hdr;
3385 		uint_t		hdr_len;
3386 
3387 		hbh_hdr = (ip6_hbh_t *)ptr;
3388 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3389 		prev_nexthdr = hbh_hdr->ip6h_nxt;
3390 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3391 		    - (uint8_t *)ip6h;
3392 		ptr += hdr_len;
3393 	}
3394 	if (prev_nexthdr == IPPROTO_DSTOPTS) {
3395 		ip6_dest_t	*dest_hdr;
3396 		uint_t		hdr_len;
3397 
3398 		dest_hdr = (ip6_dest_t *)ptr;
3399 		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3400 		prev_nexthdr = dest_hdr->ip6d_nxt;
3401 		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3402 		    - (uint8_t *)ip6h;
3403 		ptr += hdr_len;
3404 	}
3405 	if (prev_nexthdr == IPPROTO_ROUTING) {
3406 		ip6_rthdr_t	*rthdr;
3407 		uint_t		hdr_len;
3408 
3409 		rthdr = (ip6_rthdr_t *)ptr;
3410 		prev_nexthdr = rthdr->ip6r_nxt;
3411 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3412 		    - (uint8_t *)ip6h;
3413 		hdr_len = 8 * (rthdr->ip6r_len + 1);
3414 		ptr += hdr_len;
3415 	}
3416 	if (prev_nexthdr != IPPROTO_FRAGMENT) {
3417 		/* Can't handle other headers before the fragment header */
3418 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3419 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3420 		freemsg(mp);
3421 		return (NULL);
3422 	}
3423 
3424 	/*
3425 	 * Note: Fragment offset in header is in 8-octet units.
3426 	 * Clearing least significant 3 bits not only extracts
3427 	 * it but also gets it in units of octets.
3428 	 */
3429 	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
3430 	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3431 
3432 	/*
3433 	 * Is the more frags flag on and the payload length not a multiple
3434 	 * of eight?
3435 	 */
3436 	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3437 		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3438 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3439 		    (uint32_t)((char *)&ip6h->ip6_plen -
3440 		    (char *)ip6h), B_FALSE, ira);
3441 		return (NULL);
3442 	}
3443 
3444 	v6src_ptr = &ip6h->ip6_src;
3445 	v6dst_ptr = &ip6h->ip6_dst;
3446 	end = remlen;
3447 
3448 	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3449 	end += offset;
3450 
3451 	/*
3452 	 * Would fragment cause reassembled packet to have a payload length
3453 	 * greater than IP_MAXPACKET - the max payload size?
3454 	 */
3455 	if (end > IP_MAXPACKET) {
3456 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3457 		ip_drop_input("Reassembled packet too large", mp, ill);
3458 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3459 		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
3460 		    (char *)ip6h), B_FALSE, ira);
3461 		return (NULL);
3462 	}
3463 
3464 	/*
3465 	 * This packet just has one fragment. Reassembly not
3466 	 * needed.
3467 	 */
3468 	if (!more_frags && offset == 0) {
3469 		goto reass_done;
3470 	}
3471 
3472 	/*
3473 	 * Drop the fragmented as early as possible, if
3474 	 * we don't have resource(s) to re-assemble.
3475 	 */
3476 	if (ipst->ips_ip_reass_queue_bytes == 0) {
3477 		freemsg(mp);
3478 		return (NULL);
3479 	}
3480 
3481 	/* Record the ECN field info. */
3482 	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3483 	/*
3484 	 * If this is not the first fragment, dump the unfragmentable
3485 	 * portion of the packet.
3486 	 */
3487 	if (offset)
3488 		mp->b_rptr = (uchar_t *)&fraghdr[1];
3489 
3490 	/*
3491 	 * Fragmentation reassembly.  Each ILL has a hash table for
3492 	 * queueing packets undergoing reassembly for all IPIFs
3493 	 * associated with the ILL.  The hash is based on the packet
3494 	 * IP ident field.  The ILL frag hash table was allocated
3495 	 * as a timer block at the time the ILL was created.  Whenever
3496 	 * there is anything on the reassembly queue, the timer will
3497 	 * be running.
3498 	 */
3499 	/* Handle vnic loopback of fragments */
3500 	if (mp->b_datap->db_ref > 2)
3501 		msg_len = 0;
3502 	else
3503 		msg_len = MBLKSIZE(mp);
3504 
3505 	tail_mp = mp;
3506 	while (tail_mp->b_cont != NULL) {
3507 		tail_mp = tail_mp->b_cont;
3508 		if (tail_mp->b_datap->db_ref <= 2)
3509 			msg_len += MBLKSIZE(tail_mp);
3510 	}
3511 	/*
3512 	 * If the reassembly list for this ILL will get too big
3513 	 * prune it.
3514 	 */
3515 
3516 	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3517 	    ipst->ips_ip_reass_queue_bytes) {
3518 		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3519 		    uint_t, ill->ill_frag_count,
3520 		    uint_t, ipst->ips_ip_reass_queue_bytes);
3521 		ill_frag_prune(ill,
3522 		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3523 		    (ipst->ips_ip_reass_queue_bytes - msg_len));
3524 		pruned = B_TRUE;
3525 	}
3526 
3527 	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3528 	mutex_enter(&ipfb->ipfb_lock);
3529 
3530 	ipfp = &ipfb->ipfb_ipf;
3531 	/* Try to find an existing fragment queue for this packet. */
3532 	for (;;) {
3533 		ipf = ipfp[0];
3534 		if (ipf) {
3535 			/*
3536 			 * It has to match on ident, source address, and
3537 			 * dest address.
3538 			 */
3539 			if (ipf->ipf_ident == ident &&
3540 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3541 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3542 
3543 				/*
3544 				 * If we have received too many
3545 				 * duplicate fragments for this packet
3546 				 * free it.
3547 				 */
3548 				if (ipf->ipf_num_dups > ip_max_frag_dups) {
3549 					ill_frag_free_pkts(ill, ipfb, ipf, 1);
3550 					freemsg(mp);
3551 					mutex_exit(&ipfb->ipfb_lock);
3552 					return (NULL);
3553 				}
3554 
3555 				break;
3556 			}
3557 			ipfp = &ipf->ipf_hash_next;
3558 			continue;
3559 		}
3560 
3561 
3562 		/*
3563 		 * If we pruned the list, do we want to store this new
3564 		 * fragment?. We apply an optimization here based on the
3565 		 * fact that most fragments will be received in order.
3566 		 * So if the offset of this incoming fragment is zero,
3567 		 * it is the first fragment of a new packet. We will
3568 		 * keep it.  Otherwise drop the fragment, as we have
3569 		 * probably pruned the packet already (since the
3570 		 * packet cannot be found).
3571 		 */
3572 
3573 		if (pruned && offset != 0) {
3574 			mutex_exit(&ipfb->ipfb_lock);
3575 			freemsg(mp);
3576 			return (NULL);
3577 		}
3578 
3579 		/* New guy.  Allocate a frag message. */
3580 		mp1 = allocb(sizeof (*ipf), BPRI_MED);
3581 		if (!mp1) {
3582 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3583 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3584 			freemsg(mp);
3585 	partial_reass_done:
3586 			mutex_exit(&ipfb->ipfb_lock);
3587 			return (NULL);
3588 		}
3589 
3590 		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst))  {
3591 			/*
3592 			 * Too many fragmented packets in this hash bucket.
3593 			 * Free the oldest.
3594 			 */
3595 			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
3596 		}
3597 
3598 		mp1->b_cont = mp;
3599 
3600 		/* Initialize the fragment header. */
3601 		ipf = (ipf_t *)mp1->b_rptr;
3602 		ipf->ipf_mp = mp1;
3603 		ipf->ipf_ptphn = ipfp;
3604 		ipfp[0] = ipf;
3605 		ipf->ipf_hash_next = NULL;
3606 		ipf->ipf_ident = ident;
3607 		ipf->ipf_v6src = *v6src_ptr;
3608 		ipf->ipf_v6dst = *v6dst_ptr;
3609 		/* Record reassembly start time. */
3610 		ipf->ipf_timestamp = gethrestime_sec();
3611 		/* Record ipf generation and account for frag header */
3612 		ipf->ipf_gen = ill->ill_ipf_gen++;
3613 		ipf->ipf_count = MBLKSIZE(mp1);
3614 		ipf->ipf_protocol = nexthdr;
3615 		ipf->ipf_nf_hdr_len = 0;
3616 		ipf->ipf_prev_nexthdr_offset = 0;
3617 		ipf->ipf_last_frag_seen = B_FALSE;
3618 		ipf->ipf_ecn = ecn_info;
3619 		ipf->ipf_num_dups = 0;
3620 		ipfb->ipfb_frag_pkts++;
3621 		ipf->ipf_checksum = 0;
3622 		ipf->ipf_checksum_flags = 0;
3623 
3624 		/* Store checksum value in fragment header */
3625 		if (sum_flags != 0) {
3626 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3627 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3628 			ipf->ipf_checksum = sum_val;
3629 			ipf->ipf_checksum_flags = sum_flags;
3630 		}
3631 
3632 		/*
3633 		 * We handle reassembly two ways.  In the easy case,
3634 		 * where all the fragments show up in order, we do
3635 		 * minimal bookkeeping, and just clip new pieces on
3636 		 * the end.  If we ever see a hole, then we go off
3637 		 * to ip_reassemble which has to mark the pieces and
3638 		 * keep track of the number of holes, etc.  Obviously,
3639 		 * the point of having both mechanisms is so we can
3640 		 * handle the easy case as efficiently as possible.
3641 		 */
3642 		if (offset == 0) {
3643 			/* Easy case, in-order reassembly so far. */
3644 			/* Update the byte count */
3645 			ipf->ipf_count += msg_len;
3646 			ipf->ipf_tail_mp = tail_mp;
3647 			/*
3648 			 * Keep track of next expected offset in
3649 			 * ipf_end.
3650 			 */
3651 			ipf->ipf_end = end;
3652 			ipf->ipf_nf_hdr_len = hdr_length;
3653 			ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
3654 		} else {
3655 			/* Hard case, hole at the beginning. */
3656 			ipf->ipf_tail_mp = NULL;
3657 			/*
3658 			 * ipf_end == 0 means that we have given up
3659 			 * on easy reassembly.
3660 			 */
3661 			ipf->ipf_end = 0;
3662 
3663 			/* Forget checksum offload from now on */
3664 			ipf->ipf_checksum_flags = 0;
3665 
3666 			/*
3667 			 * ipf_hole_cnt is set by ip_reassemble.
3668 			 * ipf_count is updated by ip_reassemble.
3669 			 * No need to check for return value here
3670 			 * as we don't expect reassembly to complete or
3671 			 * fail for the first fragment itself.
3672 			 */
3673 			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
3674 			    msg_len);
3675 		}
3676 		/* Update per ipfb and ill byte counts */
3677 		ipfb->ipfb_count += ipf->ipf_count;
3678 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3679 		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
3680 		/* If the frag timer wasn't already going, start it. */
3681 		mutex_enter(&ill->ill_lock);
3682 		ill_frag_timer_start(ill);
3683 		mutex_exit(&ill->ill_lock);
3684 		goto partial_reass_done;
3685 	}
3686 
3687 	/*
3688 	 * If the packet's flag has changed (it could be coming up
3689 	 * from an interface different than the previous, therefore
3690 	 * possibly different checksum capability), then forget about
3691 	 * any stored checksum states.  Otherwise add the value to
3692 	 * the existing one stored in the fragment header.
3693 	 */
3694 	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
3695 		sum_val += ipf->ipf_checksum;
3696 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3697 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3698 		ipf->ipf_checksum = sum_val;
3699 	} else if (ipf->ipf_checksum_flags != 0) {
3700 		/* Forget checksum offload from now on */
3701 		ipf->ipf_checksum_flags = 0;
3702 	}
3703 
3704 	/*
3705 	 * We have a new piece of a datagram which is already being
3706 	 * reassembled.  Update the ECN info if all IP fragments
3707 	 * are ECN capable.  If there is one which is not, clear
3708 	 * all the info.  If there is at least one which has CE
3709 	 * code point, IP needs to report that up to transport.
3710 	 */
3711 	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
3712 		if (ecn_info == IPH_ECN_CE)
3713 			ipf->ipf_ecn = IPH_ECN_CE;
3714 	} else {
3715 		ipf->ipf_ecn = IPH_ECN_NECT;
3716 	}
3717 
3718 	if (offset && ipf->ipf_end == offset) {
3719 		/* The new fragment fits at the end */
3720 		ipf->ipf_tail_mp->b_cont = mp;
3721 		/* Update the byte count */
3722 		ipf->ipf_count += msg_len;
3723 		/* Update per ipfb and ill byte counts */
3724 		ipfb->ipfb_count += msg_len;
3725 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3726 		atomic_add_32(&ill->ill_frag_count, msg_len);
3727 		if (more_frags) {
3728 			/* More to come. */
3729 			ipf->ipf_end = end;
3730 			ipf->ipf_tail_mp = tail_mp;
3731 			goto partial_reass_done;
3732 		}
3733 	} else {
3734 		/*
3735 		 * Go do the hard cases.
3736 		 * Call ip_reassemble().
3737 		 */
3738 		int ret;
3739 
3740 		if (offset == 0) {
3741 			if (ipf->ipf_prev_nexthdr_offset == 0) {
3742 				ipf->ipf_nf_hdr_len = hdr_length;
3743 				ipf->ipf_prev_nexthdr_offset =
3744 				    prev_nexthdr_offset;
3745 			}
3746 		}
3747 		/* Save current byte count */
3748 		count = ipf->ipf_count;
3749 		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
3750 
3751 		/* Count of bytes added and subtracted (freeb()ed) */
3752 		count = ipf->ipf_count - count;
3753 		if (count) {
3754 			/* Update per ipfb and ill byte counts */
3755 			ipfb->ipfb_count += count;
3756 			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3757 			atomic_add_32(&ill->ill_frag_count, count);
3758 		}
3759 		if (ret == IP_REASS_PARTIAL) {
3760 			goto partial_reass_done;
3761 		} else if (ret == IP_REASS_FAILED) {
3762 			/* Reassembly failed. Free up all resources */
3763 			ill_frag_free_pkts(ill, ipfb, ipf, 1);
3764 			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
3765 				IP_REASS_SET_START(t_mp, 0);
3766 				IP_REASS_SET_END(t_mp, 0);
3767 			}
3768 			freemsg(mp);
3769 			goto partial_reass_done;
3770 		}
3771 
3772 		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3773 	}
3774 	/*
3775 	 * We have completed reassembly.  Unhook the frag header from
3776 	 * the reassembly list.
3777 	 *
3778 	 * Grab the unfragmentable header length next header value out
3779 	 * of the first fragment
3780 	 */
3781 	ASSERT(ipf->ipf_nf_hdr_len != 0);
3782 	hdr_length = ipf->ipf_nf_hdr_len;
3783 
3784 	/*
3785 	 * Before we free the frag header, record the ECN info
3786 	 * to report back to the transport.
3787 	 */
3788 	ecn_info = ipf->ipf_ecn;
3789 
3790 	/*
3791 	 * Store the nextheader field in the header preceding the fragment
3792 	 * header
3793 	 */
3794 	nexthdr = ipf->ipf_protocol;
3795 	prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
3796 	ipfp = ipf->ipf_ptphn;
3797 
3798 	/* We need to supply these to caller */
3799 	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
3800 		sum_val = ipf->ipf_checksum;
3801 	else
3802 		sum_val = 0;
3803 
3804 	mp1 = ipf->ipf_mp;
3805 	count = ipf->ipf_count;
3806 	ipf = ipf->ipf_hash_next;
3807 	if (ipf)
3808 		ipf->ipf_ptphn = ipfp;
3809 	ipfp[0] = ipf;
3810 	atomic_add_32(&ill->ill_frag_count, -count);
3811 	ASSERT(ipfb->ipfb_count >= count);
3812 	ipfb->ipfb_count -= count;
3813 	ipfb->ipfb_frag_pkts--;
3814 	mutex_exit(&ipfb->ipfb_lock);
3815 	/* Ditch the frag header. */
3816 	mp = mp1->b_cont;
3817 	freeb(mp1);
3818 
3819 	/*
3820 	 * Make sure the packet is good by doing some sanity
3821 	 * check. If bad we can silentely drop the packet.
3822 	 */
3823 reass_done:
3824 	if (hdr_length < sizeof (ip6_frag_t)) {
3825 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3826 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3827 		ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3828 		freemsg(mp);
3829 		return (NULL);
3830 	}
3831 
3832 	/*
3833 	 * Remove the fragment header from the initial header by
3834 	 * splitting the mblk into the non-fragmentable header and
3835 	 * everthing after the fragment extension header.  This has the
3836 	 * side effect of putting all the headers that need destination
3837 	 * processing into the b_cont block-- on return this fact is
3838 	 * used in order to avoid having to look at the extensions
3839 	 * already processed.
3840 	 *
3841 	 * Note that this code assumes that the unfragmentable portion
3842 	 * of the header is in the first mblk and increments
3843 	 * the read pointer past it.  If this assumption is broken
3844 	 * this code fails badly.
3845 	 */
3846 	if (mp->b_rptr + hdr_length != mp->b_wptr) {
3847 		mblk_t *nmp;
3848 
3849 		if (!(nmp = dupb(mp))) {
3850 			ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3851 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3852 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3853 			freemsg(mp);
3854 			return (NULL);
3855 		}
3856 		nmp->b_cont = mp->b_cont;
3857 		mp->b_cont = nmp;
3858 		nmp->b_rptr += hdr_length;
3859 	}
3860 	mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
3861 
3862 	ip6h = (ip6_t *)mp->b_rptr;
3863 	((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
3864 
3865 	/* Restore original IP length in header. */
3866 	packet_size = msgdsize(mp);
3867 	ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
3868 	/* Record the ECN info. */
3869 	ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
3870 	ip6h->ip6_vcf |= htonl(ecn_info << 20);
3871 
3872 	/* Update the receive attributes */
3873 	ira->ira_pktlen = packet_size;
3874 	ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
3875 	ira->ira_protocol = nexthdr;
3876 
3877 	/* Reassembly is successful; set checksum information in packet */
3878 	DB_CKSUM16(mp) = (uint16_t)sum_val;
3879 	DB_CKSUMFLAGS(mp) = sum_flags;
3880 	DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
3881 
3882 	return (mp);
3883 }
3884 
3885 /*
3886  * Given an mblk and a ptr, find the destination address in an IPv6 routing
3887  * header.
3888  */
3889 static in6_addr_t
3890 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3891 {
3892 	ip6_rthdr0_t *rt0;
3893 	int segleft, numaddr;
3894 	in6_addr_t *ap, rv = oldrv;
3895 
3896 	rt0 = (ip6_rthdr0_t *)whereptr;
3897 	if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3898 		DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3899 		    uint8_t *, whereptr);
3900 		return (rv);
3901 	}
3902 	segleft = rt0->ip6r0_segleft;
3903 	numaddr = rt0->ip6r0_len / 2;
3904 
3905 	if ((rt0->ip6r0_len & 0x1) ||
3906 	    (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3907 	    (segleft > rt0->ip6r0_len / 2)) {
3908 		/*
3909 		 * Corrupt packet.  Either the routing header length is odd
3910 		 * (can't happen) or mismatched compared to the packet, or the
3911 		 * number of addresses is.  Return what we can.  This will
3912 		 * only be a problem on forwarded packets that get squeezed
3913 		 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3914 		 */
3915 		DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3916 		    whereptr);
3917 		return (rv);
3918 	}
3919 
3920 	if (segleft != 0) {
3921 		ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3922 		rv = ap[numaddr - 1];
3923 	}
3924 
3925 	return (rv);
3926 }
3927 
3928 /*
3929  * Walk through the options to see if there is a routing header.
3930  * If present get the destination which is the last address of
3931  * the option.
3932  * mp needs to be provided in cases when the extension headers might span
3933  * b_cont; mp is never modified by this function.
3934  */
3935 in6_addr_t
3936 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3937 {
3938 	const mblk_t *current_mp = mp;
3939 	uint8_t nexthdr;
3940 	uint8_t *whereptr;
3941 	int ehdrlen;
3942 	in6_addr_t rv;
3943 
3944 	whereptr = (uint8_t *)ip6h;
3945 	ehdrlen = sizeof (ip6_t);
3946 
3947 	/* We assume at least the IPv6 base header is within one mblk. */
3948 	ASSERT(mp == NULL ||
3949 	    (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3950 
3951 	rv = ip6h->ip6_dst;
3952 	nexthdr = ip6h->ip6_nxt;
3953 	if (is_fragment != NULL)
3954 		*is_fragment = B_FALSE;
3955 
3956 	/*
3957 	 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3958 	 * no extension headers will be split across mblks.
3959 	 */
3960 
3961 	while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3962 	    nexthdr == IPPROTO_ROUTING) {
3963 		if (nexthdr == IPPROTO_ROUTING)
3964 			rv = pluck_out_dst(current_mp, whereptr, rv);
3965 
3966 		/*
3967 		 * All IPv6 extension headers have the next-header in byte
3968 		 * 0, and the (length - 8) in 8-byte-words.
3969 		 */
3970 		while (current_mp != NULL &&
3971 		    whereptr + ehdrlen >= current_mp->b_wptr) {
3972 			ehdrlen -= (current_mp->b_wptr - whereptr);
3973 			current_mp = current_mp->b_cont;
3974 			if (current_mp == NULL) {
3975 				/* Bad packet.  Return what we can. */
3976 				DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3977 				    mp, mblk_t *, current_mp, ip6_t *, ip6h);
3978 				goto done;
3979 			}
3980 			whereptr = current_mp->b_rptr;
3981 		}
3982 		whereptr += ehdrlen;
3983 
3984 		nexthdr = *whereptr;
3985 		ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3986 		ehdrlen = (*(whereptr + 1) + 1) * 8;
3987 	}
3988 
3989 done:
3990 	if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3991 		*is_fragment = B_TRUE;
3992 	return (rv);
3993 }
3994 
3995 /*
3996  * ip_source_routed_v6:
3997  * This function is called by redirect code (called from ip_input_v6) to
3998  * know whether this packet is source routed through this node i.e
3999  * whether this node (router) is part of the journey. This
4000  * function is called under two cases :
4001  *
4002  * case 1 : Routing header was processed by this node and
4003  *	    ip_process_rthdr replaced ip6_dst with the next hop
4004  *	    and we are forwarding the packet to the next hop.
4005  *
4006  * case 2 : Routing header was not processed by this node and we
4007  *	    are just forwarding the packet.
4008  *
4009  * For case (1) we don't want to send redirects. For case(2) we
4010  * want to send redirects.
4011  */
4012 static boolean_t
4013 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
4014 {
4015 	uint8_t		nexthdr;
4016 	in6_addr_t	*addrptr;
4017 	ip6_rthdr0_t	*rthdr;
4018 	uint8_t		numaddr;
4019 	ip6_hbh_t	*hbhhdr;
4020 	uint_t		ehdrlen;
4021 	uint8_t		*byteptr;
4022 
4023 	ip2dbg(("ip_source_routed_v6\n"));
4024 	nexthdr = ip6h->ip6_nxt;
4025 	ehdrlen = IPV6_HDR_LEN;
4026 
4027 	/* if a routing hdr is preceeded by HOPOPT or DSTOPT */
4028 	while (nexthdr == IPPROTO_HOPOPTS ||
4029 	    nexthdr == IPPROTO_DSTOPTS) {
4030 		byteptr = (uint8_t *)ip6h + ehdrlen;
4031 		/*
4032 		 * Check if we have already processed
4033 		 * packets or we are just a forwarding
4034 		 * router which only pulled up msgs up
4035 		 * to IPV6HDR and  one HBH ext header
4036 		 */
4037 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4038 			ip2dbg(("ip_source_routed_v6: Extension"
4039 			    " headers not processed\n"));
4040 			return (B_FALSE);
4041 		}
4042 		hbhhdr = (ip6_hbh_t *)byteptr;
4043 		nexthdr = hbhhdr->ip6h_nxt;
4044 		ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
4045 	}
4046 	switch (nexthdr) {
4047 	case IPPROTO_ROUTING:
4048 		byteptr = (uint8_t *)ip6h + ehdrlen;
4049 		/*
4050 		 * If for some reason, we haven't pulled up
4051 		 * the routing hdr data mblk, then we must
4052 		 * not have processed it at all. So for sure
4053 		 * we are not part of the source routed journey.
4054 		 */
4055 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4056 			ip2dbg(("ip_source_routed_v6: Routing"
4057 			    " header not processed\n"));
4058 			return (B_FALSE);
4059 		}
4060 		rthdr = (ip6_rthdr0_t *)byteptr;
4061 		/*
4062 		 * Either we are an intermediate router or the
4063 		 * last hop before destination and we have
4064 		 * already processed the routing header.
4065 		 * If segment_left is greater than or equal to zero,
4066 		 * then we must be the (numaddr - segleft) entry
4067 		 * of the routing header. Although ip6r0_segleft
4068 		 * is a unit8_t variable, we still check for zero
4069 		 * or greater value, if in case the data type
4070 		 * is changed someday in future.
4071 		 */
4072 		if (rthdr->ip6r0_segleft > 0 ||
4073 		    rthdr->ip6r0_segleft == 0) {
4074 			numaddr = rthdr->ip6r0_len / 2;
4075 			addrptr = (in6_addr_t *)((char *)rthdr +
4076 			    sizeof (*rthdr));
4077 			addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
4078 			if (addrptr != NULL) {
4079 				if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
4080 					return (B_TRUE);
4081 				ip1dbg(("ip_source_routed_v6: Not local\n"));
4082 			}
4083 		}
4084 	/* FALLTHRU */
4085 	default:
4086 		ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4087 		return (B_FALSE);
4088 	}
4089 }
4090 
4091 /*
4092  * IPv6 fragmentation.  Essentially the same as IPv4 fragmentation.
4093  * We have not optimized this in terms of number of mblks
4094  * allocated. For instance, for each fragment sent we always allocate a
4095  * mblk to hold the IPv6 header and fragment header.
4096  *
4097  * Assumes that all the extension headers are contained in the first mblk
4098  * and that the fragment header has has already been added by calling
4099  * ip_fraghdr_add_v6.
4100  */
4101 int
4102 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4103     uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4104     pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4105 {
4106 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
4107 	ip6_t		*fip6h;
4108 	mblk_t		*hmp;
4109 	mblk_t		*hmp0;
4110 	mblk_t		*dmp;
4111 	ip6_frag_t	*fraghdr;
4112 	size_t		unfragmentable_len;
4113 	size_t		mlen;
4114 	size_t		max_chunk;
4115 	uint16_t	off_flags;
4116 	uint16_t	offset = 0;
4117 	ill_t		*ill = nce->nce_ill;
4118 	uint8_t		nexthdr;
4119 	uint8_t		*ptr;
4120 	ip_stack_t	*ipst = ill->ill_ipst;
4121 	uint_t		priority = mp->b_band;
4122 	int		error = 0;
4123 
4124 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4125 	if (max_frag == 0) {
4126 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4127 		ip_drop_output("FragFails: zero max_frag", mp, ill);
4128 		freemsg(mp);
4129 		return (EINVAL);
4130 	}
4131 
4132 	/*
4133 	 * Caller should have added fraghdr_t to pkt_len, and also
4134 	 * updated ip6_plen.
4135 	 */
4136 	ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4137 	ASSERT(msgdsize(mp) == pkt_len);
4138 
4139 	/*
4140 	 * Determine the length of the unfragmentable portion of this
4141 	 * datagram.  This consists of the IPv6 header, a potential
4142 	 * hop-by-hop options header, a potential pre-routing-header
4143 	 * destination options header, and a potential routing header.
4144 	 */
4145 	nexthdr = ip6h->ip6_nxt;
4146 	ptr = (uint8_t *)&ip6h[1];
4147 
4148 	if (nexthdr == IPPROTO_HOPOPTS) {
4149 		ip6_hbh_t	*hbh_hdr;
4150 		uint_t		hdr_len;
4151 
4152 		hbh_hdr = (ip6_hbh_t *)ptr;
4153 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4154 		nexthdr = hbh_hdr->ip6h_nxt;
4155 		ptr += hdr_len;
4156 	}
4157 	if (nexthdr == IPPROTO_DSTOPTS) {
4158 		ip6_dest_t	*dest_hdr;
4159 		uint_t		hdr_len;
4160 
4161 		dest_hdr = (ip6_dest_t *)ptr;
4162 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4163 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4164 			nexthdr = dest_hdr->ip6d_nxt;
4165 			ptr += hdr_len;
4166 		}
4167 	}
4168 	if (nexthdr == IPPROTO_ROUTING) {
4169 		ip6_rthdr_t	*rthdr;
4170 		uint_t		hdr_len;
4171 
4172 		rthdr = (ip6_rthdr_t *)ptr;
4173 		nexthdr = rthdr->ip6r_nxt;
4174 		hdr_len = 8 * (rthdr->ip6r_len + 1);
4175 		ptr += hdr_len;
4176 	}
4177 	if (nexthdr != IPPROTO_FRAGMENT) {
4178 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4179 		ip_drop_output("FragFails: bad nexthdr", mp, ill);
4180 		freemsg(mp);
4181 		return (EINVAL);
4182 	}
4183 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4184 	unfragmentable_len += sizeof (ip6_frag_t);
4185 
4186 	max_chunk = (max_frag - unfragmentable_len) & ~7;
4187 
4188 	/*
4189 	 * Allocate an mblk with enough room for the link-layer
4190 	 * header and the unfragmentable part of the datagram, which includes
4191 	 * the fragment header.  This (or a copy) will be used as the
4192 	 * first mblk for each fragment we send.
4193 	 */
4194 	hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4195 	if (hmp == NULL) {
4196 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4197 		ip_drop_output("FragFails: no hmp", mp, ill);
4198 		freemsg(mp);
4199 		return (ENOBUFS);
4200 	}
4201 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
4202 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4203 
4204 	fip6h = (ip6_t *)hmp->b_rptr;
4205 	bcopy(ip6h, fip6h, unfragmentable_len);
4206 
4207 	/*
4208 	 * pkt_len is set to the total length of the fragmentable data in this
4209 	 * datagram.  For each fragment sent, we will decrement pkt_len
4210 	 * by the amount of fragmentable data sent in that fragment
4211 	 * until len reaches zero.
4212 	 */
4213 	pkt_len -= unfragmentable_len;
4214 
4215 	/*
4216 	 * Move read ptr past unfragmentable portion, we don't want this part
4217 	 * of the data in our fragments.
4218 	 */
4219 	mp->b_rptr += unfragmentable_len;
4220 	if (mp->b_rptr == mp->b_wptr) {
4221 		mblk_t *mp1 = mp->b_cont;
4222 		freeb(mp);
4223 		mp = mp1;
4224 	}
4225 
4226 	while (pkt_len != 0) {
4227 		mlen = MIN(pkt_len, max_chunk);
4228 		pkt_len -= mlen;
4229 		if (pkt_len != 0) {
4230 			/* Not last */
4231 			hmp0 = copyb(hmp);
4232 			if (hmp0 == NULL) {
4233 				BUMP_MIB(ill->ill_ip_mib,
4234 				    ipIfStatsOutFragFails);
4235 				ip_drop_output("FragFails: copyb failed",
4236 				    mp, ill);
4237 				freeb(hmp);
4238 				freemsg(mp);
4239 				ip1dbg(("ip_fragment_v6: copyb failed\n"));
4240 				return (ENOBUFS);
4241 			}
4242 			off_flags = IP6F_MORE_FRAG;
4243 		} else {
4244 			/* Last fragment */
4245 			hmp0 = hmp;
4246 			hmp = NULL;
4247 			off_flags = 0;
4248 		}
4249 		fip6h = (ip6_t *)(hmp0->b_rptr);
4250 		fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4251 		    sizeof (ip6_frag_t));
4252 
4253 		fip6h->ip6_plen = htons((uint16_t)(mlen +
4254 		    unfragmentable_len - IPV6_HDR_LEN));
4255 		/*
4256 		 * Note: Optimization alert.
4257 		 * In IPv6 (and IPv4) protocol header, Fragment Offset
4258 		 * ("offset") is 13 bits wide and in 8-octet units.
4259 		 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4260 		 * it occupies the most significant 13 bits.
4261 		 * (least significant 13 bits in IPv4).
4262 		 * We do not do any shifts here. Not shifting is same effect
4263 		 * as taking offset value in octet units, dividing by 8 and
4264 		 * then shifting 3 bits left to line it up in place in proper
4265 		 * place protocol header.
4266 		 */
4267 		fraghdr->ip6f_offlg = htons(offset) | off_flags;
4268 
4269 		if (!(dmp = ip_carve_mp(&mp, mlen))) {
4270 			/* mp has already been freed by ip_carve_mp() */
4271 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4272 			ip_drop_output("FragFails: could not carve mp",
4273 			    hmp0, ill);
4274 			if (hmp != NULL)
4275 				freeb(hmp);
4276 			freeb(hmp0);
4277 			ip1dbg(("ip_carve_mp: failed\n"));
4278 			return (ENOBUFS);
4279 		}
4280 		hmp0->b_cont = dmp;
4281 		/* Get the priority marking, if any */
4282 		hmp0->b_band = priority;
4283 
4284 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4285 
4286 		error = postfragfn(hmp0, nce, ixaflags,
4287 		    mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4288 		    ixa_cookie);
4289 		if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4290 			/* No point in sending the other fragments */
4291 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4292 			ip_drop_output("FragFails: postfragfn failed",
4293 			    hmp, ill);
4294 			freeb(hmp);
4295 			freemsg(mp);
4296 			return (error);
4297 		}
4298 		/* No need to redo state machine in loop */
4299 		ixaflags &= ~IXAF_REACH_CONF;
4300 
4301 		offset += mlen;
4302 	}
4303 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4304 	return (error);
4305 }
4306 
4307 /*
4308  * Add a fragment header to an IPv6 packet.
4309  * Assumes that all the extension headers are contained in the first mblk.
4310  *
4311  * The fragment header is inserted after an hop-by-hop options header
4312  * and after [an optional destinations header followed by] a routing header.
4313  */
4314 mblk_t *
4315 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4316 {
4317 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
4318 	ip6_t		*fip6h;
4319 	mblk_t		*hmp;
4320 	ip6_frag_t	*fraghdr;
4321 	size_t		unfragmentable_len;
4322 	uint8_t		nexthdr;
4323 	uint_t		prev_nexthdr_offset;
4324 	uint8_t		*ptr;
4325 	uint_t		priority = mp->b_band;
4326 	ip_stack_t	*ipst = ixa->ixa_ipst;
4327 
4328 	/*
4329 	 * Determine the length of the unfragmentable portion of this
4330 	 * datagram.  This consists of the IPv6 header, a potential
4331 	 * hop-by-hop options header, a potential pre-routing-header
4332 	 * destination options header, and a potential routing header.
4333 	 */
4334 	nexthdr = ip6h->ip6_nxt;
4335 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4336 	ptr = (uint8_t *)&ip6h[1];
4337 
4338 	if (nexthdr == IPPROTO_HOPOPTS) {
4339 		ip6_hbh_t	*hbh_hdr;
4340 		uint_t		hdr_len;
4341 
4342 		hbh_hdr = (ip6_hbh_t *)ptr;
4343 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4344 		nexthdr = hbh_hdr->ip6h_nxt;
4345 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4346 		    - (uint8_t *)ip6h;
4347 		ptr += hdr_len;
4348 	}
4349 	if (nexthdr == IPPROTO_DSTOPTS) {
4350 		ip6_dest_t	*dest_hdr;
4351 		uint_t		hdr_len;
4352 
4353 		dest_hdr = (ip6_dest_t *)ptr;
4354 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4355 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4356 			nexthdr = dest_hdr->ip6d_nxt;
4357 			prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4358 			    - (uint8_t *)ip6h;
4359 			ptr += hdr_len;
4360 		}
4361 	}
4362 	if (nexthdr == IPPROTO_ROUTING) {
4363 		ip6_rthdr_t	*rthdr;
4364 		uint_t		hdr_len;
4365 
4366 		rthdr = (ip6_rthdr_t *)ptr;
4367 		nexthdr = rthdr->ip6r_nxt;
4368 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4369 		    - (uint8_t *)ip6h;
4370 		hdr_len = 8 * (rthdr->ip6r_len + 1);
4371 		ptr += hdr_len;
4372 	}
4373 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4374 
4375 	/*
4376 	 * Allocate an mblk with enough room for the link-layer
4377 	 * header, the unfragmentable part of the datagram, and the
4378 	 * fragment header.
4379 	 */
4380 	hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4381 	    ipst->ips_ip_wroff_extra, mp);
4382 	if (hmp == NULL) {
4383 		ill_t *ill = ixa->ixa_nce->nce_ill;
4384 
4385 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4386 		ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4387 		freemsg(mp);
4388 		return (NULL);
4389 	}
4390 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
4391 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4392 
4393 	fip6h = (ip6_t *)hmp->b_rptr;
4394 	fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4395 
4396 	bcopy(ip6h, fip6h, unfragmentable_len);
4397 	fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4398 	hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4399 
4400 	fraghdr->ip6f_nxt = nexthdr;
4401 	fraghdr->ip6f_reserved = 0;
4402 	fraghdr->ip6f_offlg = 0;
4403 	fraghdr->ip6f_ident = htonl(ident);
4404 
4405 	/* Get the priority marking, if any */
4406 	hmp->b_band = priority;
4407 
4408 	/*
4409 	 * Move read ptr past unfragmentable portion, we don't want this part
4410 	 * of the data in our fragments.
4411 	 */
4412 	mp->b_rptr += unfragmentable_len;
4413 	hmp->b_cont = mp;
4414 	return (hmp);
4415 }
4416 
4417 /*
4418  * Determine if the ill and multicast aspects of that packets
4419  * "matches" the conn.
4420  */
4421 boolean_t
4422 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4423 {
4424 	ill_t		*ill = ira->ira_rill;
4425 	zoneid_t	zoneid = ira->ira_zoneid;
4426 	uint_t		in_ifindex;
4427 	in6_addr_t	*v6dst_ptr = &ip6h->ip6_dst;
4428 	in6_addr_t	*v6src_ptr = &ip6h->ip6_src;
4429 
4430 	/*
4431 	 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4432 	 * scopeid. This is used to limit
4433 	 * unicast and multicast reception to conn_incoming_ifindex.
4434 	 * conn_wantpacket_v6 is called both for unicast and
4435 	 * multicast packets.
4436 	 */
4437 	in_ifindex = connp->conn_incoming_ifindex;
4438 
4439 	/* mpathd can bind to the under IPMP interface, which we allow */
4440 	if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4441 		if (!IS_UNDER_IPMP(ill))
4442 			return (B_FALSE);
4443 
4444 		if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4445 			return (B_FALSE);
4446 	}
4447 
4448 	if (!IPCL_ZONE_MATCH(connp, zoneid))
4449 		return (B_FALSE);
4450 
4451 	if (!(ira->ira_flags & IRAF_MULTICAST))
4452 		return (B_TRUE);
4453 
4454 	if (connp->conn_multi_router)
4455 		return (B_TRUE);
4456 
4457 	if (ira->ira_protocol == IPPROTO_RSVP)
4458 		return (B_TRUE);
4459 
4460 	return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4461 	    ira->ira_ill));
4462 }
4463 
4464 /*
4465  * pr_addr_dbg function provides the needed buffer space to call
4466  * inet_ntop() function's 3rd argument. This function should be
4467  * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4468  * stack buffer space in it's own stack frame. This function uses
4469  * a buffer from it's own stack and prints the information.
4470  * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4471  *
4472  * Note:    This function can call inet_ntop() once.
4473  */
4474 void
4475 pr_addr_dbg(char *fmt1, int af, const void *addr)
4476 {
4477 	char	buf[INET6_ADDRSTRLEN];
4478 
4479 	if (fmt1 == NULL) {
4480 		ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4481 		return;
4482 	}
4483 
4484 	/*
4485 	 * This does not compare debug level and just prints
4486 	 * out. Thus it is the responsibility of the caller
4487 	 * to check the appropriate debug-level before calling
4488 	 * this function.
4489 	 */
4490 	if (ip_debug > 0) {
4491 		printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4492 	}
4493 
4494 
4495 }
4496 
4497 
4498 /*
4499  * Return the length in bytes of the IPv6 headers (base header
4500  * extension headers) that will be needed based on the
4501  * ip_pkt_t structure passed by the caller.
4502  *
4503  * The returned length does not include the length of the upper level
4504  * protocol (ULP) header.
4505  */
4506 int
4507 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4508 {
4509 	int len;
4510 
4511 	len = IPV6_HDR_LEN;
4512 
4513 	/*
4514 	 * If there's a security label here, then we ignore any hop-by-hop
4515 	 * options the user may try to set.
4516 	 */
4517 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
4518 		uint_t hopoptslen;
4519 		/*
4520 		 * Note that ipp_label_len_v6 is just the option - not
4521 		 * the hopopts extension header. It also needs to be padded
4522 		 * to a multiple of 8 bytes.
4523 		 */
4524 		ASSERT(ipp->ipp_label_len_v6 != 0);
4525 		hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4526 		hopoptslen = (hopoptslen + 7)/8 * 8;
4527 		len += hopoptslen;
4528 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4529 		ASSERT(ipp->ipp_hopoptslen != 0);
4530 		len += ipp->ipp_hopoptslen;
4531 	}
4532 
4533 	/*
4534 	 * En-route destination options
4535 	 * Only do them if there's a routing header as well
4536 	 */
4537 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4538 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4539 		ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4540 		len += ipp->ipp_rthdrdstoptslen;
4541 	}
4542 	if (ipp->ipp_fields & IPPF_RTHDR) {
4543 		ASSERT(ipp->ipp_rthdrlen != 0);
4544 		len += ipp->ipp_rthdrlen;
4545 	}
4546 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
4547 		ASSERT(ipp->ipp_dstoptslen != 0);
4548 		len += ipp->ipp_dstoptslen;
4549 	}
4550 	return (len);
4551 }
4552 
4553 /*
4554  * All-purpose routine to build a header chain of an IPv6 header
4555  * followed by any required extension headers and a proto header.
4556  *
4557  * The caller has to set the source and destination address as well as
4558  * ip6_plen. The caller has to massage any routing header and compensate
4559  * for the ULP pseudo-header checksum due to the source route.
4560  *
4561  * The extension headers will all be fully filled in.
4562  */
4563 void
4564 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4565     uint8_t protocol, uint32_t flowinfo)
4566 {
4567 	uint8_t *nxthdr_ptr;
4568 	uint8_t *cp;
4569 	ip6_t	*ip6h = (ip6_t *)buf;
4570 
4571 	/* Initialize IPv6 header */
4572 	ip6h->ip6_vcf =
4573 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4574 	    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4575 
4576 	if (ipp->ipp_fields & IPPF_TCLASS) {
4577 		/* Overrides the class part of flowinfo */
4578 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4579 		    ipp->ipp_tclass);
4580 	}
4581 
4582 	if (ipp->ipp_fields & IPPF_HOPLIMIT)
4583 		ip6h->ip6_hops = ipp->ipp_hoplimit;
4584 	else
4585 		ip6h->ip6_hops = ipp->ipp_unicast_hops;
4586 
4587 	if ((ipp->ipp_fields & IPPF_ADDR) &&
4588 	    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4589 		ip6h->ip6_src = ipp->ipp_addr;
4590 
4591 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4592 	cp = (uint8_t *)&ip6h[1];
4593 	/*
4594 	 * Here's where we have to start stringing together
4595 	 * any extension headers in the right order:
4596 	 * Hop-by-hop, destination, routing, and final destination opts.
4597 	 */
4598 	/*
4599 	 * If there's a security label here, then we ignore any hop-by-hop
4600 	 * options the user may try to set.
4601 	 */
4602 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
4603 		/*
4604 		 * Hop-by-hop options with the label.
4605 		 * Note that ipp_label_v6 is just the option - not
4606 		 * the hopopts extension header. It also needs to be padded
4607 		 * to a multiple of 8 bytes.
4608 		 */
4609 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4610 		uint_t hopoptslen;
4611 		uint_t padlen;
4612 
4613 		padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4614 		hopoptslen = (padlen + 7)/8 * 8;
4615 		padlen = hopoptslen - padlen;
4616 
4617 		*nxthdr_ptr = IPPROTO_HOPOPTS;
4618 		nxthdr_ptr = &hbh->ip6h_nxt;
4619 		hbh->ip6h_len = hopoptslen/8 - 1;
4620 		cp += sizeof (ip6_hbh_t);
4621 		bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4622 		cp += ipp->ipp_label_len_v6;
4623 
4624 		ASSERT(padlen <= 7);
4625 		switch (padlen) {
4626 		case 0:
4627 			break;
4628 		case 1:
4629 			cp[0] = IP6OPT_PAD1;
4630 			break;
4631 		default:
4632 			cp[0] = IP6OPT_PADN;
4633 			cp[1] = padlen - 2;
4634 			bzero(&cp[2], padlen - 2);
4635 			break;
4636 		}
4637 		cp += padlen;
4638 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4639 		/* Hop-by-hop options */
4640 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4641 
4642 		*nxthdr_ptr = IPPROTO_HOPOPTS;
4643 		nxthdr_ptr = &hbh->ip6h_nxt;
4644 
4645 		bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4646 		cp += ipp->ipp_hopoptslen;
4647 	}
4648 	/*
4649 	 * En-route destination options
4650 	 * Only do them if there's a routing header as well
4651 	 */
4652 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4653 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4654 		ip6_dest_t *dst = (ip6_dest_t *)cp;
4655 
4656 		*nxthdr_ptr = IPPROTO_DSTOPTS;
4657 		nxthdr_ptr = &dst->ip6d_nxt;
4658 
4659 		bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4660 		cp += ipp->ipp_rthdrdstoptslen;
4661 	}
4662 	/*
4663 	 * Routing header next
4664 	 */
4665 	if (ipp->ipp_fields & IPPF_RTHDR) {
4666 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4667 
4668 		*nxthdr_ptr = IPPROTO_ROUTING;
4669 		nxthdr_ptr = &rt->ip6r_nxt;
4670 
4671 		bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4672 		cp += ipp->ipp_rthdrlen;
4673 	}
4674 	/*
4675 	 * Do ultimate destination options
4676 	 */
4677 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
4678 		ip6_dest_t *dest = (ip6_dest_t *)cp;
4679 
4680 		*nxthdr_ptr = IPPROTO_DSTOPTS;
4681 		nxthdr_ptr = &dest->ip6d_nxt;
4682 
4683 		bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4684 		cp += ipp->ipp_dstoptslen;
4685 	}
4686 	/*
4687 	 * Now set the last header pointer to the proto passed in
4688 	 */
4689 	*nxthdr_ptr = protocol;
4690 	ASSERT((int)(cp - buf) == buf_len);
4691 }
4692 
4693 /*
4694  * Return a pointer to the routing header extension header
4695  * in the IPv6 header(s) chain passed in.
4696  * If none found, return NULL
4697  * Assumes that all extension headers are in same mblk as the v6 header
4698  */
4699 ip6_rthdr_t *
4700 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4701 {
4702 	ip6_dest_t	*desthdr;
4703 	ip6_frag_t	*fraghdr;
4704 	uint_t		hdrlen;
4705 	uint8_t		nexthdr;
4706 	uint8_t		*ptr = (uint8_t *)&ip6h[1];
4707 
4708 	if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4709 		return ((ip6_rthdr_t *)ptr);
4710 
4711 	/*
4712 	 * The routing header will precede all extension headers
4713 	 * other than the hop-by-hop and destination options
4714 	 * extension headers, so if we see anything other than those,
4715 	 * we're done and didn't find it.
4716 	 * We could see a destination options header alone but no
4717 	 * routing header, in which case we'll return NULL as soon as
4718 	 * we see anything after that.
4719 	 * Hop-by-hop and destination option headers are identical,
4720 	 * so we can use either one we want as a template.
4721 	 */
4722 	nexthdr = ip6h->ip6_nxt;
4723 	while (ptr < endptr) {
4724 		/* Is there enough left for len + nexthdr? */
4725 		if (ptr + MIN_EHDR_LEN > endptr)
4726 			return (NULL);
4727 
4728 		switch (nexthdr) {
4729 		case IPPROTO_HOPOPTS:
4730 		case IPPROTO_DSTOPTS:
4731 			/* Assumes the headers are identical for hbh and dst */
4732 			desthdr = (ip6_dest_t *)ptr;
4733 			hdrlen = 8 * (desthdr->ip6d_len + 1);
4734 			nexthdr = desthdr->ip6d_nxt;
4735 			break;
4736 
4737 		case IPPROTO_ROUTING:
4738 			return ((ip6_rthdr_t *)ptr);
4739 
4740 		case IPPROTO_FRAGMENT:
4741 			fraghdr = (ip6_frag_t *)ptr;
4742 			hdrlen = sizeof (ip6_frag_t);
4743 			nexthdr = fraghdr->ip6f_nxt;
4744 			break;
4745 
4746 		default:
4747 			return (NULL);
4748 		}
4749 		ptr += hdrlen;
4750 	}
4751 	return (NULL);
4752 }
4753 
4754 /*
4755  * Called for source-routed packets originating on this node.
4756  * Manipulates the original routing header by moving every entry up
4757  * one slot, placing the first entry in the v6 header's v6_dst field,
4758  * and placing the ultimate destination in the routing header's last
4759  * slot.
4760  *
4761  * Returns the checksum diference between the ultimate destination
4762  * (last hop in the routing header when the packet is sent) and
4763  * the first hop (ip6_dst when the packet is sent)
4764  */
4765 /* ARGSUSED2 */
4766 uint32_t
4767 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4768 {
4769 	uint_t		numaddr;
4770 	uint_t		i;
4771 	in6_addr_t	*addrptr;
4772 	in6_addr_t	tmp;
4773 	ip6_rthdr0_t	*rthdr = (ip6_rthdr0_t *)rth;
4774 	uint32_t	cksm;
4775 	uint32_t	addrsum = 0;
4776 	uint16_t	*ptr;
4777 
4778 	/*
4779 	 * Perform any processing needed for source routing.
4780 	 * We know that all extension headers will be in the same mblk
4781 	 * as the IPv6 header.
4782 	 */
4783 
4784 	/*
4785 	 * If no segments left in header, or the header length field is zero,
4786 	 * don't move hop addresses around;
4787 	 * Checksum difference is zero.
4788 	 */
4789 	if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4790 		return (0);
4791 
4792 	ptr = (uint16_t *)&ip6h->ip6_dst;
4793 	cksm = 0;
4794 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4795 		cksm += ptr[i];
4796 	}
4797 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
4798 
4799 	/*
4800 	 * Here's where the fun begins - we have to
4801 	 * move all addresses up one spot, take the
4802 	 * first hop and make it our first ip6_dst,
4803 	 * and place the ultimate destination in the
4804 	 * newly-opened last slot.
4805 	 */
4806 	addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4807 	numaddr = rthdr->ip6r0_len / 2;
4808 	tmp = *addrptr;
4809 	for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4810 		*addrptr = addrptr[1];
4811 	}
4812 	*addrptr = ip6h->ip6_dst;
4813 	ip6h->ip6_dst = tmp;
4814 
4815 	/*
4816 	 * From the checksummed ultimate destination subtract the checksummed
4817 	 * current ip6_dst (the first hop address). Return that number.
4818 	 * (In the v4 case, the second part of this is done in each routine
4819 	 *  that calls ip_massage_options(). We do it all in this one place
4820 	 *  for v6).
4821 	 */
4822 	ptr = (uint16_t *)&ip6h->ip6_dst;
4823 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4824 		addrsum += ptr[i];
4825 	}
4826 	cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4827 	if ((int)cksm < 0)
4828 		cksm--;
4829 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
4830 
4831 	return (cksm);
4832 }
4833 
4834 void
4835 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4836 {
4837 	kstat_t *ksp;
4838 
4839 	ip6_stat_t template = {
4840 		{ "ip6_udp_fannorm", 	KSTAT_DATA_UINT64 },
4841 		{ "ip6_udp_fanmb", 	KSTAT_DATA_UINT64 },
4842 		{ "ip6_recv_pullup", 		KSTAT_DATA_UINT64 },
4843 		{ "ip6_db_ref",			KSTAT_DATA_UINT64 },
4844 		{ "ip6_notaligned",		KSTAT_DATA_UINT64 },
4845 		{ "ip6_multimblk",		KSTAT_DATA_UINT64 },
4846 		{ "ipsec_proto_ahesp",		KSTAT_DATA_UINT64 },
4847 		{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
4848 		{ "ip6_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
4849 		{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
4850 		{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
4851 		{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
4852 		{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
4853 		{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
4854 		{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
4855 		{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
4856 	};
4857 	ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4858 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4859 	    KSTAT_FLAG_VIRTUAL, stackid);
4860 
4861 	if (ksp == NULL)
4862 		return (NULL);
4863 
4864 	bcopy(&template, ip6_statisticsp, sizeof (template));
4865 	ksp->ks_data = (void *)ip6_statisticsp;
4866 	ksp->ks_private = (void *)(uintptr_t)stackid;
4867 
4868 	kstat_install(ksp);
4869 	return (ksp);
4870 }
4871 
4872 void
4873 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4874 {
4875 	if (ksp != NULL) {
4876 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4877 		kstat_delete_netstack(ksp, stackid);
4878 	}
4879 }
4880 
4881 /*
4882  * The following two functions set and get the value for the
4883  * IPV6_SRC_PREFERENCES socket option.
4884  */
4885 int
4886 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4887 {
4888 	/*
4889 	 * We only support preferences that are covered by
4890 	 * IPV6_PREFER_SRC_MASK.
4891 	 */
4892 	if (prefs & ~IPV6_PREFER_SRC_MASK)
4893 		return (EINVAL);
4894 
4895 	/*
4896 	 * Look for conflicting preferences or default preferences.  If
4897 	 * both bits of a related pair are clear, the application wants the
4898 	 * system's default value for that pair.  Both bits in a pair can't
4899 	 * be set.
4900 	 */
4901 	if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4902 		prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4903 	} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4904 	    IPV6_PREFER_SRC_MIPMASK) {
4905 		return (EINVAL);
4906 	}
4907 	if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4908 		prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4909 	} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4910 	    IPV6_PREFER_SRC_TMPMASK) {
4911 		return (EINVAL);
4912 	}
4913 	if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4914 		prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4915 	} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4916 	    IPV6_PREFER_SRC_CGAMASK) {
4917 		return (EINVAL);
4918 	}
4919 
4920 	ixa->ixa_src_preferences = prefs;
4921 	return (0);
4922 }
4923 
4924 size_t
4925 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4926 {
4927 	*val = ixa->ixa_src_preferences;
4928 	return (sizeof (ixa->ixa_src_preferences));
4929 }
4930 
4931 /*
4932  * Get the size of the IP options (including the IP headers size)
4933  * without including the AH header's size. If till_ah is B_FALSE,
4934  * and if AH header is present, dest options beyond AH header will
4935  * also be included in the returned size.
4936  */
4937 int
4938 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4939 {
4940 	ip6_t *ip6h;
4941 	uint8_t nexthdr;
4942 	uint8_t *whereptr;
4943 	ip6_hbh_t *hbhhdr;
4944 	ip6_dest_t *dsthdr;
4945 	ip6_rthdr_t *rthdr;
4946 	int ehdrlen;
4947 	int size;
4948 	ah_t *ah;
4949 
4950 	ip6h = (ip6_t *)mp->b_rptr;
4951 	size = IPV6_HDR_LEN;
4952 	nexthdr = ip6h->ip6_nxt;
4953 	whereptr = (uint8_t *)&ip6h[1];
4954 	for (;;) {
4955 		/* Assume IP has already stripped it */
4956 		ASSERT(nexthdr != IPPROTO_FRAGMENT);
4957 		switch (nexthdr) {
4958 		case IPPROTO_HOPOPTS:
4959 			hbhhdr = (ip6_hbh_t *)whereptr;
4960 			nexthdr = hbhhdr->ip6h_nxt;
4961 			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4962 			break;
4963 		case IPPROTO_DSTOPTS:
4964 			dsthdr = (ip6_dest_t *)whereptr;
4965 			nexthdr = dsthdr->ip6d_nxt;
4966 			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4967 			break;
4968 		case IPPROTO_ROUTING:
4969 			rthdr = (ip6_rthdr_t *)whereptr;
4970 			nexthdr = rthdr->ip6r_nxt;
4971 			ehdrlen = 8 * (rthdr->ip6r_len + 1);
4972 			break;
4973 		default :
4974 			if (till_ah) {
4975 				ASSERT(nexthdr == IPPROTO_AH);
4976 				return (size);
4977 			}
4978 			/*
4979 			 * If we don't have a AH header to traverse,
4980 			 * return now. This happens normally for
4981 			 * outbound datagrams where we have not inserted
4982 			 * the AH header.
4983 			 */
4984 			if (nexthdr != IPPROTO_AH) {
4985 				return (size);
4986 			}
4987 
4988 			/*
4989 			 * We don't include the AH header's size
4990 			 * to be symmetrical with other cases where
4991 			 * we either don't have a AH header (outbound)
4992 			 * or peek into the AH header yet (inbound and
4993 			 * not pulled up yet).
4994 			 */
4995 			ah = (ah_t *)whereptr;
4996 			nexthdr = ah->ah_nexthdr;
4997 			ehdrlen = (ah->ah_length << 2) + 8;
4998 
4999 			if (nexthdr == IPPROTO_DSTOPTS) {
5000 				if (whereptr + ehdrlen >= mp->b_wptr) {
5001 					/*
5002 					 * The destination options header
5003 					 * is not part of the first mblk.
5004 					 */
5005 					whereptr = mp->b_cont->b_rptr;
5006 				} else {
5007 					whereptr += ehdrlen;
5008 				}
5009 
5010 				dsthdr = (ip6_dest_t *)whereptr;
5011 				ehdrlen = 8 * (dsthdr->ip6d_len + 1);
5012 				size += ehdrlen;
5013 			}
5014 			return (size);
5015 		}
5016 		whereptr += ehdrlen;
5017 		size += ehdrlen;
5018 	}
5019 }
5020 
5021 /*
5022  * Utility routine that checks if `v6srcp' is a valid address on underlying
5023  * interface `ill'.  If `ipifp' is non-NULL, it's set to a held ipif
5024  * associated with `v6srcp' on success.  NOTE: if this is not called from
5025  * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
5026  * group during or after this lookup.
5027  */
5028 boolean_t
5029 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
5030 {
5031 	ipif_t *ipif;
5032 
5033 
5034 	ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
5035 	if (ipif != NULL) {
5036 		if (ipifp != NULL)
5037 			*ipifp = ipif;
5038 		else
5039 			ipif_refrele(ipif);
5040 		return (B_TRUE);
5041 	}
5042 
5043 	if (ip_debug > 2) {
5044 		pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
5045 		    "src %s\n", AF_INET6, v6srcp);
5046 	}
5047 	return (B_FALSE);
5048 }
5049