xref: /titanic_50/usr/src/uts/common/inet/ip/ip6.c (revision 310da939904e5e37a19c8a427209c0d4d9683ec8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 1990 Mentat Inc.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/dlpi.h>
32 #include <sys/stropts.h>
33 #include <sys/sysmacros.h>
34 #include <sys/strsun.h>
35 #include <sys/strlog.h>
36 #include <sys/strsubr.h>
37 #define	_SUN_TPI_VERSION	2
38 #include <sys/tihdr.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/sdt.h>
44 #include <sys/kobj.h>
45 #include <sys/zone.h>
46 #include <sys/neti.h>
47 #include <sys/hook.h>
48 
49 #include <sys/kmem.h>
50 #include <sys/systm.h>
51 #include <sys/param.h>
52 #include <sys/socket.h>
53 #include <sys/vtrace.h>
54 #include <sys/isa_defs.h>
55 #include <sys/atomic.h>
56 #include <sys/policy.h>
57 #include <sys/mac.h>
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/route.h>
61 #include <net/if_dl.h>
62 #include <sys/sockio.h>
63 #include <netinet/in.h>
64 #include <netinet/ip6.h>
65 #include <netinet/icmp6.h>
66 #include <netinet/sctp.h>
67 
68 #include <inet/common.h>
69 #include <inet/mi.h>
70 #include <inet/optcom.h>
71 #include <inet/mib2.h>
72 #include <inet/nd.h>
73 #include <inet/arp.h>
74 
75 #include <inet/ip.h>
76 #include <inet/ip_impl.h>
77 #include <inet/ip6.h>
78 #include <inet/ip6_asp.h>
79 #include <inet/tcp.h>
80 #include <inet/tcp_impl.h>
81 #include <inet/udp_impl.h>
82 #include <inet/ipp_common.h>
83 
84 #include <inet/ip_multi.h>
85 #include <inet/ip_if.h>
86 #include <inet/ip_ire.h>
87 #include <inet/ip_rts.h>
88 #include <inet/ip_ndp.h>
89 #include <net/pfkeyv2.h>
90 #include <inet/sadb.h>
91 #include <inet/ipsec_impl.h>
92 #include <inet/iptun/iptun_impl.h>
93 #include <inet/sctp_ip.h>
94 #include <sys/pattr.h>
95 #include <inet/ipclassifier.h>
96 #include <inet/ipsecah.h>
97 #include <inet/rawip_impl.h>
98 #include <inet/rts_impl.h>
99 #include <sys/squeue_impl.h>
100 #include <sys/squeue.h>
101 
102 #include <sys/tsol/label.h>
103 #include <sys/tsol/tnet.h>
104 
105 /* Temporary; for CR 6451644 work-around */
106 #include <sys/ethernet.h>
107 
/*
 * Naming conventions:
 *      These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 *	IPv6 functions will end with _v6 in the ip module.
 *	IPv6 functions will end with _ipv6 in the transport modules.
 *	IPv6 macros:
 *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *		And then there are ..V4_PART_OF_V6.
 *		The intent is that macros in the ip module end with _V6.
 *	IPv6 global variables will start with ipv6_
 *	IPv6 structures will start with ipv6
 *	IPv6 defined constants should start with IPV6_
 *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */
124 
125 /*
126  * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
127  * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
128  * from IANA. This mechanism will remain in effect until an official
129  * number is obtained.
130  */
131 uchar_t ip6opt_ls;
132 
133 const in6_addr_t ipv6_all_ones =
134 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
135 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
136 
137 #ifdef	_BIG_ENDIAN
138 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
139 #else	/* _BIG_ENDIAN */
140 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
141 #endif	/* _BIG_ENDIAN */
142 
143 #ifdef	_BIG_ENDIAN
144 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
145 #else  /* _BIG_ENDIAN */
146 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
147 #endif /* _BIG_ENDIAN */
148 
149 #ifdef _BIG_ENDIAN
150 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
151 #else  /* _BIG_ENDIAN */
152 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
153 #endif /* _BIG_ENDIAN */
154 
155 #ifdef _BIG_ENDIAN
156 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
157 #else  /* _BIG_ENDIAN */
158 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
159 #endif /* _BIG_ENDIAN */
160 
161 #ifdef _BIG_ENDIAN
162 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
163 #else  /* _BIG_ENDIAN */
164 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
165 #endif /* _BIG_ENDIAN */
166 
167 #ifdef _BIG_ENDIAN
168 const in6_addr_t ipv6_solicited_node_mcast =
169 			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
170 #else  /* _BIG_ENDIAN */
171 const in6_addr_t ipv6_solicited_node_mcast =
172 			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
173 #endif /* _BIG_ENDIAN */
174 
175 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
176 static void	icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
177 static void	icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
178     ip_recv_attr_t *);
179 static void	icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
180     ip_recv_attr_t *);
181 static void	icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
182     in6_addr_t *, ip_recv_attr_t *);
183 static void	icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
184     ip_recv_attr_t *);
185 static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
186 
187 /*
188  * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
189  * If the ICMP message is consumed by IP, i.e., it should not be delivered
190  * to any IPPROTO_ICMP raw sockets, then it returns NULL.
191  * Likewise, if the ICMP error is misformed (too short, etc), then it
192  * returns NULL. The caller uses this to determine whether or not to send
193  * to raw sockets.
194  *
195  * All error messages are passed to the matching transport stream.
196  *
197  * See comment for icmp_inbound_v4() on how IPsec is handled.
198  */
199 mblk_t *
200 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
201 {
202 	icmp6_t		*icmp6;
203 	ip6_t		*ip6h;		/* Outer header */
204 	int		ip_hdr_length;	/* Outer header length */
205 	boolean_t	interested;
206 	ill_t		*ill = ira->ira_ill;
207 	ip_stack_t	*ipst = ill->ill_ipst;
208 	mblk_t		*mp_ret = NULL;
209 
210 	ip6h = (ip6_t *)mp->b_rptr;
211 
212 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
213 
214 	/* Check for Martian packets  */
215 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
216 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
217 		ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
218 		freemsg(mp);
219 		return (NULL);
220 	}
221 
222 	/* Make sure ira_l2src is set for ndp_input */
223 	if (!(ira->ira_flags & IRAF_L2SRC_SET))
224 		ip_setl2src(mp, ira, ira->ira_rill);
225 
226 	ip_hdr_length = ira->ira_ip_hdr_length;
227 	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
228 		if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
229 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
230 			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
231 			freemsg(mp);
232 			return (NULL);
233 		}
234 		ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
235 		if (ip6h == NULL) {
236 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
237 			freemsg(mp);
238 			return (NULL);
239 		}
240 	}
241 
242 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
243 	DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
244 	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
245 	    icmp6->icmp6_code));
246 
247 	/*
248 	 * We will set "interested" to "true" if we should pass a copy to
249 	 * the transport i.e., if it is an error message.
250 	 */
251 	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
252 
253 	switch (icmp6->icmp6_type) {
254 	case ICMP6_DST_UNREACH:
255 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
256 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
257 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
258 		break;
259 
260 	case ICMP6_TIME_EXCEEDED:
261 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
262 		break;
263 
264 	case ICMP6_PARAM_PROB:
265 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
266 		break;
267 
268 	case ICMP6_PACKET_TOO_BIG:
269 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
270 		break;
271 
272 	case ICMP6_ECHO_REQUEST:
273 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
274 		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
275 		    !ipst->ips_ipv6_resp_echo_mcast)
276 			break;
277 
278 		/*
279 		 * We must have exclusive use of the mblk to convert it to
280 		 * a response.
281 		 * If not, we copy it.
282 		 */
283 		if (mp->b_datap->db_ref > 1) {
284 			mblk_t	*mp1;
285 
286 			mp1 = copymsg(mp);
287 			if (mp1 == NULL) {
288 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
289 				ip_drop_input("ipIfStatsInDiscards - copymsg",
290 				    mp, ill);
291 				freemsg(mp);
292 				return (NULL);
293 			}
294 			freemsg(mp);
295 			mp = mp1;
296 			ip6h = (ip6_t *)mp->b_rptr;
297 			icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
298 		}
299 
300 		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
301 		icmp_send_reply_v6(mp, ip6h, icmp6, ira);
302 		return (NULL);
303 
304 	case ICMP6_ECHO_REPLY:
305 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
306 		break;
307 
308 	case ND_ROUTER_SOLICIT:
309 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
310 		break;
311 
312 	case ND_ROUTER_ADVERT:
313 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
314 		break;
315 
316 	case ND_NEIGHBOR_SOLICIT:
317 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
318 		ndp_input(mp, ira);
319 		return (NULL);
320 
321 	case ND_NEIGHBOR_ADVERT:
322 		BUMP_MIB(ill->ill_icmp6_mib,
323 		    ipv6IfIcmpInNeighborAdvertisements);
324 		ndp_input(mp, ira);
325 		return (NULL);
326 
327 	case ND_REDIRECT:
328 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
329 
330 		if (ipst->ips_ipv6_ignore_redirect)
331 			break;
332 
333 		/* We now allow a RAW socket to receive this. */
334 		interested = B_TRUE;
335 		break;
336 
337 	/*
338 	 * The next three icmp messages will be handled by MLD.
339 	 * Pass all valid MLD packets up to any process(es)
340 	 * listening on a raw ICMP socket.
341 	 */
342 	case MLD_LISTENER_QUERY:
343 	case MLD_LISTENER_REPORT:
344 	case MLD_LISTENER_REDUCTION:
345 		mp = mld_input(mp, ira);
346 		return (mp);
347 	default:
348 		break;
349 	}
350 	/*
351 	 * See if there is an ICMP client to avoid an extra copymsg/freemsg
352 	 * if there isn't one.
353 	 */
354 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
355 		/* If there is an ICMP client and we want one too, copy it. */
356 
357 		if (!interested) {
358 			/* Caller will deliver to RAW sockets */
359 			return (mp);
360 		}
361 		mp_ret = copymsg(mp);
362 		if (mp_ret == NULL) {
363 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
364 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
365 		}
366 	} else if (!interested) {
367 		/* Neither we nor raw sockets are interested. Drop packet now */
368 		freemsg(mp);
369 		return (NULL);
370 	}
371 
372 	/*
373 	 * ICMP error or redirect packet. Make sure we have enough of
374 	 * the header and that db_ref == 1 since we might end up modifying
375 	 * the packet.
376 	 */
377 	if (mp->b_cont != NULL) {
378 		if (ip_pullup(mp, -1, ira) == NULL) {
379 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
380 			ip_drop_input("ipIfStatsInDiscards - ip_pullup",
381 			    mp, ill);
382 			freemsg(mp);
383 			return (mp_ret);
384 		}
385 	}
386 
387 	if (mp->b_datap->db_ref > 1) {
388 		mblk_t	*mp1;
389 
390 		mp1 = copymsg(mp);
391 		if (mp1 == NULL) {
392 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
393 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
394 			freemsg(mp);
395 			return (mp_ret);
396 		}
397 		freemsg(mp);
398 		mp = mp1;
399 	}
400 
401 	/*
402 	 * In case mp has changed, verify the message before any further
403 	 * processes.
404 	 */
405 	ip6h = (ip6_t *)mp->b_rptr;
406 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
407 	if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
408 		freemsg(mp);
409 		return (mp_ret);
410 	}
411 
412 	switch (icmp6->icmp6_type) {
413 	case ND_REDIRECT:
414 		icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
415 		break;
416 	case ICMP6_PACKET_TOO_BIG:
417 		/* Update DCE and adjust MTU is icmp header if needed */
418 		icmp_inbound_too_big_v6(icmp6, ira);
419 		/* FALLTHRU */
420 	default:
421 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
422 		break;
423 	}
424 
425 	return (mp_ret);
426 }
427 
428 /*
429  * Send an ICMP echo reply.
430  * The caller has already updated the payload part of the packet.
431  * We handle the ICMP checksum, IP source address selection and feed
432  * the packet into ip_output_simple.
433  */
434 static void
435 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
436     ip_recv_attr_t *ira)
437 {
438 	uint_t		ip_hdr_length = ira->ira_ip_hdr_length;
439 	ill_t		*ill = ira->ira_ill;
440 	ip_stack_t	*ipst = ill->ill_ipst;
441 	ip_xmit_attr_t	ixas;
442 	in6_addr_t	origsrc;
443 
444 	/*
445 	 * Remove any extension headers (do not reverse a source route)
446 	 * and clear the flow id (keep traffic class for now).
447 	 */
448 	if (ip_hdr_length != IPV6_HDR_LEN) {
449 		int	i;
450 
451 		for (i = 0; i < IPV6_HDR_LEN; i++) {
452 			mp->b_rptr[ip_hdr_length - i - 1] =
453 			    mp->b_rptr[IPV6_HDR_LEN - i - 1];
454 		}
455 		mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
456 		ip6h = (ip6_t *)mp->b_rptr;
457 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
458 		i = ntohs(ip6h->ip6_plen);
459 		i -= (ip_hdr_length - IPV6_HDR_LEN);
460 		ip6h->ip6_plen = htons(i);
461 		ip_hdr_length = IPV6_HDR_LEN;
462 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
463 	}
464 	ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
465 
466 	/* Reverse the source and destination addresses. */
467 	origsrc = ip6h->ip6_src;
468 	ip6h->ip6_src = ip6h->ip6_dst;
469 	ip6h->ip6_dst = origsrc;
470 
471 	/* set the hop limit */
472 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
473 
474 	/*
475 	 * Prepare for checksum by putting icmp length in the icmp
476 	 * checksum field. The checksum is calculated in ip_output
477 	 */
478 	icmp6->icmp6_cksum = ip6h->ip6_plen;
479 
480 	bzero(&ixas, sizeof (ixas));
481 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
482 	ixas.ixa_zoneid = ira->ira_zoneid;
483 	ixas.ixa_cred = kcred;
484 	ixas.ixa_cpid = NOPID;
485 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
486 	ixas.ixa_ifindex = 0;
487 	ixas.ixa_ipst = ipst;
488 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
489 
490 	if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
491 		/*
492 		 * This packet should go out the same way as it
493 		 * came in i.e in clear, independent of the IPsec
494 		 * policy for transmitting packets.
495 		 */
496 		ixas.ixa_flags |= IXAF_NO_IPSEC;
497 	} else {
498 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
499 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
500 			/* Note: mp already consumed and ip_drop_packet done */
501 			return;
502 		}
503 	}
504 
505 	/* Was the destination (now source) link-local? Send out same group */
506 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
507 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
508 		if (IS_UNDER_IPMP(ill))
509 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
510 		else
511 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
512 	}
513 
514 	if (ira->ira_flags & IRAF_MULTIBROADCAST) {
515 		/*
516 		 * Not one or our addresses (IRE_LOCALs), thus we let
517 		 * ip_output_simple pick the source.
518 		 */
519 		ip6h->ip6_src = ipv6_all_zeros;
520 		ixas.ixa_flags |= IXAF_SET_SOURCE;
521 	}
522 
523 	/* Should we send using dce_pmtu? */
524 	if (ipst->ips_ipv6_icmp_return_pmtu)
525 		ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
526 
527 	(void) ip_output_simple(mp, &ixas);
528 	ixa_cleanup(&ixas);
529 
530 }
531 
532 /*
533  * Verify the ICMP messages for either for ICMP error or redirect packet.
534  * The caller should have fully pulled up the message. If it's a redirect
535  * packet, only basic checks on IP header will be done; otherwise, verify
536  * the packet by looking at the included ULP header.
537  *
538  * Called before icmp_inbound_error_fanout_v6 is called.
539  */
540 static boolean_t
541 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
542 {
543 	ill_t		*ill = ira->ira_ill;
544 	uint16_t	hdr_length;
545 	uint8_t		*nexthdrp;
546 	uint8_t		nexthdr;
547 	ip_stack_t	*ipst = ill->ill_ipst;
548 	conn_t		*connp;
549 	ip6_t		*ip6h;	/* Inner header */
550 
551 	ip6h = (ip6_t *)&icmp6[1];
552 	if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
553 		goto truncated;
554 
555 	if (icmp6->icmp6_type == ND_REDIRECT) {
556 		hdr_length = sizeof (nd_redirect_t);
557 	} else {
558 		if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
559 			goto discard_pkt;
560 		hdr_length = IPV6_HDR_LEN;
561 	}
562 
563 	if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
564 		goto truncated;
565 
566 	/*
567 	 * Stop here for ICMP_REDIRECT.
568 	 */
569 	if (icmp6->icmp6_type == ND_REDIRECT)
570 		return (B_TRUE);
571 
572 	/*
573 	 * ICMP errors only.
574 	 */
575 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
576 		goto discard_pkt;
577 	nexthdr = *nexthdrp;
578 
579 	/* Try to pass the ICMP message to clients who need it */
580 	switch (nexthdr) {
581 	case IPPROTO_UDP:
582 		/*
583 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
584 		 * transport header.
585 		 */
586 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
587 		    mp->b_wptr)
588 			goto truncated;
589 		break;
590 	case IPPROTO_TCP: {
591 		tcpha_t		*tcpha;
592 
593 		/*
594 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
595 		 * transport header.
596 		 */
597 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
598 		    mp->b_wptr)
599 			goto truncated;
600 
601 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
602 		/*
603 		 * With IPMP we need to match across group, which we do
604 		 * since we have the upper ill from ira_ill.
605 		 */
606 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
607 		    ill->ill_phyint->phyint_ifindex, ipst);
608 		if (connp == NULL)
609 			goto discard_pkt;
610 
611 		if ((connp->conn_verifyicmp != NULL) &&
612 		    !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
613 			CONN_DEC_REF(connp);
614 			goto discard_pkt;
615 		}
616 		CONN_DEC_REF(connp);
617 		break;
618 	}
619 	case IPPROTO_SCTP:
620 		/*
621 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
622 		 * transport header.
623 		 */
624 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
625 		    mp->b_wptr)
626 			goto truncated;
627 		break;
628 	case IPPROTO_ESP:
629 	case IPPROTO_AH:
630 		break;
631 	case IPPROTO_ENCAP:
632 	case IPPROTO_IPV6: {
633 		/* Look for self-encapsulated packets that caused an error */
634 		ip6_t *in_ip6h;
635 
636 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
637 		if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
638 		    sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
639 			goto truncated;
640 		break;
641 	}
642 	default:
643 		break;
644 	}
645 
646 	return (B_TRUE);
647 
648 discard_pkt:
649 	/* Bogus ICMP error. */
650 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
651 	return (B_FALSE);
652 
653 truncated:
654 	/* We pulled up everthing already. Must be truncated */
655 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
656 	return (B_FALSE);
657 }
658 
659 /*
660  * Process received IPv6 ICMP Packet too big.
661  * The caller is responsible for validating the packet before passing it in
662  * and also to fanout the ICMP error to any matching transport conns. Assumes
663  * the message has been fully pulled up.
664  *
665  * Before getting here, the caller has called icmp_inbound_verify_v6()
666  * that should have verified with ULP to prevent undoing the changes we're
667  * going to make to DCE. For example, TCP might have verified that the packet
668  * which generated error is in the send window.
669  *
670  * In some cases modified this MTU in the ICMP header packet; the caller
671  * should pass to the matching ULP after this returns.
672  */
673 static void
674 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
675 {
676 	uint32_t	mtu;
677 	dce_t		*dce;
678 	ill_t		*ill = ira->ira_ill;	/* Upper ill if IPMP */
679 	ip_stack_t	*ipst = ill->ill_ipst;
680 	int		old_max_frag;
681 	in6_addr_t	final_dst;
682 	ip6_t		*ip6h;	/* Inner IP header */
683 
684 	/* Caller has already pulled up everything. */
685 	ip6h = (ip6_t *)&icmp6[1];
686 	final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
687 
688 	/*
689 	 * For link local destinations matching simply on address is not
690 	 * sufficient. Same link local addresses for different ILL's is
691 	 * possible.
692 	 */
693 	if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
694 		dce = dce_lookup_and_add_v6(&final_dst,
695 		    ill->ill_phyint->phyint_ifindex, ipst);
696 	} else {
697 		dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
698 	}
699 	if (dce == NULL) {
700 		/* Couldn't add a unique one - ENOMEM */
701 		if (ip_debug > 2) {
702 			/* ip1dbg */
703 			pr_addr_dbg("icmp_inbound_too_big_v6:"
704 			    "no dce for dst %s\n", AF_INET6,
705 			    &final_dst);
706 		}
707 		return;
708 	}
709 
710 	mtu = ntohl(icmp6->icmp6_mtu);
711 
712 	mutex_enter(&dce->dce_lock);
713 	if (dce->dce_flags & DCEF_PMTU)
714 		old_max_frag = dce->dce_pmtu;
715 	else
716 		old_max_frag = ill->ill_mtu;
717 
718 	if (mtu < IPV6_MIN_MTU) {
719 		ip1dbg(("Received mtu less than IPv6 "
720 		    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
721 		mtu = IPV6_MIN_MTU;
722 		/*
723 		 * If an mtu less than IPv6 min mtu is received,
724 		 * we must include a fragment header in
725 		 * subsequent packets.
726 		 */
727 		dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
728 	} else {
729 		dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
730 	}
731 	ip1dbg(("Received mtu from router: %d\n", mtu));
732 	dce->dce_pmtu = MIN(old_max_frag, mtu);
733 
734 	/* Prepare to send the new max frag size for the ULP. */
735 	if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) {
736 		/*
737 		 * If we need a fragment header in every packet
738 		 * (above case or multirouting), make sure the
739 		 * ULP takes it into account when computing the
740 		 * payload size.
741 		 */
742 		icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
743 	} else {
744 		icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
745 	}
746 	/* We now have a PMTU for sure */
747 	dce->dce_flags |= DCEF_PMTU;
748 	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
749 	mutex_exit(&dce->dce_lock);
750 	/*
751 	 * After dropping the lock the new value is visible to everyone.
752 	 * Then we bump the generation number so any cached values reinspect
753 	 * the dce_t.
754 	 */
755 	dce_increment_generation(dce);
756 	dce_refrele(dce);
757 }
758 
759 /*
760  * Fanout received ICMPv6 error packets to the transports.
761  * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
762  *
763  * The caller must have called icmp_inbound_verify_v6.
764  */
765 void
766 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
767 {
768 	uint16_t	*up;	/* Pointer to ports in ULP header */
769 	uint32_t	ports;	/* reversed ports for fanout */
770 	ip6_t		rip6h;	/* With reversed addresses */
771 	ip6_t		*ip6h;	/* Inner IP header */
772 	uint16_t	hdr_length; /* Inner IP header length */
773 	uint8_t		*nexthdrp;
774 	uint8_t		nexthdr;
775 	tcpha_t		*tcpha;
776 	conn_t		*connp;
777 	ill_t		*ill = ira->ira_ill;	/* Upper in the case of IPMP */
778 	ip_stack_t	*ipst = ill->ill_ipst;
779 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
780 
781 	/* Caller has already pulled up everything. */
782 	ip6h = (ip6_t *)&icmp6[1];
783 	ASSERT(mp->b_cont == NULL);
784 	ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
785 
786 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
787 		goto drop_pkt;
788 	nexthdr = *nexthdrp;
789 	ira->ira_protocol = nexthdr;
790 
791 	/*
792 	 * We need a separate IP header with the source and destination
793 	 * addresses reversed to do fanout/classification because the ip6h in
794 	 * the ICMPv6 error is in the form we sent it out.
795 	 */
796 	rip6h.ip6_src = ip6h->ip6_dst;
797 	rip6h.ip6_dst = ip6h->ip6_src;
798 	rip6h.ip6_nxt = nexthdr;
799 
800 	/* Try to pass the ICMP message to clients who need it */
801 	switch (nexthdr) {
802 	case IPPROTO_UDP: {
803 		/* Attempt to find a client stream based on port. */
804 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
805 
806 		/* Note that we send error to all matches. */
807 		ira->ira_flags |= IRAF_ICMP_ERROR;
808 		ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
809 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
810 		return;
811 	}
812 	case IPPROTO_TCP: {
813 		/*
814 		 * Attempt to find a client stream based on port.
815 		 * Note that we do a reverse lookup since the header is
816 		 * in the form we sent it out.
817 		 */
818 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
819 		/*
820 		 * With IPMP we need to match across group, which we do
821 		 * since we have the upper ill from ira_ill.
822 		 */
823 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
824 		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
825 		if (connp == NULL) {
826 			goto drop_pkt;
827 		}
828 
829 		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
830 		    (ira->ira_flags & IRAF_IPSEC_SECURE)) {
831 			mp = ipsec_check_inbound_policy(mp, connp,
832 			    NULL, ip6h, ira);
833 			if (mp == NULL) {
834 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
835 				/* Note that mp is NULL */
836 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
837 				CONN_DEC_REF(connp);
838 				return;
839 			}
840 		}
841 
842 		ira->ira_flags |= IRAF_ICMP_ERROR;
843 		if (IPCL_IS_TCP(connp)) {
844 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
845 			    connp->conn_recvicmp, connp, ira, SQ_FILL,
846 			    SQTAG_TCP6_INPUT_ICMP_ERR);
847 		} else {
848 			/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
849 			ill_t *rill = ira->ira_rill;
850 
851 			ira->ira_ill = ira->ira_rill = NULL;
852 			(connp->conn_recv)(connp, mp, NULL, ira);
853 			CONN_DEC_REF(connp);
854 			ira->ira_ill = ill;
855 			ira->ira_rill = rill;
856 		}
857 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
858 		return;
859 
860 	}
861 	case IPPROTO_SCTP:
862 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
863 		/* Find a SCTP client stream for this packet. */
864 		((uint16_t *)&ports)[0] = up[1];
865 		((uint16_t *)&ports)[1] = up[0];
866 
867 		ira->ira_flags |= IRAF_ICMP_ERROR;
868 		ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
869 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
870 		return;
871 
872 	case IPPROTO_ESP:
873 	case IPPROTO_AH:
874 		if (!ipsec_loaded(ipss)) {
875 			ip_proto_not_sup(mp, ira);
876 			return;
877 		}
878 
879 		if (nexthdr == IPPROTO_ESP)
880 			mp = ipsecesp_icmp_error(mp, ira);
881 		else
882 			mp = ipsecah_icmp_error(mp, ira);
883 		if (mp == NULL)
884 			return;
885 
886 		/* Just in case ipsec didn't preserve the NULL b_cont */
887 		if (mp->b_cont != NULL) {
888 			if (!pullupmsg(mp, -1))
889 				goto drop_pkt;
890 		}
891 
892 		/*
893 		 * If succesful, the mp has been modified to not include
894 		 * the ESP/AH header so we can fanout to the ULP's icmp
895 		 * error handler.
896 		 */
897 		if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
898 			goto drop_pkt;
899 
900 		ip6h = (ip6_t *)mp->b_rptr;
901 		/* Don't call hdr_length_v6() unless you have to. */
902 		if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
903 			hdr_length = ip_hdr_length_v6(mp, ip6h);
904 		else
905 			hdr_length = IPV6_HDR_LEN;
906 
907 		/* Verify the modified message before any further processes. */
908 		icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
909 		if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
910 			freemsg(mp);
911 			return;
912 		}
913 
914 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
915 		return;
916 
917 	case IPPROTO_IPV6: {
918 		/* Look for self-encapsulated packets that caused an error */
919 		ip6_t *in_ip6h;
920 
921 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
922 
923 		if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
924 		    IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
925 			/*
926 			 * Self-encapsulated case. As in the ipv4 case,
927 			 * we need to strip the 2nd IP header. Since mp
928 			 * is already pulled-up, we can simply bcopy
929 			 * the 3rd header + data over the 2nd header.
930 			 */
931 			uint16_t unused_len;
932 
933 			/*
934 			 * Make sure we don't do recursion more than once.
935 			 */
936 			if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
937 			    &unused_len, &nexthdrp) ||
938 			    *nexthdrp == IPPROTO_IPV6) {
939 				goto drop_pkt;
940 			}
941 
942 			/*
943 			 * Copy the 3rd header + remaining data on top
944 			 * of the 2nd header.
945 			 */
946 			bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
947 
948 			/*
949 			 * Subtract length of the 2nd header.
950 			 */
951 			mp->b_wptr -= hdr_length;
952 
953 			ip6h = (ip6_t *)mp->b_rptr;
954 			/* Don't call hdr_length_v6() unless you have to. */
955 			if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
956 				hdr_length = ip_hdr_length_v6(mp, ip6h);
957 			else
958 				hdr_length = IPV6_HDR_LEN;
959 
960 			/*
961 			 * Verify the modified message before any further
962 			 * processes.
963 			 */
964 			icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
965 			if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
966 				freemsg(mp);
967 				return;
968 			}
969 
970 			/*
971 			 * Now recurse, and see what I _really_ should be
972 			 * doing here.
973 			 */
974 			icmp_inbound_error_fanout_v6(mp, icmp6, ira);
975 			return;
976 		}
977 		/* FALLTHRU */
978 	}
979 	case IPPROTO_ENCAP:
980 		if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
981 		    &rip6h.ip6_dst, ipst)) != NULL) {
982 			ira->ira_flags |= IRAF_ICMP_ERROR;
983 			connp->conn_recvicmp(connp, mp, NULL, ira);
984 			CONN_DEC_REF(connp);
985 			ira->ira_flags &= ~IRAF_ICMP_ERROR;
986 			return;
987 		}
988 		/*
989 		 * No IP tunnel is interested, fallthrough and see
990 		 * if a raw socket will want it.
991 		 */
992 		/* FALLTHRU */
993 	default:
994 		ira->ira_flags |= IRAF_ICMP_ERROR;
995 		ASSERT(ira->ira_protocol == nexthdr);
996 		ip_fanout_proto_v6(mp, &rip6h, ira);
997 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
998 		return;
999 	}
1000 	/* NOTREACHED */
1001 drop_pkt:
1002 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1003 	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
1004 	freemsg(mp);
1005 }
1006 
1007 /*
1008  * Process received IPv6 ICMP Redirect messages.
1009  * Assumes the caller has verified that the headers are in the pulled up mblk.
1010  * Consumes mp.
1011  */
1012 /* ARGSUSED */
1013 static void
1014 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1015     ip_recv_attr_t *ira)
1016 {
1017 	ire_t		*ire, *nire;
1018 	ire_t		*prev_ire = NULL;
1019 	ire_t		*redir_ire;
1020 	in6_addr_t	*src, *dst, *gateway;
1021 	nd_opt_hdr_t	*opt;
1022 	nce_t		*nce;
1023 	int		ncec_flags = 0;
1024 	int		err = 0;
1025 	boolean_t	redirect_to_router = B_FALSE;
1026 	int		len;
1027 	int		optlen;
1028 	ill_t		*ill = ira->ira_rill;
1029 	ill_t		*rill = ira->ira_rill;
1030 	ip_stack_t	*ipst = ill->ill_ipst;
1031 
1032 	/*
1033 	 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1034 	 * and make it be the IPMP upper so avoid being confused by a packet
1035 	 * addressed to a unicast address on a different ill.
1036 	 */
1037 	if (IS_UNDER_IPMP(rill)) {
1038 		rill = ipmp_ill_hold_ipmp_ill(rill);
1039 		if (rill == NULL) {
1040 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1041 			ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1042 			    mp, ill);
1043 			freemsg(mp);
1044 			return;
1045 		}
1046 		ASSERT(rill != ira->ira_rill);
1047 	}
1048 
1049 	len = mp->b_wptr - (uchar_t *)rd;
1050 	src = &ip6h->ip6_src;
1051 	dst = &rd->nd_rd_dst;
1052 	gateway = &rd->nd_rd_target;
1053 
1054 	/* Verify if it is a valid redirect */
1055 	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1056 	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1057 	    (rd->nd_rd_code != 0) ||
1058 	    (len < sizeof (nd_redirect_t)) ||
1059 	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
1060 	    (IN6_IS_ADDR_MULTICAST(dst))) {
1061 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1062 		ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1063 		goto fail_redirect;
1064 	}
1065 
1066 	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1067 	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1068 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1069 		ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1070 		    mp, ill);
1071 		goto fail_redirect;
1072 	}
1073 
1074 	optlen = len - sizeof (nd_redirect_t);
1075 	if (optlen != 0) {
1076 		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1077 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1078 			ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1079 			    mp, ill);
1080 			goto fail_redirect;
1081 		}
1082 	}
1083 
1084 	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1085 		redirect_to_router = B_TRUE;
1086 		ncec_flags |= NCE_F_ISROUTER;
1087 	} else {
1088 		gateway = dst;	/* Add nce for dst */
1089 	}
1090 
1091 
1092 	/*
1093 	 * Verify that the IP source address of the redirect is
1094 	 * the same as the current first-hop router for the specified
1095 	 * ICMP destination address.
1096 	 * Also, Make sure we had a route for the dest in question and
1097 	 * that route was pointing to the old gateway (the source of the
1098 	 * redirect packet.)
1099 	 * We do longest match and then compare ire_gateway_addr_v6 below.
1100 	 */
1101 	prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1102 	    ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1103 
1104 	/*
1105 	 * Check that
1106 	 *	the redirect was not from ourselves
1107 	 *	old gateway is still directly reachable
1108 	 */
1109 	if (prev_ire == NULL ||
1110 	    (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1111 	    (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1112 	    !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1113 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1114 		ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1115 		goto fail_redirect;
1116 	}
1117 
1118 	ASSERT(prev_ire->ire_ill != NULL);
1119 	if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1120 		ncec_flags |= NCE_F_NONUD;
1121 
1122 	opt = (nd_opt_hdr_t *)&rd[1];
1123 	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1124 	if (opt != NULL) {
1125 		err = nce_lookup_then_add_v6(rill,
1126 		    (uchar_t *)&opt[1],		/* Link layer address */
1127 		    rill->ill_phys_addr_length,
1128 		    gateway, ncec_flags, ND_STALE, &nce);
1129 		switch (err) {
1130 		case 0:
1131 			nce_refrele(nce);
1132 			break;
1133 		case EEXIST:
1134 			/*
1135 			 * Check to see if link layer address has changed and
1136 			 * process the ncec_state accordingly.
1137 			 */
1138 			nce_process(nce->nce_common,
1139 			    (uchar_t *)&opt[1], 0, B_FALSE);
1140 			nce_refrele(nce);
1141 			break;
1142 		default:
1143 			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1144 			    err));
1145 			goto fail_redirect;
1146 		}
1147 	}
1148 	if (redirect_to_router) {
1149 		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1150 
1151 		/*
1152 		 * Create a Route Association.  This will allow us to remember
1153 		 * a router told us to use the particular gateway.
1154 		 */
1155 		ire = ire_create_v6(
1156 		    dst,
1157 		    &ipv6_all_ones,		/* mask */
1158 		    gateway,			/* gateway addr */
1159 		    IRE_HOST,
1160 		    prev_ire->ire_ill,
1161 		    ALL_ZONES,
1162 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1163 		    NULL,
1164 		    ipst);
1165 	} else {
1166 		ipif_t *ipif;
1167 		in6_addr_t gw;
1168 
1169 		/*
1170 		 * Just create an on link entry, i.e. interface route.
1171 		 * The gateway field is our link-local on the ill.
1172 		 */
1173 		mutex_enter(&rill->ill_lock);
1174 		for (ipif = rill->ill_ipif; ipif != NULL;
1175 		    ipif = ipif->ipif_next) {
1176 			if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1177 			    IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1178 				break;
1179 		}
1180 		if (ipif == NULL) {
1181 			/* We have no link-local address! */
1182 			mutex_exit(&rill->ill_lock);
1183 			goto fail_redirect;
1184 		}
1185 		gw = ipif->ipif_v6lcl_addr;
1186 		mutex_exit(&rill->ill_lock);
1187 
1188 		ire = ire_create_v6(
1189 		    dst,				/* gateway == dst */
1190 		    &ipv6_all_ones,			/* mask */
1191 		    &gw,				/* gateway addr */
1192 		    rill->ill_net_type,			/* IF_[NO]RESOLVER */
1193 		    prev_ire->ire_ill,
1194 		    ALL_ZONES,
1195 		    (RTF_DYNAMIC | RTF_HOST),
1196 		    NULL,
1197 		    ipst);
1198 	}
1199 
1200 	if (ire == NULL)
1201 		goto fail_redirect;
1202 
1203 	nire = ire_add(ire);
1204 	/* Check if it was a duplicate entry */
1205 	if (nire != NULL && nire != ire) {
1206 		ASSERT(nire->ire_identical_ref > 1);
1207 		ire_delete(nire);
1208 		ire_refrele(nire);
1209 		nire = NULL;
1210 	}
1211 	ire = nire;
1212 	if (ire != NULL) {
1213 		ire_refrele(ire);		/* Held in ire_add */
1214 
1215 		/* tell routing sockets that we received a redirect */
1216 		ip_rts_change_v6(RTM_REDIRECT,
1217 		    &rd->nd_rd_dst,
1218 		    &rd->nd_rd_target,
1219 		    &ipv6_all_ones, 0, src,
1220 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1221 		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1222 
1223 		/*
1224 		 * Delete any existing IRE_HOST type ires for this destination.
1225 		 * This together with the added IRE has the effect of
1226 		 * modifying an existing redirect.
1227 		 */
1228 		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1229 		    prev_ire->ire_ill, ALL_ZONES, NULL,
1230 		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1231 		    NULL);
1232 
1233 		if (redir_ire != NULL) {
1234 			if (redir_ire->ire_flags & RTF_DYNAMIC)
1235 				ire_delete(redir_ire);
1236 			ire_refrele(redir_ire);
1237 		}
1238 	}
1239 
1240 	ire_refrele(prev_ire);
1241 	prev_ire = NULL;
1242 
1243 fail_redirect:
1244 	if (prev_ire != NULL)
1245 		ire_refrele(prev_ire);
1246 	freemsg(mp);
1247 	if (rill != ira->ira_rill)
1248 		ill_refrele(rill);
1249 }
1250 
1251 /*
1252  * Build and ship an IPv6 ICMP message using the packet data in mp,
1253  * and the ICMP header pointed to by "stuff".  (May be called as
1254  * writer.)
1255  * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1256  * verify that an icmp error packet can be sent.
1257  *
1258  * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1259  * source address (see above function).
1260  */
1261 static void
1262 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1263     const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1264 {
1265 	ip6_t		*ip6h;
1266 	in6_addr_t	v6dst;
1267 	size_t		len_needed;
1268 	size_t		msg_len;
1269 	mblk_t		*mp1;
1270 	icmp6_t		*icmp6;
1271 	in6_addr_t	v6src;
1272 	ill_t		*ill = ira->ira_ill;
1273 	ip_stack_t	*ipst = ill->ill_ipst;
1274 	ip_xmit_attr_t	ixas;
1275 
1276 	ip6h = (ip6_t *)mp->b_rptr;
1277 
1278 	bzero(&ixas, sizeof (ixas));
1279 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1280 	ixas.ixa_zoneid = ira->ira_zoneid;
1281 	ixas.ixa_ifindex = 0;
1282 	ixas.ixa_ipst = ipst;
1283 	ixas.ixa_cred = kcred;
1284 	ixas.ixa_cpid = NOPID;
1285 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
1286 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1287 
1288 	/*
1289 	 * If the source of the original packet was link-local, then
1290 	 * make sure we send on the same ill (group) as we received it on.
1291 	 */
1292 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1293 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
1294 		if (IS_UNDER_IPMP(ill))
1295 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1296 		else
1297 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1298 	}
1299 
1300 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1301 		/*
1302 		 * Apply IPsec based on how IPsec was applied to
1303 		 * the packet that had the error.
1304 		 *
1305 		 * If it was an outbound packet that caused the ICMP
1306 		 * error, then the caller will have setup the IRA
1307 		 * appropriately.
1308 		 */
1309 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1310 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1311 			/* Note: mp already consumed and ip_drop_packet done */
1312 			return;
1313 		}
1314 	} else {
1315 		/*
1316 		 * This is in clear. The icmp message we are building
1317 		 * here should go out in clear, independent of our policy.
1318 		 */
1319 		ixas.ixa_flags |= IXAF_NO_IPSEC;
1320 	}
1321 
1322 	/*
1323 	 * If the caller specified the source we use that.
1324 	 * Otherwise, if the packet was for one of our unicast addresses, make
1325 	 * sure we respond with that as the source. Otherwise
1326 	 * have ip_output_simple pick the source address.
1327 	 */
1328 	if (v6src_ptr != NULL) {
1329 		v6src = *v6src_ptr;
1330 	} else {
1331 		ire_t *ire;
1332 		uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1333 
1334 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1335 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1336 			match_flags |= MATCH_IRE_ILL;
1337 
1338 		ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1339 		    (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1340 		    match_flags, 0, ipst, NULL);
1341 		if (ire != NULL) {
1342 			v6src = ip6h->ip6_dst;
1343 			ire_refrele(ire);
1344 		} else {
1345 			v6src = ipv6_all_zeros;
1346 			ixas.ixa_flags |= IXAF_SET_SOURCE;
1347 		}
1348 	}
1349 	v6dst = ip6h->ip6_src;
1350 	len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1351 	msg_len = msgdsize(mp);
1352 	if (msg_len > len_needed) {
1353 		if (!adjmsg(mp, len_needed - msg_len)) {
1354 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1355 			freemsg(mp);
1356 			return;
1357 		}
1358 		msg_len = len_needed;
1359 	}
1360 	mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1361 	if (mp1 == NULL) {
1362 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1363 		freemsg(mp);
1364 		return;
1365 	}
1366 	mp1->b_cont = mp;
1367 	mp = mp1;
1368 
1369 	/*
1370 	 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1371 	 * node generates be accepted in peace by all on-host destinations.
1372 	 * If we do NOT assume that all on-host destinations trust
1373 	 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1374 	 * (Look for IXAF_TRUSTED_ICMP).
1375 	 */
1376 	ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1377 
1378 	ip6h = (ip6_t *)mp->b_rptr;
1379 	mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1380 
1381 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1382 	ip6h->ip6_nxt = IPPROTO_ICMPV6;
1383 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1384 	ip6h->ip6_dst = v6dst;
1385 	ip6h->ip6_src = v6src;
1386 	msg_len += IPV6_HDR_LEN + len;
1387 	if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1388 		(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1389 		msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1390 	}
1391 	ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1392 	icmp6 = (icmp6_t *)&ip6h[1];
1393 	bcopy(stuff, (char *)icmp6, len);
1394 	/*
1395 	 * Prepare for checksum by putting icmp length in the icmp
1396 	 * checksum field. The checksum is calculated in ip_output_wire_v6.
1397 	 */
1398 	icmp6->icmp6_cksum = ip6h->ip6_plen;
1399 	if (icmp6->icmp6_type == ND_REDIRECT) {
1400 		ip6h->ip6_hops = IPV6_MAX_HOPS;
1401 	}
1402 
1403 	(void) ip_output_simple(mp, &ixas);
1404 	ixa_cleanup(&ixas);
1405 }
1406 
1407 /*
1408  * Update the output mib when ICMPv6 packets are sent.
1409  */
1410 void
1411 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1412 {
1413 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1414 
1415 	switch (icmp6->icmp6_type) {
1416 	case ICMP6_DST_UNREACH:
1417 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1418 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1419 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1420 		break;
1421 
1422 	case ICMP6_TIME_EXCEEDED:
1423 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1424 		break;
1425 
1426 	case ICMP6_PARAM_PROB:
1427 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1428 		break;
1429 
1430 	case ICMP6_PACKET_TOO_BIG:
1431 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1432 		break;
1433 
1434 	case ICMP6_ECHO_REQUEST:
1435 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1436 		break;
1437 
1438 	case ICMP6_ECHO_REPLY:
1439 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1440 		break;
1441 
1442 	case ND_ROUTER_SOLICIT:
1443 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1444 		break;
1445 
1446 	case ND_ROUTER_ADVERT:
1447 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1448 		break;
1449 
1450 	case ND_NEIGHBOR_SOLICIT:
1451 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1452 		break;
1453 
1454 	case ND_NEIGHBOR_ADVERT:
1455 		BUMP_MIB(ill->ill_icmp6_mib,
1456 		    ipv6IfIcmpOutNeighborAdvertisements);
1457 		break;
1458 
1459 	case ND_REDIRECT:
1460 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1461 		break;
1462 
1463 	case MLD_LISTENER_QUERY:
1464 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1465 		break;
1466 
1467 	case MLD_LISTENER_REPORT:
1468 	case MLD_V2_LISTENER_REPORT:
1469 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1470 		break;
1471 
1472 	case MLD_LISTENER_REDUCTION:
1473 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1474 		break;
1475 	}
1476 }
1477 
1478 /*
1479  * Check if it is ok to send an ICMPv6 error packet in
1480  * response to the IP packet in mp.
1481  * Free the message and return null if no
1482  * ICMP error packet should be sent.
1483  */
1484 static mblk_t *
1485 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1486 {
1487 	ill_t		*ill = ira->ira_ill;
1488 	ip_stack_t	*ipst = ill->ill_ipst;
1489 	boolean_t	llbcast;
1490 	ip6_t		*ip6h;
1491 
1492 	if (!mp)
1493 		return (NULL);
1494 
1495 	/* We view multicast and broadcast as the same.. */
1496 	llbcast = (ira->ira_flags &
1497 	    (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1498 	ip6h = (ip6_t *)mp->b_rptr;
1499 
1500 	/* Check if source address uniquely identifies the host */
1501 
1502 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1503 	    IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1504 	    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1505 		freemsg(mp);
1506 		return (NULL);
1507 	}
1508 
1509 	if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1510 		size_t	len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1511 		icmp6_t		*icmp6;
1512 
1513 		if (mp->b_wptr - mp->b_rptr < len_needed) {
1514 			if (!pullupmsg(mp, len_needed)) {
1515 				BUMP_MIB(ill->ill_icmp6_mib,
1516 				    ipv6IfIcmpInErrors);
1517 				freemsg(mp);
1518 				return (NULL);
1519 			}
1520 			ip6h = (ip6_t *)mp->b_rptr;
1521 		}
1522 		icmp6 = (icmp6_t *)&ip6h[1];
1523 		/* Explicitly do not generate errors in response to redirects */
1524 		if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1525 		    icmp6->icmp6_type == ND_REDIRECT) {
1526 			freemsg(mp);
1527 			return (NULL);
1528 		}
1529 	}
1530 	/*
1531 	 * Check that the destination is not multicast and that the packet
1532 	 * was not sent on link layer broadcast or multicast.  (Exception
1533 	 * is Packet too big message as per the draft - when mcast_ok is set.)
1534 	 */
1535 	if (!mcast_ok &&
1536 	    (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1537 		freemsg(mp);
1538 		return (NULL);
1539 	}
1540 	/*
1541 	 * If this is a labeled system, then check to see if we're allowed to
1542 	 * send a response to this particular sender.  If not, then just drop.
1543 	 */
1544 	if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1545 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1546 		freemsg(mp);
1547 		return (NULL);
1548 	}
1549 
1550 	if (icmp_err_rate_limit(ipst)) {
1551 		/*
1552 		 * Only send ICMP error packets every so often.
1553 		 * This should be done on a per port/source basis,
1554 		 * but for now this will suffice.
1555 		 */
1556 		freemsg(mp);
1557 		return (NULL);
1558 	}
1559 	return (mp);
1560 }
1561 
1562 /*
1563  * Called when a packet was sent out the same link that it arrived on.
1564  * Check if it is ok to send a redirect and then send it.
1565  */
1566 void
1567 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1568     ip_recv_attr_t *ira)
1569 {
1570 	ill_t		*ill = ira->ira_ill;
1571 	ip_stack_t	*ipst = ill->ill_ipst;
1572 	in6_addr_t	*v6targ;
1573 	ire_t		*src_ire_v6 = NULL;
1574 	mblk_t		*mp1;
1575 	ire_t		*nhop_ire = NULL;
1576 
1577 	/*
1578 	 * Don't send a redirect when forwarding a source
1579 	 * routed packet.
1580 	 */
1581 	if (ip_source_routed_v6(ip6h, mp, ipst))
1582 		return;
1583 
1584 	if (ire->ire_type & IRE_ONLINK) {
1585 		/* Target is directly connected */
1586 		v6targ = &ip6h->ip6_dst;
1587 	} else {
1588 		/* Determine the most specific IRE used to send the packets */
1589 		nhop_ire = ire_nexthop(ire);
1590 		if (nhop_ire == NULL)
1591 			return;
1592 
1593 		/*
1594 		 * We won't send redirects to a router
1595 		 * that doesn't have a link local
1596 		 * address, but will forward.
1597 		 */
1598 		if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1599 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1600 			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1601 			ire_refrele(nhop_ire);
1602 			return;
1603 		}
1604 		v6targ = &nhop_ire->ire_addr_v6;
1605 	}
1606 	src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1607 	    NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1608 	    MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1609 
1610 	if (src_ire_v6 == NULL) {
1611 		if (nhop_ire != NULL)
1612 			ire_refrele(nhop_ire);
1613 		return;
1614 	}
1615 
1616 	/*
1617 	 * The source is directly connected.
1618 	 */
1619 	mp1 = copymsg(mp);
1620 	if (mp1 != NULL)
1621 		icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1622 
1623 	if (nhop_ire != NULL)
1624 		ire_refrele(nhop_ire);
1625 	ire_refrele(src_ire_v6);
1626 }
1627 
1628 /*
1629  * Generate an ICMPv6 redirect message.
1630  * Include target link layer address option if it exits.
1631  * Always include redirect header.
1632  */
1633 static void
1634 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1635     ip_recv_attr_t *ira)
1636 {
1637 	nd_redirect_t	*rd;
1638 	nd_opt_rd_hdr_t	*rdh;
1639 	uchar_t		*buf;
1640 	ncec_t		*ncec = NULL;
1641 	nd_opt_hdr_t	*opt;
1642 	int		len;
1643 	int		ll_opt_len = 0;
1644 	int		max_redir_hdr_data_len;
1645 	int		pkt_len;
1646 	in6_addr_t	*srcp;
1647 	ill_t		*ill;
1648 	boolean_t	need_refrele;
1649 	ip_stack_t	*ipst = ira->ira_ill->ill_ipst;
1650 
1651 	mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1652 	if (mp == NULL)
1653 		return;
1654 
1655 	if (IS_UNDER_IPMP(ira->ira_ill)) {
1656 		ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1657 		if (ill == NULL) {
1658 			ill = ira->ira_ill;
1659 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1660 			ip_drop_output("no IPMP ill for sending redirect",
1661 			    mp, ill);
1662 			freemsg(mp);
1663 			return;
1664 		}
1665 		need_refrele = B_TRUE;
1666 	} else {
1667 		ill = ira->ira_ill;
1668 		need_refrele = B_FALSE;
1669 	}
1670 
1671 	ncec = ncec_lookup_illgrp_v6(ill, targetp);
1672 	if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1673 	    ncec->ncec_lladdr != NULL) {
1674 		ll_opt_len = (sizeof (nd_opt_hdr_t) +
1675 		    ill->ill_phys_addr_length + 7)/8 * 8;
1676 	}
1677 	len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1678 	ASSERT(len % 4 == 0);
1679 	buf = kmem_alloc(len, KM_NOSLEEP);
1680 	if (buf == NULL) {
1681 		if (ncec != NULL)
1682 			ncec_refrele(ncec);
1683 		if (need_refrele)
1684 			ill_refrele(ill);
1685 		freemsg(mp);
1686 		return;
1687 	}
1688 
1689 	rd = (nd_redirect_t *)buf;
1690 	rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1691 	rd->nd_rd_code = 0;
1692 	rd->nd_rd_reserved = 0;
1693 	rd->nd_rd_target = *targetp;
1694 	rd->nd_rd_dst = *dest;
1695 
1696 	opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1697 	if (ncec != NULL && ll_opt_len != 0) {
1698 		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1699 		opt->nd_opt_len = ll_opt_len/8;
1700 		bcopy((char *)ncec->ncec_lladdr, &opt[1],
1701 		    ill->ill_phys_addr_length);
1702 	}
1703 	if (ncec != NULL)
1704 		ncec_refrele(ncec);
1705 	rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1706 	rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1707 	/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1708 	max_redir_hdr_data_len =
1709 	    (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1710 	pkt_len = msgdsize(mp);
1711 	/* Make sure mp is 8 byte aligned */
1712 	if (pkt_len > max_redir_hdr_data_len) {
1713 		rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1714 		    sizeof (nd_opt_rd_hdr_t))/8;
1715 		(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1716 	} else {
1717 		rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1718 		(void) adjmsg(mp, -(pkt_len % 8));
1719 	}
1720 	rdh->nd_opt_rh_reserved1 = 0;
1721 	rdh->nd_opt_rh_reserved2 = 0;
1722 	/* ipif_v6lcl_addr contains the link-local source address */
1723 	srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1724 
1725 	/* Redirects sent by router, and router is global zone */
1726 	ASSERT(ira->ira_zoneid == ALL_ZONES);
1727 	ira->ira_zoneid = GLOBAL_ZONEID;
1728 	icmp_pkt_v6(mp, buf, len, srcp, ira);
1729 	kmem_free(buf, len);
1730 	if (need_refrele)
1731 		ill_refrele(ill);
1732 }
1733 
1734 
1735 /* Generate an ICMP time exceeded message.  (May be called as writer.) */
1736 void
1737 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1738     ip_recv_attr_t *ira)
1739 {
1740 	icmp6_t	icmp6;
1741 
1742 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1743 	if (mp == NULL)
1744 		return;
1745 
1746 	bzero(&icmp6, sizeof (icmp6_t));
1747 	icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1748 	icmp6.icmp6_code = code;
1749 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1750 }
1751 
1752 /*
1753  * Generate an ICMP unreachable message.
1754  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1755  * constructed by the caller.
1756  */
1757 void
1758 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1759     ip_recv_attr_t *ira)
1760 {
1761 	icmp6_t	icmp6;
1762 
1763 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1764 	if (mp == NULL)
1765 		return;
1766 
1767 	bzero(&icmp6, sizeof (icmp6_t));
1768 	icmp6.icmp6_type = ICMP6_DST_UNREACH;
1769 	icmp6.icmp6_code = code;
1770 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1771 }
1772 
1773 /*
1774  * Generate an ICMP pkt too big message.
1775  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1776  * constructed by the caller.
1777  */
1778 void
1779 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1780     ip_recv_attr_t *ira)
1781 {
1782 	icmp6_t	icmp6;
1783 
1784 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1785 	if (mp == NULL)
1786 		return;
1787 
1788 	bzero(&icmp6, sizeof (icmp6_t));
1789 	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1790 	icmp6.icmp6_code = 0;
1791 	icmp6.icmp6_mtu = htonl(mtu);
1792 
1793 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1794 }
1795 
1796 /*
1797  * Generate an ICMP parameter problem message. (May be called as writer.)
1798  * 'offset' is the offset from the beginning of the packet in error.
1799  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1800  * constructed by the caller.
1801  */
1802 static void
1803 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1804     boolean_t mcast_ok, ip_recv_attr_t *ira)
1805 {
1806 	icmp6_t	icmp6;
1807 
1808 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1809 	if (mp == NULL)
1810 		return;
1811 
1812 	bzero((char *)&icmp6, sizeof (icmp6_t));
1813 	icmp6.icmp6_type = ICMP6_PARAM_PROB;
1814 	icmp6.icmp6_code = code;
1815 	icmp6.icmp6_pptr = htonl(offset);
1816 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1817 }
1818 
1819 void
1820 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1821     ip_recv_attr_t *ira)
1822 {
1823 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
1824 	uint16_t	hdr_length;
1825 	uint8_t		*nexthdrp;
1826 	uint32_t	offset;
1827 	ill_t		*ill = ira->ira_ill;
1828 
1829 	/* Determine the offset of the bad nexthdr value */
1830 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h,	&hdr_length, &nexthdrp)) {
1831 		/* Malformed packet */
1832 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1833 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
1834 		freemsg(mp);
1835 		return;
1836 	}
1837 
1838 	offset = nexthdrp - mp->b_rptr;
1839 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1840 	    mcast_ok, ira);
1841 }
1842 
1843 /*
1844  * Verify whether or not the IP address is a valid local address.
1845  * Could be a unicast, including one for a down interface.
1846  * If allow_mcbc then a multicast or broadcast address is also
1847  * acceptable.
1848  *
1849  * In the case of a multicast address, however, the
1850  * upper protocol is expected to reset the src address
1851  * to zero when we return IPVL_MCAST so that
1852  * no packets are emitted with multicast address as
1853  * source address.
1854  * The addresses valid for bind are:
1855  *	(1) - in6addr_any
1856  *	(2) - IP address of an UP interface
1857  *	(3) - IP address of a DOWN interface
1858  *	(4) - a multicast address. In this case
1859  *	the conn will only receive packets destined to
1860  *	the specified multicast address. Note: the
1861  *	application still has to issue an
1862  *	IPV6_JOIN_GROUP socket option.
1863  *
1864  * In all the above cases, the bound address must be valid in the current zone.
1865  * When the address is loopback or multicast, there might be many matching IREs
1866  * so bind has to look up based on the zone.
1867  */
1868 ip_laddr_t
1869 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1870     ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1871 {
1872 	ire_t		*src_ire;
1873 	uint_t		match_flags;
1874 	ill_t		*ill = NULL;
1875 
1876 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1877 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1878 
1879 	match_flags = MATCH_IRE_ZONEONLY;
1880 	if (scopeid != 0) {
1881 		ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1882 		if (ill == NULL)
1883 			return (IPVL_BAD);
1884 		match_flags |= MATCH_IRE_ILL;
1885 	}
1886 
1887 	src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1888 	    ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1889 	if (ill != NULL)
1890 		ill_refrele(ill);
1891 
1892 	/*
1893 	 * If an address other than in6addr_any is requested,
1894 	 * we verify that it is a valid address for bind
1895 	 * Note: Following code is in if-else-if form for
1896 	 * readability compared to a condition check.
1897 	 */
1898 	if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1899 		/*
1900 		 * (2) Bind to address of local UP interface
1901 		 */
1902 		ire_refrele(src_ire);
1903 		return (IPVL_UNICAST_UP);
1904 	} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1905 		/* (4) bind to multicast address. */
1906 		if (src_ire != NULL)
1907 			ire_refrele(src_ire);
1908 
1909 		/*
1910 		 * Note: caller should take IPV6_MULTICAST_IF
1911 		 * into account when selecting a real source address.
1912 		 */
1913 		if (allow_mcbc)
1914 			return (IPVL_MCAST);
1915 		else
1916 			return (IPVL_BAD);
1917 	} else {
1918 		ipif_t *ipif;
1919 
1920 		/*
1921 		 * (3) Bind to address of local DOWN interface?
1922 		 * (ipif_lookup_addr() looks up all interfaces
1923 		 * but we do not get here for UP interfaces
1924 		 * - case (2) above)
1925 		 */
1926 		if (src_ire != NULL)
1927 			ire_refrele(src_ire);
1928 
1929 		ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1930 		if (ipif == NULL)
1931 			return (IPVL_BAD);
1932 
1933 		/* Not a useful source? */
1934 		if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1935 			ipif_refrele(ipif);
1936 			return (IPVL_BAD);
1937 		}
1938 		ipif_refrele(ipif);
1939 		return (IPVL_UNICAST_DOWN);
1940 	}
1941 }
1942 
1943 /*
1944  * Verify that both the source and destination addresses are valid.  If
1945  * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1946  * i.e. have no route to it.  Protocols like TCP want to verify destination
1947  * reachability, while tunnels do not.
1948  *
1949  * Determine the route, the interface, and (optionally) the source address
1950  * to use to reach a given destination.
1951  * Note that we allow connect to broadcast and multicast addresses when
1952  * IPDF_ALLOW_MCBC is set.
1953  * first_hop and dst_addr are normally the same, but if source routing
1954  * they will differ; in that case the first_hop is what we'll use for the
1955  * routing lookup but the dce and label checks will be done on dst_addr,
1956  *
1957  * If uinfo is set, then we fill in the best available information
1958  * we have for the destination. This is based on (in priority order) any
1959  * metrics and path MTU stored in a dce_t, route metrics, and finally the
1960  * ill_mtu.
1961  *
1962  * Tsol note: If we have a source route then dst_addr != firsthop. But we
1963  * always do the label check on dst_addr.
1964  *
1965  * Assumes that the caller has set ixa_scopeid for link-local communication.
1966  */
int
ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
    const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
    uint32_t flags, uint_t mac_mode)
{
	ire_t		*ire;
	int		error = 0;
	in6_addr_t	setsrc;				/* RTF_SETSRC */
	zoneid_t	zoneid = ixa->ixa_zoneid;	/* Honors SO_ALLZONES */
	ip_stack_t	*ipst = ixa->ixa_ipst;
	dce_t		*dce;
	uint_t		pmtu;
	uint_t		ifindex;
	uint_t		generation;
	nce_t		*nce;
	/*
	 * Only assigned when IPDF_SELECT_SRC is requested; released with
	 * ill_refrele() on every exit path once it is non-NULL.
	 */
	ill_t		*ill = NULL;
	boolean_t	multirt = B_FALSE;

	ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));

	ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));

	/*
	 * We never send to zero; the ULPs map it to the loopback address.
	 * We can't allow it since we use zero to mean uninitialized in some
	 * places.
	 */
	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));

	if (is_system_labeled()) {
		ts_label_t *tsl = NULL;

		/* The label check is always done on dst_addr, not firsthop. */
		error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
		    mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
		if (error != 0)
			return (error);
		if (tsl != NULL) {
			/* Update the label */
			ip_xmit_attr_replace_tsl(ixa, tsl);
		}
	}

	setsrc = ipv6_all_zeros;
	/*
	 * Select a route; For IPMP interfaces, we would only select
	 * a "hidden" route (i.e., going through a specific under_ill)
	 * if ixa_ifindex has been specified.
	 */
	ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
	    &setsrc, &error, &multirt);
	ASSERT(ire != NULL);	/* IRE_NOROUTE if none found */
	/*
	 * The ire has not been stored in ixa_ire yet, so bad_addr drops
	 * our reference on it.
	 */
	if (error != 0)
		goto bad_addr;

	/*
	 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
	 * If IPDF_VERIFY_DST is set, the destination must be reachable.
	 * Otherwise the destination needn't be reachable.
	 *
	 * If we match on a reject or black hole, then we've got a
	 * local failure.  May as well fail out the connect() attempt,
	 * since it's never going to succeed.
	 */
	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
		/*
		 * If we're verifying destination reachability, we always want
		 * to complain here.
		 *
		 * If we're not verifying destination reachability but the
		 * destination has a route, we still want to fail on the
		 * temporary address and broadcast address tests.
		 *
		 * In both cases we let the code continue so some reasonable
		 * information is returned to the caller. That enables the
		 * caller to use (and even cache) the IRE. conn_ip_output will
		 * use the generation mismatch path to check for the unreachable
		 * case thereby avoiding any specific check in the main path.
		 */
		ASSERT(generation == IRE_GENERATION_VERIFY);
		if (flags & IPDF_VERIFY_DST) {
			/*
			 * Set errno but continue to set up ixa_ire to be
			 * the RTF_REJECT|RTF_BLACKHOLE IRE.
			 * That allows callers to use ip_output to get an
			 * ICMP error back.
			 */
			if (!(ire->ire_type & IRE_HOST))
				error = ENETUNREACH;
			else
				error = EHOSTUNREACH;
		}
	}

	/*
	 * Broadcast/multicast without IPDF_ALLOW_MCBC: substitute the reject
	 * IRE and record ENETUNREACH, but keep going so that the ixa is
	 * still populated.
	 */
	if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
	    !(flags & IPDF_ALLOW_MCBC)) {
		ire_refrele(ire);
		ire = ire_reject(ipst, B_FALSE);
		generation = IRE_GENERATION_VERIFY;
		error = ENETUNREACH;
	}

	/* Cache things */
	if (ixa->ixa_ire != NULL)
		ire_refrele_notr(ixa->ixa_ire);
#ifdef DEBUG
	/*
	 * Under DEBUG, take the reference through the _notr variant (matching
	 * the ire_refrele_notr() used when ixa_ire is replaced) and drop the
	 * one obtained above.
	 */
	ire_refhold_notr(ire);
	ire_refrele(ire);
#endif
	/* From here on the ire reference belongs to ixa->ixa_ire. */
	ixa->ixa_ire = ire;
	ixa->ixa_ire_generation = generation;

	/*
	 * For multicast with multirt we have a flag passed back from
	 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
	 * possible multicast address.
	 * We also need a flag for multicast since we can't check
	 * whether RTF_MULTIRT is set in ixa_ire for multicast.
	 */
	if (multirt) {
		ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
		ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
	} else {
		ixa->ixa_postfragfn = ire->ire_postfragfn;
		ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
	}
	if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
		/* Get an nce to cache. */
		nce = ire_to_nce(ire, NULL, firsthop);
		if (nce == NULL) {
			/*
			 * Allocation failure?  Force the cached route to be
			 * re-verified on next use.
			 */
			ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
		} else {
			if (ixa->ixa_nce != NULL)
				nce_refrele(ixa->ixa_nce);
			ixa->ixa_nce = nce;
		}
	}

	/*
	 * If the source address is a loopback address, the
	 * destination had best be local or multicast.
	 * If we are sending to an IRE_LOCAL using a loopback source then
	 * it had better be the same zoneid.
	 *
	 * On the failure paths below the local ire pointer is cleared first:
	 * the reference is now held by ixa_ire, so bad_addr must not
	 * release it.
	 */
	if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
		if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
			ire = NULL;	/* Stored in ixa_ire */
			error = EADDRNOTAVAIL;
			goto bad_addr;
		}
		if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
			ire = NULL;	/* Stored in ixa_ire */
			error = EADDRNOTAVAIL;
			goto bad_addr;
		}
	}

	/*
	 * Does the caller want us to pick a source address?
	 */
	if (flags & IPDF_SELECT_SRC) {
		in6_addr_t	src_addr;

		/*
		 * We use ire_nexthop_ill to avoid the under ipmp
		 * interface for source address selection. Note that for ipmp
		 * probe packets, ixa_ifindex would have been specified, and
		 * the ip_select_route() invocation would have picked an ire
		 * with ire_ill pointing at an under interface.
		 */
		ill = ire_nexthop_ill(ire);

		/* If unreachable we have no ill but need some source */
		if (ill == NULL) {
			src_addr = ipv6_loopback;
			/* Make sure we look for a better source address */
			generation = SRC_GENERATION_VERIFY;
		} else {
			error = ip_select_source_v6(ill, &setsrc, dst_addr,
			    zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
			    &src_addr, &generation, NULL);
			if (error != 0) {
				ire = NULL;	/* Stored in ixa_ire */
				goto bad_addr;
			}
		}

		/*
		 * We allow the source address to be down.
		 * However, we check that we don't use the loopback address
		 * as a source when sending out on the wire.
		 */
		if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
		    !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
		    !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
			ire = NULL;	/* Stored in ixa_ire */
			error = EADDRNOTAVAIL;
			goto bad_addr;
		}

		/* Hand the chosen source back to the caller. */
		*src_addrp = src_addr;
		ixa->ixa_src_generation = generation;
	}

	/*
	 * Make sure we don't leave an unreachable ixa_nce in place
	 * since ip_select_route is used when we unplumb i.e., remove
	 * references on ixa_ire, ixa_nce, and ixa_dce.
	 */
	nce = ixa->ixa_nce;
	if (nce != NULL && nce->nce_is_condemned) {
		nce_refrele(nce);
		ixa->ixa_nce = NULL;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}


	/*
	 * Only link-scope destinations use a non-zero ifindex for the DCE
	 * lookup.  ill is non-NULL only if source selection above found one.
	 */
	ifindex = 0;
	if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
		/* If we are creating a DCE we'd better have an ifindex */
		if (ill != NULL)
			ifindex = ill->ill_phyint->phyint_ifindex;
		else
			flags &= ~IPDF_UNIQUE_DCE;
	}

	if (flags & IPDF_UNIQUE_DCE) {
		/* Fallback to the default dce if allocation fails */
		dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
		if (dce != NULL) {
			generation = dce->dce_generation;
		} else {
			dce = dce_lookup_v6(dst_addr, ifindex, ipst,
			    &generation);
		}
	} else {
		dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
	}
	ASSERT(dce != NULL);
	if (ixa->ixa_dce != NULL)
		dce_refrele_notr(ixa->ixa_dce);
#ifdef DEBUG
	/* Same reference swap as done for the ire above. */
	dce_refhold_notr(dce);
	dce_refrele(dce);
#endif
	ixa->ixa_dce = dce;
	ixa->ixa_dce_generation = generation;

	/*
	 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
	 * multicast. But pmtu discovery is only enabled for connected
	 * sockets in general.
	 */

	/*
	 * Set initial value for fragmentation limit.  Either conn_ip_output
	 * or the ULP might update it when there are routing changes.
	 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
	 */
	pmtu = ip_get_pmtu(ixa);
	ixa->ixa_fragsize = pmtu;
	/* Make sure ixa_fragsize and ixa_pmtu remain identical */
	if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
		ixa->ixa_pmtu = pmtu;

	/*
	 * Extract information useful for some transports.
	 * First we look for DCE metrics. Then we take what we have in
	 * the metrics in the route, where the offlink is used if we have
	 * one.
	 */
	if (uinfo != NULL) {
		bzero(uinfo, sizeof (*uinfo));

		if (dce->dce_flags & DCEF_UINFO)
			*uinfo = dce->dce_uinfo;

		rts_merge_metrics(uinfo, &ire->ire_metrics);

		/* Allow ire_metrics to decrease the path MTU from above */
		if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
			uinfo->iulp_mtu = pmtu;

		uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
		uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
		uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
	}

	if (ill != NULL)
		ill_refrele(ill);

	/*
	 * error may be non-zero here (ENETUNREACH/EHOSTUNREACH set above)
	 * even though the ixa has been fully set up.
	 */
	return (error);

bad_addr:
	/* ire is non-NULL only if its reference was not moved to ixa_ire. */
	if (ire != NULL)
		ire_refrele(ire);

	if (ill != NULL)
		ill_refrele(ill);

	/*
	 * Make sure we don't leave an unreachable ixa_nce in place
	 * since ip_select_route is used when we unplumb i.e., remove
	 * references on ixa_ire, ixa_nce, and ixa_dce.
	 */
	nce = ixa->ixa_nce;
	if (nce != NULL && nce->nce_is_condemned) {
		nce_refrele(nce);
		ixa->ixa_nce = NULL;
		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
	}

	return (error);
}
2281 
2282 /*
2283  * Handle protocols with which IP is less intimate.  There
2284  * can be more than one stream bound to a particular
2285  * protocol.  When this is the case, normally each one gets a copy
2286  * of any incoming packets.
2287  *
2288  * Zones notes:
2289  * Packets will be distributed to conns in all zones. This is really only
2290  * useful for ICMPv6 as only applications in the global zone can create raw
2291  * sockets for other protocols.
2292  */
2293 void
2294 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2295 {
2296 	mblk_t		*mp1;
2297 	in6_addr_t	laddr = ip6h->ip6_dst;
2298 	conn_t		*connp, *first_connp, *next_connp;
2299 	connf_t		*connfp;
2300 	ill_t		*ill = ira->ira_ill;
2301 	ip_stack_t	*ipst = ill->ill_ipst;
2302 
2303 	connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2304 	mutex_enter(&connfp->connf_lock);
2305 	connp = connfp->connf_head;
2306 	for (connp = connfp->connf_head; connp != NULL;
2307 	    connp = connp->conn_next) {
2308 		/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2309 		if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2310 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2311 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2312 			break;
2313 	}
2314 
2315 	if (connp == NULL) {
2316 		/*
2317 		 * No one bound to this port.  Is
2318 		 * there a client that wants all
2319 		 * unclaimed datagrams?
2320 		 */
2321 		mutex_exit(&connfp->connf_lock);
2322 		ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2323 		    ICMP6_PARAMPROB_NEXTHEADER, ira);
2324 		return;
2325 	}
2326 
2327 	ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2328 
2329 	CONN_INC_REF(connp);
2330 	first_connp = connp;
2331 
2332 	/*
2333 	 * XXX: Fix the multiple protocol listeners case. We should not
2334 	 * be walking the conn->conn_next list here.
2335 	 */
2336 	connp = connp->conn_next;
2337 	for (;;) {
2338 		while (connp != NULL) {
2339 			/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2340 			if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2341 			    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2342 			    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2343 			    ira, connp)))
2344 				break;
2345 			connp = connp->conn_next;
2346 		}
2347 
2348 		if (connp == NULL) {
2349 			/* No more interested clients */
2350 			connp = first_connp;
2351 			break;
2352 		}
2353 		if (((mp1 = dupmsg(mp)) == NULL) &&
2354 		    ((mp1 = copymsg(mp)) == NULL)) {
2355 			/* Memory allocation failed */
2356 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2357 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
2358 			connp = first_connp;
2359 			break;
2360 		}
2361 
2362 		CONN_INC_REF(connp);
2363 		mutex_exit(&connfp->connf_lock);
2364 
2365 		ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2366 		    ira);
2367 
2368 		mutex_enter(&connfp->connf_lock);
2369 		/* Follow the next pointer before releasing the conn. */
2370 		next_connp = connp->conn_next;
2371 		CONN_DEC_REF(connp);
2372 		connp = next_connp;
2373 	}
2374 
2375 	/* Last one.  Send it upstream. */
2376 	mutex_exit(&connfp->connf_lock);
2377 
2378 	ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2379 
2380 	CONN_DEC_REF(connp);
2381 }
2382 
2383 /*
2384  * Called when it is conceptually a ULP that would sent the packet
2385  * e.g., port unreachable and nexthdr unknown. Check that the packet
2386  * would have passed the IPsec global policy before sending the error.
2387  *
2388  * Send an ICMP error after patching up the packet appropriately.
2389  * Uses ip_drop_input and bumps the appropriate MIB.
2390  * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2391  */
2392 void
2393 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2394     ip_recv_attr_t *ira)
2395 {
2396 	ip6_t		*ip6h;
2397 	boolean_t	secure;
2398 	ill_t		*ill = ira->ira_ill;
2399 	ip_stack_t	*ipst = ill->ill_ipst;
2400 	netstack_t	*ns = ipst->ips_netstack;
2401 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2402 
2403 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2404 
2405 	/*
2406 	 * We are generating an icmp error for some inbound packet.
2407 	 * Called from all ip_fanout_(udp, tcp, proto) functions.
2408 	 * Before we generate an error, check with global policy
2409 	 * to see whether this is allowed to enter the system. As
2410 	 * there is no "conn", we are checking with global policy.
2411 	 */
2412 	ip6h = (ip6_t *)mp->b_rptr;
2413 	if (secure || ipss->ipsec_inbound_v6_policy_present) {
2414 		mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2415 		if (mp == NULL)
2416 			return;
2417 	}
2418 
2419 	/* We never send errors for protocols that we do implement */
2420 	if (ira->ira_protocol == IPPROTO_ICMPV6) {
2421 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2422 		ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2423 		freemsg(mp);
2424 		return;
2425 	}
2426 
2427 	switch (icmp_type) {
2428 	case ICMP6_DST_UNREACH:
2429 		ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2430 
2431 		BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2432 		ip_drop_input("ipIfStatsNoPorts", mp, ill);
2433 
2434 		icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2435 		break;
2436 	case ICMP6_PARAM_PROB:
2437 		ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2438 
2439 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2440 		ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2441 
2442 		/* Let the system determine the offset for this one */
2443 		icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2444 		break;
2445 	default:
2446 #ifdef DEBUG
2447 		panic("ip_fanout_send_icmp_v6: wrong type");
2448 		/*NOTREACHED*/
2449 #else
2450 		freemsg(mp);
2451 		break;
2452 #endif
2453 	}
2454 }
2455 
2456 /*
2457  * Fanout for UDP packets that are multicast or ICMP errors.
2458  * (Unicast fanout is handled in ip_input_v6.)
2459  *
2460  * If SO_REUSEADDR is set all multicast packets
2461  * will be delivered to all conns bound to the same port.
2462  *
2463  * Fanout for UDP packets.
2464  * The caller puts <fport, lport> in the ports parameter.
2465  * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2466  *
2467  * If SO_REUSEADDR is set all multicast and broadcast packets
2468  * will be delivered to all conns bound to the same port.
2469  *
2470  * Zones notes:
2471  * Earlier in ip_input on a system with multiple shared-IP zones we
2472  * duplicate the multicast and broadcast packets and send them up
2473  * with each explicit zoneid that exists on that ill.
2474  * This means that here we can match the zoneid with SO_ALLZONES being special.
2475  */
2476 void
2477 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2478     ip_recv_attr_t *ira)
2479 {
2480 	in6_addr_t	laddr;
2481 	conn_t		*connp;
2482 	connf_t		*connfp;
2483 	in6_addr_t	faddr;
2484 	ill_t		*ill = ira->ira_ill;
2485 	ip_stack_t	*ipst = ill->ill_ipst;
2486 
2487 	ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2488 
2489 	laddr = ip6h->ip6_dst;
2490 	faddr = ip6h->ip6_src;
2491 
2492 	/* Attempt to find a client stream based on destination port. */
2493 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2494 	mutex_enter(&connfp->connf_lock);
2495 	connp = connfp->connf_head;
2496 	while (connp != NULL) {
2497 		if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2498 		    conn_wantpacket_v6(connp, ira, ip6h) &&
2499 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2500 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2501 			break;
2502 		connp = connp->conn_next;
2503 	}
2504 
2505 	if (connp == NULL)
2506 		goto notfound;
2507 
2508 	CONN_INC_REF(connp);
2509 
2510 	if (connp->conn_reuseaddr) {
2511 		conn_t		*first_connp = connp;
2512 		conn_t		*next_connp;
2513 		mblk_t		*mp1;
2514 
2515 		connp = connp->conn_next;
2516 		for (;;) {
2517 			while (connp != NULL) {
2518 				if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2519 				    fport, faddr) &&
2520 				    conn_wantpacket_v6(connp, ira, ip6h) &&
2521 				    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2522 				    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2523 				    ira, connp)))
2524 					break;
2525 				connp = connp->conn_next;
2526 			}
2527 			if (connp == NULL) {
2528 				/* No more interested clients */
2529 				connp = first_connp;
2530 				break;
2531 			}
2532 			if (((mp1 = dupmsg(mp)) == NULL) &&
2533 			    ((mp1 = copymsg(mp)) == NULL)) {
2534 				/* Memory allocation failed */
2535 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2536 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
2537 				connp = first_connp;
2538 				break;
2539 			}
2540 
2541 			CONN_INC_REF(connp);
2542 			mutex_exit(&connfp->connf_lock);
2543 
2544 			IP6_STAT(ipst, ip6_udp_fanmb);
2545 			ip_fanout_udp_conn(connp, mp1, NULL,
2546 			    (ip6_t *)mp1->b_rptr, ira);
2547 
2548 			mutex_enter(&connfp->connf_lock);
2549 			/* Follow the next pointer before releasing the conn. */
2550 			next_connp = connp->conn_next;
2551 			IP6_STAT(ipst, ip6_udp_fanmb);
2552 			CONN_DEC_REF(connp);
2553 			connp = next_connp;
2554 		}
2555 	}
2556 
2557 	/* Last one.  Send it upstream. */
2558 	mutex_exit(&connfp->connf_lock);
2559 
2560 	IP6_STAT(ipst, ip6_udp_fanmb);
2561 	ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2562 	CONN_DEC_REF(connp);
2563 	return;
2564 
2565 notfound:
2566 	mutex_exit(&connfp->connf_lock);
2567 	/*
2568 	 * No one bound to this port.  Is
2569 	 * there a client that wants all
2570 	 * unclaimed datagrams?
2571 	 */
2572 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2573 		ASSERT(ira->ira_protocol == IPPROTO_UDP);
2574 		ip_fanout_proto_v6(mp, ip6h, ira);
2575 	} else {
2576 		ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2577 		    ICMP6_DST_UNREACH_NOPORT, ira);
2578 	}
2579 }
2580 
2581 /*
2582  * int ip_find_hdr_v6()
2583  *
2584  * This routine is used by the upper layer protocols, iptun, and IPsec:
2585  * - Set extension header pointers to appropriate locations
2586  * - Determine IPv6 header length and return it
2587  * - Return a pointer to the last nexthdr value
2588  *
2589  * The caller must initialize ipp_fields.
2590  * The upper layer protocols normally set label_separate which makes the
2591  * routine put the TX label in ipp_label_v6. If this is not set then
2592  * the hop-by-hop options including the label are placed in ipp_hopopts.
2593  *
2594  * NOTE: If multiple extension headers of the same type are present,
2595  * ip_find_hdr_v6() will set the respective extension header pointers
2596  * to the first one that it encounters in the IPv6 header.  It also
2597  * skips fragment headers.  This routine deals with malformed packets
2598  * of various sorts in which case the returned length is up to the
2599  * malformed part.
2600  */
2601 int
2602 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2603     uint8_t *nexthdrp)
2604 {
2605 	uint_t	length, ehdrlen;
2606 	uint8_t nexthdr;
2607 	uint8_t *whereptr, *endptr;
2608 	ip6_dest_t *tmpdstopts;
2609 	ip6_rthdr_t *tmprthdr;
2610 	ip6_hbh_t *tmphopopts;
2611 	ip6_frag_t *tmpfraghdr;
2612 
2613 	ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2614 	ipp->ipp_hoplimit = ip6h->ip6_hops;
2615 	ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2616 	ipp->ipp_addr = ip6h->ip6_dst;
2617 
2618 	length = IPV6_HDR_LEN;
2619 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2620 	endptr = mp->b_wptr;
2621 
2622 	nexthdr = ip6h->ip6_nxt;
2623 	while (whereptr < endptr) {
2624 		/* Is there enough left for len + nexthdr? */
2625 		if (whereptr + MIN_EHDR_LEN > endptr)
2626 			goto done;
2627 
2628 		switch (nexthdr) {
2629 		case IPPROTO_HOPOPTS: {
2630 			/* We check for any CIPSO */
2631 			uchar_t *secopt;
2632 			boolean_t hbh_needed;
2633 			uchar_t *after_secopt;
2634 
2635 			tmphopopts = (ip6_hbh_t *)whereptr;
2636 			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2637 			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
2638 				goto done;
2639 			nexthdr = tmphopopts->ip6h_nxt;
2640 
2641 			if (!label_separate) {
2642 				secopt = NULL;
2643 				after_secopt = whereptr;
2644 			} else {
2645 				/*
2646 				 * We have dropped packets with bad options in
2647 				 * ip6_input. No need to check return value
2648 				 * here.
2649 				 */
2650 				(void) tsol_find_secopt_v6(whereptr, ehdrlen,
2651 				    &secopt, &after_secopt, &hbh_needed);
2652 			}
2653 			if (secopt != NULL && after_secopt - whereptr > 0) {
2654 				ipp->ipp_fields |= IPPF_LABEL_V6;
2655 				ipp->ipp_label_v6 = secopt;
2656 				ipp->ipp_label_len_v6 = after_secopt - whereptr;
2657 			} else {
2658 				ipp->ipp_label_len_v6 = 0;
2659 				after_secopt = whereptr;
2660 				hbh_needed = B_TRUE;
2661 			}
2662 			/* return only 1st hbh */
2663 			if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2664 				ipp->ipp_fields |= IPPF_HOPOPTS;
2665 				ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2666 				ipp->ipp_hopoptslen = ehdrlen -
2667 				    ipp->ipp_label_len_v6;
2668 			}
2669 			break;
2670 		}
2671 		case IPPROTO_DSTOPTS:
2672 			tmpdstopts = (ip6_dest_t *)whereptr;
2673 			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2674 			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
2675 				goto done;
2676 			nexthdr = tmpdstopts->ip6d_nxt;
2677 			/*
2678 			 * ipp_dstopts is set to the destination header after a
2679 			 * routing header.
2680 			 * Assume it is a post-rthdr destination header
2681 			 * and adjust when we find an rthdr.
2682 			 */
2683 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2684 				ipp->ipp_fields |= IPPF_DSTOPTS;
2685 				ipp->ipp_dstopts = tmpdstopts;
2686 				ipp->ipp_dstoptslen = ehdrlen;
2687 			}
2688 			break;
2689 		case IPPROTO_ROUTING:
2690 			tmprthdr = (ip6_rthdr_t *)whereptr;
2691 			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2692 			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
2693 				goto done;
2694 			nexthdr = tmprthdr->ip6r_nxt;
2695 			/* return only 1st rthdr */
2696 			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2697 				ipp->ipp_fields |= IPPF_RTHDR;
2698 				ipp->ipp_rthdr = tmprthdr;
2699 				ipp->ipp_rthdrlen = ehdrlen;
2700 			}
2701 			/*
2702 			 * Make any destination header we've seen be a
2703 			 * pre-rthdr destination header.
2704 			 */
2705 			if (ipp->ipp_fields & IPPF_DSTOPTS) {
2706 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2707 				ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2708 				ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2709 				ipp->ipp_dstopts = NULL;
2710 				ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2711 				ipp->ipp_dstoptslen = 0;
2712 			}
2713 			break;
2714 		case IPPROTO_FRAGMENT:
2715 			tmpfraghdr = (ip6_frag_t *)whereptr;
2716 			ehdrlen = sizeof (ip6_frag_t);
2717 			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2718 				goto done;
2719 			nexthdr = tmpfraghdr->ip6f_nxt;
2720 			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2721 				ipp->ipp_fields |= IPPF_FRAGHDR;
2722 				ipp->ipp_fraghdr = tmpfraghdr;
2723 				ipp->ipp_fraghdrlen = ehdrlen;
2724 			}
2725 			break;
2726 		case IPPROTO_NONE:
2727 		default:
2728 			goto done;
2729 		}
2730 		length += ehdrlen;
2731 		whereptr += ehdrlen;
2732 	}
2733 done:
2734 	if (nexthdrp != NULL)
2735 		*nexthdrp = nexthdr;
2736 	return (length);
2737 }
2738 
2739 /*
2740  * Try to determine where and what are the IPv6 header length and
2741  * pointer to nexthdr value for the upper layer protocol (or an
2742  * unknown next hdr).
2743  *
2744  * Parameters returns a pointer to the nexthdr value;
2745  * Must handle malformed packets of various sorts.
2746  * Function returns failure for malformed cases.
2747  */
2748 boolean_t
2749 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2750     uint8_t **nexthdrpp)
2751 {
2752 	uint16_t length;
2753 	uint_t	ehdrlen;
2754 	uint8_t	*nexthdrp;
2755 	uint8_t *whereptr;
2756 	uint8_t *endptr;
2757 	ip6_dest_t *desthdr;
2758 	ip6_rthdr_t *rthdr;
2759 	ip6_frag_t *fraghdr;
2760 
2761 	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2762 	length = IPV6_HDR_LEN;
2763 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2764 	endptr = mp->b_wptr;
2765 
2766 	nexthdrp = &ip6h->ip6_nxt;
2767 	while (whereptr < endptr) {
2768 		/* Is there enough left for len + nexthdr? */
2769 		if (whereptr + MIN_EHDR_LEN > endptr)
2770 			break;
2771 
2772 		switch (*nexthdrp) {
2773 		case IPPROTO_HOPOPTS:
2774 		case IPPROTO_DSTOPTS:
2775 			/* Assumes the headers are identical for hbh and dst */
2776 			desthdr = (ip6_dest_t *)whereptr;
2777 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
2778 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
2779 				return (B_FALSE);
2780 			nexthdrp = &desthdr->ip6d_nxt;
2781 			break;
2782 		case IPPROTO_ROUTING:
2783 			rthdr = (ip6_rthdr_t *)whereptr;
2784 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
2785 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
2786 				return (B_FALSE);
2787 			nexthdrp = &rthdr->ip6r_nxt;
2788 			break;
2789 		case IPPROTO_FRAGMENT:
2790 			fraghdr = (ip6_frag_t *)whereptr;
2791 			ehdrlen = sizeof (ip6_frag_t);
2792 			if ((uchar_t *)&fraghdr[1] > endptr)
2793 				return (B_FALSE);
2794 			nexthdrp = &fraghdr->ip6f_nxt;
2795 			break;
2796 		case IPPROTO_NONE:
2797 			/* No next header means we're finished */
2798 		default:
2799 			*hdr_length_ptr = length;
2800 			*nexthdrpp = nexthdrp;
2801 			return (B_TRUE);
2802 		}
2803 		length += ehdrlen;
2804 		whereptr += ehdrlen;
2805 		*hdr_length_ptr = length;
2806 		*nexthdrpp = nexthdrp;
2807 	}
2808 	switch (*nexthdrp) {
2809 	case IPPROTO_HOPOPTS:
2810 	case IPPROTO_DSTOPTS:
2811 	case IPPROTO_ROUTING:
2812 	case IPPROTO_FRAGMENT:
2813 		/*
2814 		 * If any know extension headers are still to be processed,
2815 		 * the packet's malformed (or at least all the IP header(s) are
2816 		 * not in the same mblk - and that should never happen.
2817 		 */
2818 		return (B_FALSE);
2819 
2820 	default:
2821 		/*
2822 		 * If we get here, we know that all of the IP headers were in
2823 		 * the same mblk, even if the ULP header is in the next mblk.
2824 		 */
2825 		*hdr_length_ptr = length;
2826 		*nexthdrpp = nexthdrp;
2827 		return (B_TRUE);
2828 	}
2829 }
2830 
2831 /*
2832  * Return the length of the IPv6 related headers (including extension headers)
2833  * Returns a length even if the packet is malformed.
2834  */
2835 int
2836 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2837 {
2838 	uint16_t hdr_len;
2839 	uint8_t	*nexthdrp;
2840 
2841 	(void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2842 	return (hdr_len);
2843 }
2844 
2845 /*
2846  * Parse and process any hop-by-hop or destination options.
2847  *
2848  * Assumes that q is an ill read queue so that ICMP errors for link-local
2849  * destinations are sent out the correct interface.
2850  *
2851  * Returns -1 if there was an error and mp has been consumed.
2852  * Returns 0 if no special action is needed.
2853  * Returns 1 if the packet contained a router alert option for this node
2854  * which is verified to be "interesting/known" for our implementation.
2855  *
2856  * XXX Note: In future as more hbh or dest options are defined,
2857  * it may be better to have different routines for hbh and dest
2858  * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2859  * may have same value in different namespaces. Or is it same namespace ??
2860  * Current code checks for each opt_type (other than pads) if it is in
2861  * the expected  nexthdr (hbh or dest)
2862  */
2863 int
2864 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2865     uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2866 {
2867 	uint8_t opt_type;
2868 	uint_t optused;
2869 	int ret = 0;
2870 	const char *errtype;
2871 	ill_t		*ill = ira->ira_ill;
2872 	ip_stack_t	*ipst = ill->ill_ipst;
2873 
2874 	while (optlen != 0) {
2875 		opt_type = *optptr;
2876 		if (opt_type == IP6OPT_PAD1) {
2877 			optused = 1;
2878 		} else {
2879 			if (optlen < 2)
2880 				goto bad_opt;
2881 			errtype = "malformed";
2882 			if (opt_type == ip6opt_ls) {
2883 				optused = 2 + optptr[1];
2884 				if (optused > optlen)
2885 					goto bad_opt;
2886 			} else switch (opt_type) {
2887 			case IP6OPT_PADN:
2888 				/*
2889 				 * Note:We don't verify that (N-2) pad octets
2890 				 * are zero as required by spec. Adhere to
2891 				 * "be liberal in what you accept..." part of
2892 				 * implementation philosophy (RFC791,RFC1122)
2893 				 */
2894 				optused = 2 + optptr[1];
2895 				if (optused > optlen)
2896 					goto bad_opt;
2897 				break;
2898 
2899 			case IP6OPT_JUMBO:
2900 				if (hdr_type != IPPROTO_HOPOPTS)
2901 					goto opt_error;
2902 				goto opt_error; /* XXX Not implemented! */
2903 
2904 			case IP6OPT_ROUTER_ALERT: {
2905 				struct ip6_opt_router *or;
2906 
2907 				if (hdr_type != IPPROTO_HOPOPTS)
2908 					goto opt_error;
2909 				optused = 2 + optptr[1];
2910 				if (optused > optlen)
2911 					goto bad_opt;
2912 				or = (struct ip6_opt_router *)optptr;
2913 				/* Check total length and alignment */
2914 				if (optused != sizeof (*or) ||
2915 				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
2916 					goto opt_error;
2917 				/* Check value */
2918 				switch (*((uint16_t *)or->ip6or_value)) {
2919 				case IP6_ALERT_MLD:
2920 				case IP6_ALERT_RSVP:
2921 					ret = 1;
2922 				}
2923 				break;
2924 			}
2925 			case IP6OPT_HOME_ADDRESS: {
2926 				/*
2927 				 * Minimal support for the home address option
2928 				 * (which is required by all IPv6 nodes).
2929 				 * Implement by just swapping the home address
2930 				 * and source address.
2931 				 * XXX Note: this has IPsec implications since
2932 				 * AH needs to take this into account.
2933 				 * Also, when IPsec is used we need to ensure
2934 				 * that this is only processed once
2935 				 * in the received packet (to avoid swapping
2936 				 * back and forth).
2937 				 * NOTE:This option processing is considered
2938 				 * to be unsafe and prone to a denial of
2939 				 * service attack.
2940 				 * The current processing is not safe even with
2941 				 * IPsec secured IP packets. Since the home
2942 				 * address option processing requirement still
2943 				 * is in the IETF draft and in the process of
2944 				 * being redefined for its usage, it has been
2945 				 * decided to turn off the option by default.
2946 				 * If this section of code needs to be executed,
2947 				 * ndd variable ip6_ignore_home_address_opt
2948 				 * should be set to 0 at the user's own risk.
2949 				 */
2950 				struct ip6_opt_home_address *oh;
2951 				in6_addr_t tmp;
2952 
2953 				if (ipst->ips_ipv6_ignore_home_address_opt)
2954 					goto opt_error;
2955 
2956 				if (hdr_type != IPPROTO_DSTOPTS)
2957 					goto opt_error;
2958 				optused = 2 + optptr[1];
2959 				if (optused > optlen)
2960 					goto bad_opt;
2961 
2962 				/*
2963 				 * We did this dest. opt the first time
2964 				 * around (i.e. before AH processing).
2965 				 * If we've done AH... stop now.
2966 				 */
2967 				if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2968 				    ira->ira_ipsec_ah_sa != NULL)
2969 					break;
2970 
2971 				oh = (struct ip6_opt_home_address *)optptr;
2972 				/* Check total length and alignment */
2973 				if (optused < sizeof (*oh) ||
2974 				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2975 					goto opt_error;
2976 				/* Swap ip6_src and the home address */
2977 				tmp = ip6h->ip6_src;
2978 				/* XXX Note: only 8 byte alignment option */
2979 				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2980 				*(in6_addr_t *)oh->ip6oh_addr = tmp;
2981 				break;
2982 			}
2983 
2984 			case IP6OPT_TUNNEL_LIMIT:
2985 				if (hdr_type != IPPROTO_DSTOPTS) {
2986 					goto opt_error;
2987 				}
2988 				optused = 2 + optptr[1];
2989 				if (optused > optlen) {
2990 					goto bad_opt;
2991 				}
2992 				if (optused != 3) {
2993 					goto opt_error;
2994 				}
2995 				break;
2996 
2997 			default:
2998 				errtype = "unknown";
2999 				/* FALLTHROUGH */
3000 			opt_error:
3001 				/* Determine which zone should send error */
3002 				switch (IP6OPT_TYPE(opt_type)) {
3003 				case IP6OPT_TYPE_SKIP:
3004 					optused = 2 + optptr[1];
3005 					if (optused > optlen)
3006 						goto bad_opt;
3007 					ip1dbg(("ip_process_options_v6: %s "
3008 					    "opt 0x%x skipped\n",
3009 					    errtype, opt_type));
3010 					break;
3011 				case IP6OPT_TYPE_DISCARD:
3012 					ip1dbg(("ip_process_options_v6: %s "
3013 					    "opt 0x%x; packet dropped\n",
3014 					    errtype, opt_type));
3015 					BUMP_MIB(ill->ill_ip_mib,
3016 					    ipIfStatsInHdrErrors);
3017 					ip_drop_input("ipIfStatsInHdrErrors",
3018 					    mp, ill);
3019 					freemsg(mp);
3020 					return (-1);
3021 				case IP6OPT_TYPE_ICMP:
3022 					BUMP_MIB(ill->ill_ip_mib,
3023 					    ipIfStatsInHdrErrors);
3024 					ip_drop_input("ipIfStatsInHdrErrors",
3025 					    mp, ill);
3026 					icmp_param_problem_v6(mp,
3027 					    ICMP6_PARAMPROB_OPTION,
3028 					    (uint32_t)(optptr -
3029 					    (uint8_t *)ip6h),
3030 					    B_FALSE, ira);
3031 					return (-1);
3032 				case IP6OPT_TYPE_FORCEICMP:
3033 					BUMP_MIB(ill->ill_ip_mib,
3034 					    ipIfStatsInHdrErrors);
3035 					ip_drop_input("ipIfStatsInHdrErrors",
3036 					    mp, ill);
3037 					icmp_param_problem_v6(mp,
3038 					    ICMP6_PARAMPROB_OPTION,
3039 					    (uint32_t)(optptr -
3040 					    (uint8_t *)ip6h),
3041 					    B_TRUE, ira);
3042 					return (-1);
3043 				default:
3044 					ASSERT(0);
3045 				}
3046 			}
3047 		}
3048 		optlen -= optused;
3049 		optptr += optused;
3050 	}
3051 	return (ret);
3052 
3053 bad_opt:
3054 	/* Determine which zone should send error */
3055 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3056 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
3057 	    (uint32_t)(optptr - (uint8_t *)ip6h),
3058 	    B_FALSE, ira);
3059 	return (-1);
3060 }
3061 
3062 /*
3063  * Process a routing header that is not yet empty.
3064  * Because of RFC 5095, we now reject all route headers.
3065  */
3066 void
3067 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
3068     ip_recv_attr_t *ira)
3069 {
3070 	ill_t		*ill = ira->ira_ill;
3071 	ip_stack_t	*ipst = ill->ill_ipst;
3072 
3073 	ASSERT(rth->ip6r_segleft != 0);
3074 
3075 	if (!ipst->ips_ipv6_forward_src_routed) {
3076 		/* XXX Check for source routed out same interface? */
3077 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
3078 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
3079 		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
3080 		freemsg(mp);
3081 		return;
3082 	}
3083 
3084 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3085 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3086 	    (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3087 	    B_FALSE, ira);
3088 }
3089 
3090 /*
3091  * Read side put procedure for IPv6 module.
3092  */
3093 void
3094 ip_rput_v6(queue_t *q, mblk_t *mp)
3095 {
3096 	ill_t		*ill;
3097 
3098 	ill = (ill_t *)q->q_ptr;
3099 	if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3100 		union DL_primitives *dl;
3101 
3102 		dl = (union DL_primitives *)mp->b_rptr;
3103 		/*
3104 		 * Things are opening or closing - only accept DLPI
3105 		 * ack messages. If the stream is closing and ip_wsrv
3106 		 * has completed, ip_close is out of the qwait, but has
3107 		 * not yet completed qprocsoff. Don't proceed any further
3108 		 * because the ill has been cleaned up and things hanging
3109 		 * off the ill have been freed.
3110 		 */
3111 		if ((mp->b_datap->db_type != M_PCPROTO) ||
3112 		    (dl->dl_primitive == DL_UNITDATA_IND)) {
3113 			inet_freemsg(mp);
3114 			return;
3115 		}
3116 	}
3117 	if (DB_TYPE(mp) == M_DATA) {
3118 		struct mac_header_info_s mhi;
3119 
3120 		ip_mdata_to_mhi(ill, mp, &mhi);
3121 		ip_input_v6(ill, NULL, mp, &mhi);
3122 	} else {
3123 		ip_rput_notdata(ill, mp);
3124 	}
3125 }
3126 
3127 /*
3128  * Walk through the IPv6 packet in mp and see if there's an AH header
3129  * in it.  See if the AH header needs to get done before other headers in
3130  * the packet.  (Worker function for ipsec_early_ah_v6().)
3131  */
3132 #define	IPSEC_HDR_DONT_PROCESS	0
3133 #define	IPSEC_HDR_PROCESS	1
3134 #define	IPSEC_MEMORY_ERROR	2 /* or malformed packet */
3135 static int
3136 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3137 {
3138 	uint_t	length;
3139 	uint_t	ehdrlen;
3140 	uint8_t *whereptr;
3141 	uint8_t *endptr;
3142 	uint8_t *nexthdrp;
3143 	ip6_dest_t *desthdr;
3144 	ip6_rthdr_t *rthdr;
3145 	ip6_t	*ip6h;
3146 
3147 	/*
3148 	 * For now just pullup everything.  In general, the less pullups,
3149 	 * the better, but there's so much squirrelling through anyway,
3150 	 * it's just easier this way.
3151 	 */
3152 	if (!pullupmsg(mp, -1)) {
3153 		return (IPSEC_MEMORY_ERROR);
3154 	}
3155 
3156 	ip6h = (ip6_t *)mp->b_rptr;
3157 	length = IPV6_HDR_LEN;
3158 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3159 	endptr = mp->b_wptr;
3160 
3161 	/*
3162 	 * We can't just use the argument nexthdr in the place
3163 	 * of nexthdrp becaue we don't dereference nexthdrp
3164 	 * till we confirm whether it is a valid address.
3165 	 */
3166 	nexthdrp = &ip6h->ip6_nxt;
3167 	while (whereptr < endptr) {
3168 		/* Is there enough left for len + nexthdr? */
3169 		if (whereptr + MIN_EHDR_LEN > endptr)
3170 			return (IPSEC_MEMORY_ERROR);
3171 
3172 		switch (*nexthdrp) {
3173 		case IPPROTO_HOPOPTS:
3174 		case IPPROTO_DSTOPTS:
3175 			/* Assumes the headers are identical for hbh and dst */
3176 			desthdr = (ip6_dest_t *)whereptr;
3177 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
3178 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
3179 				return (IPSEC_MEMORY_ERROR);
3180 			/*
3181 			 * Return DONT_PROCESS because the destination
3182 			 * options header may be for each hop in a
3183 			 * routing-header, and we only want AH if we're
3184 			 * finished with routing headers.
3185 			 */
3186 			if (*nexthdrp == IPPROTO_DSTOPTS)
3187 				return (IPSEC_HDR_DONT_PROCESS);
3188 			nexthdrp = &desthdr->ip6d_nxt;
3189 			break;
3190 		case IPPROTO_ROUTING:
3191 			rthdr = (ip6_rthdr_t *)whereptr;
3192 
3193 			/*
3194 			 * If there's more hops left on the routing header,
3195 			 * return now with DON'T PROCESS.
3196 			 */
3197 			if (rthdr->ip6r_segleft > 0)
3198 				return (IPSEC_HDR_DONT_PROCESS);
3199 
3200 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
3201 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
3202 				return (IPSEC_MEMORY_ERROR);
3203 			nexthdrp = &rthdr->ip6r_nxt;
3204 			break;
3205 		case IPPROTO_FRAGMENT:
3206 			/* Wait for reassembly */
3207 			return (IPSEC_HDR_DONT_PROCESS);
3208 		case IPPROTO_AH:
3209 			*nexthdr = IPPROTO_AH;
3210 			return (IPSEC_HDR_PROCESS);
3211 		case IPPROTO_NONE:
3212 			/* No next header means we're finished */
3213 		default:
3214 			return (IPSEC_HDR_DONT_PROCESS);
3215 		}
3216 		length += ehdrlen;
3217 		whereptr += ehdrlen;
3218 	}
3219 	/*
3220 	 * Malformed/truncated packet.
3221 	 */
3222 	return (IPSEC_MEMORY_ERROR);
3223 }
3224 
3225 /*
3226  * Path for AH if options are present.
3227  * Returns NULL if the mblk was consumed.
3228  *
3229  * Sometimes AH needs to be done before other IPv6 headers for security
3230  * reasons.  This function (and its ipsec_needs_processing_v6() above)
3231  * indicates if that is so, and fans out to the appropriate IPsec protocol
3232  * for the datagram passed in.
3233  */
3234 mblk_t *
3235 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3236 {
3237 	uint8_t nexthdr;
3238 	ah_t *ah;
3239 	ill_t		*ill = ira->ira_ill;
3240 	ip_stack_t	*ipst = ill->ill_ipst;
3241 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3242 
3243 	switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3244 	case IPSEC_MEMORY_ERROR:
3245 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3246 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
3247 		freemsg(mp);
3248 		return (NULL);
3249 	case IPSEC_HDR_DONT_PROCESS:
3250 		return (mp);
3251 	}
3252 
3253 	/* Default means send it to AH! */
3254 	ASSERT(nexthdr == IPPROTO_AH);
3255 
3256 	if (!ipsec_loaded(ipss)) {
3257 		ip_proto_not_sup(mp, ira);
3258 		return (NULL);
3259 	}
3260 
3261 	mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3262 	if (mp == NULL)
3263 		return (NULL);
3264 	ASSERT(ah != NULL);
3265 	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3266 	ASSERT(ira->ira_ipsec_ah_sa != NULL);
3267 	ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3268 	mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3269 
3270 	if (mp == NULL) {
3271 		/*
3272 		 * Either it failed or is pending. In the former case
3273 		 * ipIfStatsInDiscards was increased.
3274 		 */
3275 		return (NULL);
3276 	}
3277 
3278 	/* we're done with IPsec processing, send it up */
3279 	ip_input_post_ipsec(mp, ira);
3280 	return (NULL);
3281 }
3282 
3283 /*
3284  * Reassemble fragment.
3285  * When it returns a completed message the first mblk will only contain
3286  * the headers prior to the fragment header, with the nexthdr value updated
3287  * to be the header after the fragment header.
3288  */
3289 mblk_t *
3290 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3291     ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3292 {
3293 	uint32_t	ident = ntohl(fraghdr->ip6f_ident);
3294 	uint16_t	offset;
3295 	boolean_t	more_frags;
3296 	uint8_t		nexthdr = fraghdr->ip6f_nxt;
3297 	in6_addr_t	*v6dst_ptr;
3298 	in6_addr_t	*v6src_ptr;
3299 	uint_t		end;
3300 	uint_t		hdr_length;
3301 	size_t		count;
3302 	ipf_t		*ipf;
3303 	ipf_t		**ipfp;
3304 	ipfb_t		*ipfb;
3305 	mblk_t		*mp1;
3306 	uint8_t		ecn_info = 0;
3307 	size_t		msg_len;
3308 	mblk_t		*tail_mp;
3309 	mblk_t		*t_mp;
3310 	boolean_t	pruned = B_FALSE;
3311 	uint32_t	sum_val;
3312 	uint16_t	sum_flags;
3313 	ill_t		*ill = ira->ira_ill;
3314 	ip_stack_t	*ipst = ill->ill_ipst;
3315 	uint_t		prev_nexthdr_offset;
3316 	uint8_t		prev_nexthdr;
3317 	uint8_t		*ptr;
3318 	uint32_t	packet_size;
3319 
3320 	/*
3321 	 * We utilize hardware computed checksum info only for UDP since
3322 	 * IP fragmentation is a normal occurence for the protocol.  In
3323 	 * addition, checksum offload support for IP fragments carrying
3324 	 * UDP payload is commonly implemented across network adapters.
3325 	 */
3326 	ASSERT(ira->ira_rill != NULL);
3327 	if (nexthdr == IPPROTO_UDP && dohwcksum &&
3328 	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3329 	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
3330 		mblk_t *mp1 = mp->b_cont;
3331 		int32_t len;
3332 
3333 		/* Record checksum information from the packet */
3334 		sum_val = (uint32_t)DB_CKSUM16(mp);
3335 		sum_flags = DB_CKSUMFLAGS(mp);
3336 
3337 		/* fragmented payload offset from beginning of mblk */
3338 		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3339 
3340 		if ((sum_flags & HCK_PARTIALCKSUM) &&
3341 		    (mp1 == NULL || mp1->b_cont == NULL) &&
3342 		    offset >= DB_CKSUMSTART(mp) &&
3343 		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3344 			uint32_t adj;
3345 			/*
3346 			 * Partial checksum has been calculated by hardware
3347 			 * and attached to the packet; in addition, any
3348 			 * prepended extraneous data is even byte aligned.
3349 			 * If any such data exists, we adjust the checksum;
3350 			 * this would also handle any postpended data.
3351 			 */
3352 			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3353 			    mp, mp1, len, adj);
3354 
3355 			/* One's complement subtract extraneous checksum */
3356 			if (adj >= sum_val)
3357 				sum_val = ~(adj - sum_val) & 0xFFFF;
3358 			else
3359 				sum_val -= adj;
3360 		}
3361 	} else {
3362 		sum_val = 0;
3363 		sum_flags = 0;
3364 	}
3365 
3366 	/* Clear hardware checksumming flag */
3367 	DB_CKSUMFLAGS(mp) = 0;
3368 
3369 	/*
3370 	 * Determine the offset (from the begining of the IP header)
3371 	 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3372 	 * this when removing the fragment header from the packet.
3373 	 * This packet consists of the IPv6 header, a potential
3374 	 * hop-by-hop options header, a potential pre-routing-header
3375 	 * destination options header, and a potential routing header.
3376 	 */
3377 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3378 	prev_nexthdr = ip6h->ip6_nxt;
3379 	ptr = (uint8_t *)&ip6h[1];
3380 
3381 	if (prev_nexthdr == IPPROTO_HOPOPTS) {
3382 		ip6_hbh_t	*hbh_hdr;
3383 		uint_t		hdr_len;
3384 
3385 		hbh_hdr = (ip6_hbh_t *)ptr;
3386 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3387 		prev_nexthdr = hbh_hdr->ip6h_nxt;
3388 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3389 		    - (uint8_t *)ip6h;
3390 		ptr += hdr_len;
3391 	}
3392 	if (prev_nexthdr == IPPROTO_DSTOPTS) {
3393 		ip6_dest_t	*dest_hdr;
3394 		uint_t		hdr_len;
3395 
3396 		dest_hdr = (ip6_dest_t *)ptr;
3397 		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3398 		prev_nexthdr = dest_hdr->ip6d_nxt;
3399 		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3400 		    - (uint8_t *)ip6h;
3401 		ptr += hdr_len;
3402 	}
3403 	if (prev_nexthdr == IPPROTO_ROUTING) {
3404 		ip6_rthdr_t	*rthdr;
3405 		uint_t		hdr_len;
3406 
3407 		rthdr = (ip6_rthdr_t *)ptr;
3408 		prev_nexthdr = rthdr->ip6r_nxt;
3409 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3410 		    - (uint8_t *)ip6h;
3411 		hdr_len = 8 * (rthdr->ip6r_len + 1);
3412 		ptr += hdr_len;
3413 	}
3414 	if (prev_nexthdr != IPPROTO_FRAGMENT) {
3415 		/* Can't handle other headers before the fragment header */
3416 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3417 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3418 		freemsg(mp);
3419 		return (NULL);
3420 	}
3421 
3422 	/*
3423 	 * Note: Fragment offset in header is in 8-octet units.
3424 	 * Clearing least significant 3 bits not only extracts
3425 	 * it but also gets it in units of octets.
3426 	 */
3427 	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
3428 	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3429 
3430 	/*
3431 	 * Is the more frags flag on and the payload length not a multiple
3432 	 * of eight?
3433 	 */
3434 	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3435 		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3436 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3437 		    (uint32_t)((char *)&ip6h->ip6_plen -
3438 		    (char *)ip6h), B_FALSE, ira);
3439 		return (NULL);
3440 	}
3441 
3442 	v6src_ptr = &ip6h->ip6_src;
3443 	v6dst_ptr = &ip6h->ip6_dst;
3444 	end = remlen;
3445 
3446 	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3447 	end += offset;
3448 
3449 	/*
3450 	 * Would fragment cause reassembled packet to have a payload length
3451 	 * greater than IP_MAXPACKET - the max payload size?
3452 	 */
3453 	if (end > IP_MAXPACKET) {
3454 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3455 		ip_drop_input("Reassembled packet too large", mp, ill);
3456 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3457 		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
3458 		    (char *)ip6h), B_FALSE, ira);
3459 		return (NULL);
3460 	}
3461 
3462 	/*
3463 	 * This packet just has one fragment. Reassembly not
3464 	 * needed.
3465 	 */
3466 	if (!more_frags && offset == 0) {
3467 		goto reass_done;
3468 	}
3469 
3470 	/*
3471 	 * Drop the fragmented as early as possible, if
3472 	 * we don't have resource(s) to re-assemble.
3473 	 */
3474 	if (ipst->ips_ip_reass_queue_bytes == 0) {
3475 		freemsg(mp);
3476 		return (NULL);
3477 	}
3478 
3479 	/* Record the ECN field info. */
3480 	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3481 	/*
3482 	 * If this is not the first fragment, dump the unfragmentable
3483 	 * portion of the packet.
3484 	 */
3485 	if (offset)
3486 		mp->b_rptr = (uchar_t *)&fraghdr[1];
3487 
3488 	/*
3489 	 * Fragmentation reassembly.  Each ILL has a hash table for
3490 	 * queueing packets undergoing reassembly for all IPIFs
3491 	 * associated with the ILL.  The hash is based on the packet
3492 	 * IP ident field.  The ILL frag hash table was allocated
3493 	 * as a timer block at the time the ILL was created.  Whenever
3494 	 * there is anything on the reassembly queue, the timer will
3495 	 * be running.
3496 	 */
3497 	/* Handle vnic loopback of fragments */
3498 	if (mp->b_datap->db_ref > 2)
3499 		msg_len = 0;
3500 	else
3501 		msg_len = MBLKSIZE(mp);
3502 
3503 	tail_mp = mp;
3504 	while (tail_mp->b_cont != NULL) {
3505 		tail_mp = tail_mp->b_cont;
3506 		if (tail_mp->b_datap->db_ref <= 2)
3507 			msg_len += MBLKSIZE(tail_mp);
3508 	}
3509 	/*
3510 	 * If the reassembly list for this ILL will get too big
3511 	 * prune it.
3512 	 */
3513 
3514 	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3515 	    ipst->ips_ip_reass_queue_bytes) {
3516 		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3517 		    uint_t, ill->ill_frag_count,
3518 		    uint_t, ipst->ips_ip_reass_queue_bytes);
3519 		ill_frag_prune(ill,
3520 		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3521 		    (ipst->ips_ip_reass_queue_bytes - msg_len));
3522 		pruned = B_TRUE;
3523 	}
3524 
3525 	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3526 	mutex_enter(&ipfb->ipfb_lock);
3527 
3528 	ipfp = &ipfb->ipfb_ipf;
3529 	/* Try to find an existing fragment queue for this packet. */
3530 	for (;;) {
3531 		ipf = ipfp[0];
3532 		if (ipf) {
3533 			/*
3534 			 * It has to match on ident, source address, and
3535 			 * dest address.
3536 			 */
3537 			if (ipf->ipf_ident == ident &&
3538 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3539 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3540 
3541 				/*
3542 				 * If we have received too many
3543 				 * duplicate fragments for this packet
3544 				 * free it.
3545 				 */
3546 				if (ipf->ipf_num_dups > ip_max_frag_dups) {
3547 					ill_frag_free_pkts(ill, ipfb, ipf, 1);
3548 					freemsg(mp);
3549 					mutex_exit(&ipfb->ipfb_lock);
3550 					return (NULL);
3551 				}
3552 
3553 				break;
3554 			}
3555 			ipfp = &ipf->ipf_hash_next;
3556 			continue;
3557 		}
3558 
3559 
3560 		/*
3561 		 * If we pruned the list, do we want to store this new
3562 		 * fragment?. We apply an optimization here based on the
3563 		 * fact that most fragments will be received in order.
3564 		 * So if the offset of this incoming fragment is zero,
3565 		 * it is the first fragment of a new packet. We will
3566 		 * keep it.  Otherwise drop the fragment, as we have
3567 		 * probably pruned the packet already (since the
3568 		 * packet cannot be found).
3569 		 */
3570 
3571 		if (pruned && offset != 0) {
3572 			mutex_exit(&ipfb->ipfb_lock);
3573 			freemsg(mp);
3574 			return (NULL);
3575 		}
3576 
3577 		/* New guy.  Allocate a frag message. */
3578 		mp1 = allocb(sizeof (*ipf), BPRI_MED);
3579 		if (!mp1) {
3580 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3581 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3582 			freemsg(mp);
3583 	partial_reass_done:
3584 			mutex_exit(&ipfb->ipfb_lock);
3585 			return (NULL);
3586 		}
3587 
3588 		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst))  {
3589 			/*
3590 			 * Too many fragmented packets in this hash bucket.
3591 			 * Free the oldest.
3592 			 */
3593 			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
3594 		}
3595 
3596 		mp1->b_cont = mp;
3597 
3598 		/* Initialize the fragment header. */
3599 		ipf = (ipf_t *)mp1->b_rptr;
3600 		ipf->ipf_mp = mp1;
3601 		ipf->ipf_ptphn = ipfp;
3602 		ipfp[0] = ipf;
3603 		ipf->ipf_hash_next = NULL;
3604 		ipf->ipf_ident = ident;
3605 		ipf->ipf_v6src = *v6src_ptr;
3606 		ipf->ipf_v6dst = *v6dst_ptr;
3607 		/* Record reassembly start time. */
3608 		ipf->ipf_timestamp = gethrestime_sec();
3609 		/* Record ipf generation and account for frag header */
3610 		ipf->ipf_gen = ill->ill_ipf_gen++;
3611 		ipf->ipf_count = MBLKSIZE(mp1);
3612 		ipf->ipf_protocol = nexthdr;
3613 		ipf->ipf_nf_hdr_len = 0;
3614 		ipf->ipf_prev_nexthdr_offset = 0;
3615 		ipf->ipf_last_frag_seen = B_FALSE;
3616 		ipf->ipf_ecn = ecn_info;
3617 		ipf->ipf_num_dups = 0;
3618 		ipfb->ipfb_frag_pkts++;
3619 		ipf->ipf_checksum = 0;
3620 		ipf->ipf_checksum_flags = 0;
3621 
3622 		/* Store checksum value in fragment header */
3623 		if (sum_flags != 0) {
3624 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3625 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3626 			ipf->ipf_checksum = sum_val;
3627 			ipf->ipf_checksum_flags = sum_flags;
3628 		}
3629 
3630 		/*
3631 		 * We handle reassembly two ways.  In the easy case,
3632 		 * where all the fragments show up in order, we do
3633 		 * minimal bookkeeping, and just clip new pieces on
3634 		 * the end.  If we ever see a hole, then we go off
3635 		 * to ip_reassemble which has to mark the pieces and
3636 		 * keep track of the number of holes, etc.  Obviously,
3637 		 * the point of having both mechanisms is so we can
3638 		 * handle the easy case as efficiently as possible.
3639 		 */
3640 		if (offset == 0) {
3641 			/* Easy case, in-order reassembly so far. */
3642 			/* Update the byte count */
3643 			ipf->ipf_count += msg_len;
3644 			ipf->ipf_tail_mp = tail_mp;
3645 			/*
3646 			 * Keep track of next expected offset in
3647 			 * ipf_end.
3648 			 */
3649 			ipf->ipf_end = end;
3650 			ipf->ipf_nf_hdr_len = hdr_length;
3651 			ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
3652 		} else {
3653 			/* Hard case, hole at the beginning. */
3654 			ipf->ipf_tail_mp = NULL;
3655 			/*
3656 			 * ipf_end == 0 means that we have given up
3657 			 * on easy reassembly.
3658 			 */
3659 			ipf->ipf_end = 0;
3660 
3661 			/* Forget checksum offload from now on */
3662 			ipf->ipf_checksum_flags = 0;
3663 
3664 			/*
3665 			 * ipf_hole_cnt is set by ip_reassemble.
3666 			 * ipf_count is updated by ip_reassemble.
3667 			 * No need to check for return value here
3668 			 * as we don't expect reassembly to complete or
3669 			 * fail for the first fragment itself.
3670 			 */
3671 			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
3672 			    msg_len);
3673 		}
3674 		/* Update per ipfb and ill byte counts */
3675 		ipfb->ipfb_count += ipf->ipf_count;
3676 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3677 		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
3678 		/* If the frag timer wasn't already going, start it. */
3679 		mutex_enter(&ill->ill_lock);
3680 		ill_frag_timer_start(ill);
3681 		mutex_exit(&ill->ill_lock);
3682 		goto partial_reass_done;
3683 	}
3684 
3685 	/*
3686 	 * If the packet's flag has changed (it could be coming up
3687 	 * from an interface different than the previous, therefore
3688 	 * possibly different checksum capability), then forget about
3689 	 * any stored checksum states.  Otherwise add the value to
3690 	 * the existing one stored in the fragment header.
3691 	 */
3692 	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
3693 		sum_val += ipf->ipf_checksum;
3694 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3695 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3696 		ipf->ipf_checksum = sum_val;
3697 	} else if (ipf->ipf_checksum_flags != 0) {
3698 		/* Forget checksum offload from now on */
3699 		ipf->ipf_checksum_flags = 0;
3700 	}
3701 
3702 	/*
3703 	 * We have a new piece of a datagram which is already being
3704 	 * reassembled.  Update the ECN info if all IP fragments
3705 	 * are ECN capable.  If there is one which is not, clear
3706 	 * all the info.  If there is at least one which has CE
3707 	 * code point, IP needs to report that up to transport.
3708 	 */
3709 	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
3710 		if (ecn_info == IPH_ECN_CE)
3711 			ipf->ipf_ecn = IPH_ECN_CE;
3712 	} else {
3713 		ipf->ipf_ecn = IPH_ECN_NECT;
3714 	}
3715 
3716 	if (offset && ipf->ipf_end == offset) {
3717 		/* The new fragment fits at the end */
3718 		ipf->ipf_tail_mp->b_cont = mp;
3719 		/* Update the byte count */
3720 		ipf->ipf_count += msg_len;
3721 		/* Update per ipfb and ill byte counts */
3722 		ipfb->ipfb_count += msg_len;
3723 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3724 		atomic_add_32(&ill->ill_frag_count, msg_len);
3725 		if (more_frags) {
3726 			/* More to come. */
3727 			ipf->ipf_end = end;
3728 			ipf->ipf_tail_mp = tail_mp;
3729 			goto partial_reass_done;
3730 		}
3731 	} else {
3732 		/*
3733 		 * Go do the hard cases.
3734 		 * Call ip_reassemble().
3735 		 */
3736 		int ret;
3737 
3738 		if (offset == 0) {
3739 			if (ipf->ipf_prev_nexthdr_offset == 0) {
3740 				ipf->ipf_nf_hdr_len = hdr_length;
3741 				ipf->ipf_prev_nexthdr_offset =
3742 				    prev_nexthdr_offset;
3743 			}
3744 		}
3745 		/* Save current byte count */
3746 		count = ipf->ipf_count;
3747 		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
3748 
3749 		/* Count of bytes added and subtracted (freeb()ed) */
3750 		count = ipf->ipf_count - count;
3751 		if (count) {
3752 			/* Update per ipfb and ill byte counts */
3753 			ipfb->ipfb_count += count;
3754 			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3755 			atomic_add_32(&ill->ill_frag_count, count);
3756 		}
3757 		if (ret == IP_REASS_PARTIAL) {
3758 			goto partial_reass_done;
3759 		} else if (ret == IP_REASS_FAILED) {
3760 			/* Reassembly failed. Free up all resources */
3761 			ill_frag_free_pkts(ill, ipfb, ipf, 1);
3762 			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
3763 				IP_REASS_SET_START(t_mp, 0);
3764 				IP_REASS_SET_END(t_mp, 0);
3765 			}
3766 			freemsg(mp);
3767 			goto partial_reass_done;
3768 		}
3769 
3770 		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3771 	}
3772 	/*
3773 	 * We have completed reassembly.  Unhook the frag header from
3774 	 * the reassembly list.
3775 	 *
3776 	 * Grab the unfragmentable header length next header value out
3777 	 * of the first fragment
3778 	 */
3779 	ASSERT(ipf->ipf_nf_hdr_len != 0);
3780 	hdr_length = ipf->ipf_nf_hdr_len;
3781 
3782 	/*
3783 	 * Before we free the frag header, record the ECN info
3784 	 * to report back to the transport.
3785 	 */
3786 	ecn_info = ipf->ipf_ecn;
3787 
3788 	/*
3789 	 * Store the nextheader field in the header preceding the fragment
3790 	 * header
3791 	 */
3792 	nexthdr = ipf->ipf_protocol;
3793 	prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
3794 	ipfp = ipf->ipf_ptphn;
3795 
3796 	/* We need to supply these to caller */
3797 	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
3798 		sum_val = ipf->ipf_checksum;
3799 	else
3800 		sum_val = 0;
3801 
3802 	mp1 = ipf->ipf_mp;
3803 	count = ipf->ipf_count;
3804 	ipf = ipf->ipf_hash_next;
3805 	if (ipf)
3806 		ipf->ipf_ptphn = ipfp;
3807 	ipfp[0] = ipf;
3808 	atomic_add_32(&ill->ill_frag_count, -count);
3809 	ASSERT(ipfb->ipfb_count >= count);
3810 	ipfb->ipfb_count -= count;
3811 	ipfb->ipfb_frag_pkts--;
3812 	mutex_exit(&ipfb->ipfb_lock);
3813 	/* Ditch the frag header. */
3814 	mp = mp1->b_cont;
3815 	freeb(mp1);
3816 
3817 	/*
3818 	 * Make sure the packet is good by doing some sanity
3819 	 * check. If bad we can silentely drop the packet.
3820 	 */
3821 reass_done:
3822 	if (hdr_length < sizeof (ip6_frag_t)) {
3823 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3824 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3825 		ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3826 		freemsg(mp);
3827 		return (NULL);
3828 	}
3829 
3830 	/*
3831 	 * Remove the fragment header from the initial header by
3832 	 * splitting the mblk into the non-fragmentable header and
3833 	 * everthing after the fragment extension header.  This has the
3834 	 * side effect of putting all the headers that need destination
3835 	 * processing into the b_cont block-- on return this fact is
3836 	 * used in order to avoid having to look at the extensions
3837 	 * already processed.
3838 	 *
3839 	 * Note that this code assumes that the unfragmentable portion
3840 	 * of the header is in the first mblk and increments
3841 	 * the read pointer past it.  If this assumption is broken
3842 	 * this code fails badly.
3843 	 */
3844 	if (mp->b_rptr + hdr_length != mp->b_wptr) {
3845 		mblk_t *nmp;
3846 
3847 		if (!(nmp = dupb(mp))) {
3848 			ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3849 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3850 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3851 			freemsg(mp);
3852 			return (NULL);
3853 		}
3854 		nmp->b_cont = mp->b_cont;
3855 		mp->b_cont = nmp;
3856 		nmp->b_rptr += hdr_length;
3857 	}
3858 	mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
3859 
3860 	ip6h = (ip6_t *)mp->b_rptr;
3861 	((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
3862 
3863 	/* Restore original IP length in header. */
3864 	packet_size = msgdsize(mp);
3865 	ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
3866 	/* Record the ECN info. */
3867 	ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
3868 	ip6h->ip6_vcf |= htonl(ecn_info << 20);
3869 
3870 	/* Update the receive attributes */
3871 	ira->ira_pktlen = packet_size;
3872 	ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
3873 	ira->ira_protocol = nexthdr;
3874 
3875 	/* Reassembly is successful; set checksum information in packet */
3876 	DB_CKSUM16(mp) = (uint16_t)sum_val;
3877 	DB_CKSUMFLAGS(mp) = sum_flags;
3878 	DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
3879 
3880 	return (mp);
3881 }
3882 
3883 /*
3884  * Given an mblk and a ptr, find the destination address in an IPv6 routing
3885  * header.
3886  */
3887 static in6_addr_t
3888 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3889 {
3890 	ip6_rthdr0_t *rt0;
3891 	int segleft, numaddr;
3892 	in6_addr_t *ap, rv = oldrv;
3893 
3894 	rt0 = (ip6_rthdr0_t *)whereptr;
3895 	if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3896 		DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3897 		    uint8_t *, whereptr);
3898 		return (rv);
3899 	}
3900 	segleft = rt0->ip6r0_segleft;
3901 	numaddr = rt0->ip6r0_len / 2;
3902 
3903 	if ((rt0->ip6r0_len & 0x1) ||
3904 	    (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3905 	    (segleft > rt0->ip6r0_len / 2)) {
3906 		/*
3907 		 * Corrupt packet.  Either the routing header length is odd
3908 		 * (can't happen) or mismatched compared to the packet, or the
3909 		 * number of addresses is.  Return what we can.  This will
3910 		 * only be a problem on forwarded packets that get squeezed
3911 		 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3912 		 */
3913 		DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3914 		    whereptr);
3915 		return (rv);
3916 	}
3917 
3918 	if (segleft != 0) {
3919 		ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3920 		rv = ap[numaddr - 1];
3921 	}
3922 
3923 	return (rv);
3924 }
3925 
3926 /*
3927  * Walk through the options to see if there is a routing header.
3928  * If present get the destination which is the last address of
3929  * the option.
3930  * mp needs to be provided in cases when the extension headers might span
3931  * b_cont; mp is never modified by this function.
3932  */
3933 in6_addr_t
3934 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3935 {
3936 	const mblk_t *current_mp = mp;
3937 	uint8_t nexthdr;
3938 	uint8_t *whereptr;
3939 	int ehdrlen;
3940 	in6_addr_t rv;
3941 
3942 	whereptr = (uint8_t *)ip6h;
3943 	ehdrlen = sizeof (ip6_t);
3944 
3945 	/* We assume at least the IPv6 base header is within one mblk. */
3946 	ASSERT(mp == NULL ||
3947 	    (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3948 
3949 	rv = ip6h->ip6_dst;
3950 	nexthdr = ip6h->ip6_nxt;
3951 	if (is_fragment != NULL)
3952 		*is_fragment = B_FALSE;
3953 
3954 	/*
3955 	 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3956 	 * no extension headers will be split across mblks.
3957 	 */
3958 
3959 	while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3960 	    nexthdr == IPPROTO_ROUTING) {
3961 		if (nexthdr == IPPROTO_ROUTING)
3962 			rv = pluck_out_dst(current_mp, whereptr, rv);
3963 
3964 		/*
3965 		 * All IPv6 extension headers have the next-header in byte
3966 		 * 0, and the (length - 8) in 8-byte-words.
3967 		 */
3968 		while (current_mp != NULL &&
3969 		    whereptr + ehdrlen >= current_mp->b_wptr) {
3970 			ehdrlen -= (current_mp->b_wptr - whereptr);
3971 			current_mp = current_mp->b_cont;
3972 			if (current_mp == NULL) {
3973 				/* Bad packet.  Return what we can. */
3974 				DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3975 				    mp, mblk_t *, current_mp, ip6_t *, ip6h);
3976 				goto done;
3977 			}
3978 			whereptr = current_mp->b_rptr;
3979 		}
3980 		whereptr += ehdrlen;
3981 
3982 		nexthdr = *whereptr;
3983 		ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3984 		ehdrlen = (*(whereptr + 1) + 1) * 8;
3985 	}
3986 
3987 done:
3988 	if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3989 		*is_fragment = B_TRUE;
3990 	return (rv);
3991 }
3992 
3993 /*
3994  * ip_source_routed_v6:
3995  * This function is called by redirect code (called from ip_input_v6) to
3996  * know whether this packet is source routed through this node i.e
3997  * whether this node (router) is part of the journey. This
3998  * function is called under two cases :
3999  *
4000  * case 1 : Routing header was processed by this node and
4001  *	    ip_process_rthdr replaced ip6_dst with the next hop
4002  *	    and we are forwarding the packet to the next hop.
4003  *
4004  * case 2 : Routing header was not processed by this node and we
4005  *	    are just forwarding the packet.
4006  *
4007  * For case (1) we don't want to send redirects. For case(2) we
4008  * want to send redirects.
4009  */
4010 static boolean_t
4011 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
4012 {
4013 	uint8_t		nexthdr;
4014 	in6_addr_t	*addrptr;
4015 	ip6_rthdr0_t	*rthdr;
4016 	uint8_t		numaddr;
4017 	ip6_hbh_t	*hbhhdr;
4018 	uint_t		ehdrlen;
4019 	uint8_t		*byteptr;
4020 
4021 	ip2dbg(("ip_source_routed_v6\n"));
4022 	nexthdr = ip6h->ip6_nxt;
4023 	ehdrlen = IPV6_HDR_LEN;
4024 
4025 	/* if a routing hdr is preceeded by HOPOPT or DSTOPT */
4026 	while (nexthdr == IPPROTO_HOPOPTS ||
4027 	    nexthdr == IPPROTO_DSTOPTS) {
4028 		byteptr = (uint8_t *)ip6h + ehdrlen;
4029 		/*
4030 		 * Check if we have already processed
4031 		 * packets or we are just a forwarding
4032 		 * router which only pulled up msgs up
4033 		 * to IPV6HDR and  one HBH ext header
4034 		 */
4035 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4036 			ip2dbg(("ip_source_routed_v6: Extension"
4037 			    " headers not processed\n"));
4038 			return (B_FALSE);
4039 		}
4040 		hbhhdr = (ip6_hbh_t *)byteptr;
4041 		nexthdr = hbhhdr->ip6h_nxt;
4042 		ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
4043 	}
4044 	switch (nexthdr) {
4045 	case IPPROTO_ROUTING:
4046 		byteptr = (uint8_t *)ip6h + ehdrlen;
4047 		/*
4048 		 * If for some reason, we haven't pulled up
4049 		 * the routing hdr data mblk, then we must
4050 		 * not have processed it at all. So for sure
4051 		 * we are not part of the source routed journey.
4052 		 */
4053 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4054 			ip2dbg(("ip_source_routed_v6: Routing"
4055 			    " header not processed\n"));
4056 			return (B_FALSE);
4057 		}
4058 		rthdr = (ip6_rthdr0_t *)byteptr;
4059 		/*
4060 		 * Either we are an intermediate router or the
4061 		 * last hop before destination and we have
4062 		 * already processed the routing header.
4063 		 * If segment_left is greater than or equal to zero,
4064 		 * then we must be the (numaddr - segleft) entry
4065 		 * of the routing header. Although ip6r0_segleft
4066 		 * is a unit8_t variable, we still check for zero
4067 		 * or greater value, if in case the data type
4068 		 * is changed someday in future.
4069 		 */
4070 		if (rthdr->ip6r0_segleft > 0 ||
4071 		    rthdr->ip6r0_segleft == 0) {
4072 			numaddr = rthdr->ip6r0_len / 2;
4073 			addrptr = (in6_addr_t *)((char *)rthdr +
4074 			    sizeof (*rthdr));
4075 			addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
4076 			if (addrptr != NULL) {
4077 				if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
4078 					return (B_TRUE);
4079 				ip1dbg(("ip_source_routed_v6: Not local\n"));
4080 			}
4081 		}
4082 	/* FALLTHRU */
4083 	default:
4084 		ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4085 		return (B_FALSE);
4086 	}
4087 }
4088 
4089 /*
4090  * IPv6 fragmentation.  Essentially the same as IPv4 fragmentation.
4091  * We have not optimized this in terms of number of mblks
4092  * allocated. For instance, for each fragment sent we always allocate a
4093  * mblk to hold the IPv6 header and fragment header.
4094  *
4095  * Assumes that all the extension headers are contained in the first mblk
4096  * and that the fragment header has has already been added by calling
4097  * ip_fraghdr_add_v6.
4098  */
4099 int
4100 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4101     uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4102     pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4103 {
4104 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
4105 	ip6_t		*fip6h;
4106 	mblk_t		*hmp;
4107 	mblk_t		*hmp0;
4108 	mblk_t		*dmp;
4109 	ip6_frag_t	*fraghdr;
4110 	size_t		unfragmentable_len;
4111 	size_t		mlen;
4112 	size_t		max_chunk;
4113 	uint16_t	off_flags;
4114 	uint16_t	offset = 0;
4115 	ill_t		*ill = nce->nce_ill;
4116 	uint8_t		nexthdr;
4117 	uint8_t		*ptr;
4118 	ip_stack_t	*ipst = ill->ill_ipst;
4119 	uint_t		priority = mp->b_band;
4120 	int		error = 0;
4121 
4122 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4123 	if (max_frag == 0) {
4124 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4125 		ip_drop_output("FragFails: zero max_frag", mp, ill);
4126 		freemsg(mp);
4127 		return (EINVAL);
4128 	}
4129 
4130 	/*
4131 	 * Caller should have added fraghdr_t to pkt_len, and also
4132 	 * updated ip6_plen.
4133 	 */
4134 	ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4135 	ASSERT(msgdsize(mp) == pkt_len);
4136 
4137 	/*
4138 	 * Determine the length of the unfragmentable portion of this
4139 	 * datagram.  This consists of the IPv6 header, a potential
4140 	 * hop-by-hop options header, a potential pre-routing-header
4141 	 * destination options header, and a potential routing header.
4142 	 */
4143 	nexthdr = ip6h->ip6_nxt;
4144 	ptr = (uint8_t *)&ip6h[1];
4145 
4146 	if (nexthdr == IPPROTO_HOPOPTS) {
4147 		ip6_hbh_t	*hbh_hdr;
4148 		uint_t		hdr_len;
4149 
4150 		hbh_hdr = (ip6_hbh_t *)ptr;
4151 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4152 		nexthdr = hbh_hdr->ip6h_nxt;
4153 		ptr += hdr_len;
4154 	}
4155 	if (nexthdr == IPPROTO_DSTOPTS) {
4156 		ip6_dest_t	*dest_hdr;
4157 		uint_t		hdr_len;
4158 
4159 		dest_hdr = (ip6_dest_t *)ptr;
4160 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4161 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4162 			nexthdr = dest_hdr->ip6d_nxt;
4163 			ptr += hdr_len;
4164 		}
4165 	}
4166 	if (nexthdr == IPPROTO_ROUTING) {
4167 		ip6_rthdr_t	*rthdr;
4168 		uint_t		hdr_len;
4169 
4170 		rthdr = (ip6_rthdr_t *)ptr;
4171 		nexthdr = rthdr->ip6r_nxt;
4172 		hdr_len = 8 * (rthdr->ip6r_len + 1);
4173 		ptr += hdr_len;
4174 	}
4175 	if (nexthdr != IPPROTO_FRAGMENT) {
4176 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4177 		ip_drop_output("FragFails: bad nexthdr", mp, ill);
4178 		freemsg(mp);
4179 		return (EINVAL);
4180 	}
4181 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4182 	unfragmentable_len += sizeof (ip6_frag_t);
4183 
4184 	max_chunk = (max_frag - unfragmentable_len) & ~7;
4185 
4186 	/*
4187 	 * Allocate an mblk with enough room for the link-layer
4188 	 * header and the unfragmentable part of the datagram, which includes
4189 	 * the fragment header.  This (or a copy) will be used as the
4190 	 * first mblk for each fragment we send.
4191 	 */
4192 	hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4193 	if (hmp == NULL) {
4194 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4195 		ip_drop_output("FragFails: no hmp", mp, ill);
4196 		freemsg(mp);
4197 		return (ENOBUFS);
4198 	}
4199 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
4200 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4201 
4202 	fip6h = (ip6_t *)hmp->b_rptr;
4203 	bcopy(ip6h, fip6h, unfragmentable_len);
4204 
4205 	/*
4206 	 * pkt_len is set to the total length of the fragmentable data in this
4207 	 * datagram.  For each fragment sent, we will decrement pkt_len
4208 	 * by the amount of fragmentable data sent in that fragment
4209 	 * until len reaches zero.
4210 	 */
4211 	pkt_len -= unfragmentable_len;
4212 
4213 	/*
4214 	 * Move read ptr past unfragmentable portion, we don't want this part
4215 	 * of the data in our fragments.
4216 	 */
4217 	mp->b_rptr += unfragmentable_len;
4218 	if (mp->b_rptr == mp->b_wptr) {
4219 		mblk_t *mp1 = mp->b_cont;
4220 		freeb(mp);
4221 		mp = mp1;
4222 	}
4223 
4224 	while (pkt_len != 0) {
4225 		mlen = MIN(pkt_len, max_chunk);
4226 		pkt_len -= mlen;
4227 		if (pkt_len != 0) {
4228 			/* Not last */
4229 			hmp0 = copyb(hmp);
4230 			if (hmp0 == NULL) {
4231 				BUMP_MIB(ill->ill_ip_mib,
4232 				    ipIfStatsOutFragFails);
4233 				ip_drop_output("FragFails: copyb failed",
4234 				    mp, ill);
4235 				freeb(hmp);
4236 				freemsg(mp);
4237 				ip1dbg(("ip_fragment_v6: copyb failed\n"));
4238 				return (ENOBUFS);
4239 			}
4240 			off_flags = IP6F_MORE_FRAG;
4241 		} else {
4242 			/* Last fragment */
4243 			hmp0 = hmp;
4244 			hmp = NULL;
4245 			off_flags = 0;
4246 		}
4247 		fip6h = (ip6_t *)(hmp0->b_rptr);
4248 		fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4249 		    sizeof (ip6_frag_t));
4250 
4251 		fip6h->ip6_plen = htons((uint16_t)(mlen +
4252 		    unfragmentable_len - IPV6_HDR_LEN));
4253 		/*
4254 		 * Note: Optimization alert.
4255 		 * In IPv6 (and IPv4) protocol header, Fragment Offset
4256 		 * ("offset") is 13 bits wide and in 8-octet units.
4257 		 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4258 		 * it occupies the most significant 13 bits.
4259 		 * (least significant 13 bits in IPv4).
4260 		 * We do not do any shifts here. Not shifting is same effect
4261 		 * as taking offset value in octet units, dividing by 8 and
4262 		 * then shifting 3 bits left to line it up in place in proper
4263 		 * place protocol header.
4264 		 */
4265 		fraghdr->ip6f_offlg = htons(offset) | off_flags;
4266 
4267 		if (!(dmp = ip_carve_mp(&mp, mlen))) {
4268 			/* mp has already been freed by ip_carve_mp() */
4269 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4270 			ip_drop_output("FragFails: could not carve mp",
4271 			    hmp0, ill);
4272 			if (hmp != NULL)
4273 				freeb(hmp);
4274 			freeb(hmp0);
4275 			ip1dbg(("ip_carve_mp: failed\n"));
4276 			return (ENOBUFS);
4277 		}
4278 		hmp0->b_cont = dmp;
4279 		/* Get the priority marking, if any */
4280 		hmp0->b_band = priority;
4281 
4282 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4283 
4284 		error = postfragfn(hmp0, nce, ixaflags,
4285 		    mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4286 		    ixa_cookie);
4287 		if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4288 			/* No point in sending the other fragments */
4289 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4290 			ip_drop_output("FragFails: postfragfn failed",
4291 			    hmp, ill);
4292 			freeb(hmp);
4293 			freemsg(mp);
4294 			return (error);
4295 		}
4296 		/* No need to redo state machine in loop */
4297 		ixaflags &= ~IXAF_REACH_CONF;
4298 
4299 		offset += mlen;
4300 	}
4301 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4302 	return (error);
4303 }
4304 
4305 /*
4306  * Add a fragment header to an IPv6 packet.
4307  * Assumes that all the extension headers are contained in the first mblk.
4308  *
4309  * The fragment header is inserted after an hop-by-hop options header
4310  * and after [an optional destinations header followed by] a routing header.
4311  */
4312 mblk_t *
4313 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4314 {
4315 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
4316 	ip6_t		*fip6h;
4317 	mblk_t		*hmp;
4318 	ip6_frag_t	*fraghdr;
4319 	size_t		unfragmentable_len;
4320 	uint8_t		nexthdr;
4321 	uint_t		prev_nexthdr_offset;
4322 	uint8_t		*ptr;
4323 	uint_t		priority = mp->b_band;
4324 	ip_stack_t	*ipst = ixa->ixa_ipst;
4325 
4326 	/*
4327 	 * Determine the length of the unfragmentable portion of this
4328 	 * datagram.  This consists of the IPv6 header, a potential
4329 	 * hop-by-hop options header, a potential pre-routing-header
4330 	 * destination options header, and a potential routing header.
4331 	 */
4332 	nexthdr = ip6h->ip6_nxt;
4333 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4334 	ptr = (uint8_t *)&ip6h[1];
4335 
4336 	if (nexthdr == IPPROTO_HOPOPTS) {
4337 		ip6_hbh_t	*hbh_hdr;
4338 		uint_t		hdr_len;
4339 
4340 		hbh_hdr = (ip6_hbh_t *)ptr;
4341 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4342 		nexthdr = hbh_hdr->ip6h_nxt;
4343 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4344 		    - (uint8_t *)ip6h;
4345 		ptr += hdr_len;
4346 	}
4347 	if (nexthdr == IPPROTO_DSTOPTS) {
4348 		ip6_dest_t	*dest_hdr;
4349 		uint_t		hdr_len;
4350 
4351 		dest_hdr = (ip6_dest_t *)ptr;
4352 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4353 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4354 			nexthdr = dest_hdr->ip6d_nxt;
4355 			prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4356 			    - (uint8_t *)ip6h;
4357 			ptr += hdr_len;
4358 		}
4359 	}
4360 	if (nexthdr == IPPROTO_ROUTING) {
4361 		ip6_rthdr_t	*rthdr;
4362 		uint_t		hdr_len;
4363 
4364 		rthdr = (ip6_rthdr_t *)ptr;
4365 		nexthdr = rthdr->ip6r_nxt;
4366 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4367 		    - (uint8_t *)ip6h;
4368 		hdr_len = 8 * (rthdr->ip6r_len + 1);
4369 		ptr += hdr_len;
4370 	}
4371 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4372 
4373 	/*
4374 	 * Allocate an mblk with enough room for the link-layer
4375 	 * header, the unfragmentable part of the datagram, and the
4376 	 * fragment header.
4377 	 */
4378 	hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4379 	    ipst->ips_ip_wroff_extra, mp);
4380 	if (hmp == NULL) {
4381 		ill_t *ill = ixa->ixa_nce->nce_ill;
4382 
4383 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4384 		ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4385 		freemsg(mp);
4386 		return (NULL);
4387 	}
4388 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
4389 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4390 
4391 	fip6h = (ip6_t *)hmp->b_rptr;
4392 	fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4393 
4394 	bcopy(ip6h, fip6h, unfragmentable_len);
4395 	fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4396 	hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4397 
4398 	fraghdr->ip6f_nxt = nexthdr;
4399 	fraghdr->ip6f_reserved = 0;
4400 	fraghdr->ip6f_offlg = 0;
4401 	fraghdr->ip6f_ident = htonl(ident);
4402 
4403 	/* Get the priority marking, if any */
4404 	hmp->b_band = priority;
4405 
4406 	/*
4407 	 * Move read ptr past unfragmentable portion, we don't want this part
4408 	 * of the data in our fragments.
4409 	 */
4410 	mp->b_rptr += unfragmentable_len;
4411 	hmp->b_cont = mp;
4412 	return (hmp);
4413 }
4414 
4415 /*
4416  * Determine if the ill and multicast aspects of that packets
4417  * "matches" the conn.
4418  */
4419 boolean_t
4420 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4421 {
4422 	ill_t		*ill = ira->ira_rill;
4423 	zoneid_t	zoneid = ira->ira_zoneid;
4424 	uint_t		in_ifindex;
4425 	in6_addr_t	*v6dst_ptr = &ip6h->ip6_dst;
4426 	in6_addr_t	*v6src_ptr = &ip6h->ip6_src;
4427 
4428 	/*
4429 	 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4430 	 * scopeid. This is used to limit
4431 	 * unicast and multicast reception to conn_incoming_ifindex.
4432 	 * conn_wantpacket_v6 is called both for unicast and
4433 	 * multicast packets.
4434 	 */
4435 	in_ifindex = connp->conn_incoming_ifindex;
4436 
4437 	/* mpathd can bind to the under IPMP interface, which we allow */
4438 	if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4439 		if (!IS_UNDER_IPMP(ill))
4440 			return (B_FALSE);
4441 
4442 		if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4443 			return (B_FALSE);
4444 	}
4445 
4446 	if (!IPCL_ZONE_MATCH(connp, zoneid))
4447 		return (B_FALSE);
4448 
4449 	if (!(ira->ira_flags & IRAF_MULTICAST))
4450 		return (B_TRUE);
4451 
4452 	if (connp->conn_multi_router)
4453 		return (B_TRUE);
4454 
4455 	if (ira->ira_protocol == IPPROTO_RSVP)
4456 		return (B_TRUE);
4457 
4458 	return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4459 	    ira->ira_ill));
4460 }
4461 
4462 /*
4463  * pr_addr_dbg function provides the needed buffer space to call
4464  * inet_ntop() function's 3rd argument. This function should be
4465  * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4466  * stack buffer space in it's own stack frame. This function uses
4467  * a buffer from it's own stack and prints the information.
4468  * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4469  *
4470  * Note:    This function can call inet_ntop() once.
4471  */
4472 void
4473 pr_addr_dbg(char *fmt1, int af, const void *addr)
4474 {
4475 	char	buf[INET6_ADDRSTRLEN];
4476 
4477 	if (fmt1 == NULL) {
4478 		ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4479 		return;
4480 	}
4481 
4482 	/*
4483 	 * This does not compare debug level and just prints
4484 	 * out. Thus it is the responsibility of the caller
4485 	 * to check the appropriate debug-level before calling
4486 	 * this function.
4487 	 */
4488 	if (ip_debug > 0) {
4489 		printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4490 	}
4491 
4492 
4493 }
4494 
4495 
4496 /*
4497  * Return the length in bytes of the IPv6 headers (base header
4498  * extension headers) that will be needed based on the
4499  * ip_pkt_t structure passed by the caller.
4500  *
4501  * The returned length does not include the length of the upper level
4502  * protocol (ULP) header.
4503  */
4504 int
4505 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4506 {
4507 	int len;
4508 
4509 	len = IPV6_HDR_LEN;
4510 
4511 	/*
4512 	 * If there's a security label here, then we ignore any hop-by-hop
4513 	 * options the user may try to set.
4514 	 */
4515 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
4516 		uint_t hopoptslen;
4517 		/*
4518 		 * Note that ipp_label_len_v6 is just the option - not
4519 		 * the hopopts extension header. It also needs to be padded
4520 		 * to a multiple of 8 bytes.
4521 		 */
4522 		ASSERT(ipp->ipp_label_len_v6 != 0);
4523 		hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4524 		hopoptslen = (hopoptslen + 7)/8 * 8;
4525 		len += hopoptslen;
4526 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4527 		ASSERT(ipp->ipp_hopoptslen != 0);
4528 		len += ipp->ipp_hopoptslen;
4529 	}
4530 
4531 	/*
4532 	 * En-route destination options
4533 	 * Only do them if there's a routing header as well
4534 	 */
4535 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4536 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4537 		ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4538 		len += ipp->ipp_rthdrdstoptslen;
4539 	}
4540 	if (ipp->ipp_fields & IPPF_RTHDR) {
4541 		ASSERT(ipp->ipp_rthdrlen != 0);
4542 		len += ipp->ipp_rthdrlen;
4543 	}
4544 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
4545 		ASSERT(ipp->ipp_dstoptslen != 0);
4546 		len += ipp->ipp_dstoptslen;
4547 	}
4548 	return (len);
4549 }
4550 
4551 /*
4552  * All-purpose routine to build a header chain of an IPv6 header
4553  * followed by any required extension headers and a proto header.
4554  *
4555  * The caller has to set the source and destination address as well as
4556  * ip6_plen. The caller has to massage any routing header and compensate
4557  * for the ULP pseudo-header checksum due to the source route.
4558  *
4559  * The extension headers will all be fully filled in.
4560  */
4561 void
4562 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4563     uint8_t protocol, uint32_t flowinfo)
4564 {
4565 	uint8_t *nxthdr_ptr;
4566 	uint8_t *cp;
4567 	ip6_t	*ip6h = (ip6_t *)buf;
4568 
4569 	/* Initialize IPv6 header */
4570 	ip6h->ip6_vcf =
4571 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4572 	    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4573 
4574 	if (ipp->ipp_fields & IPPF_TCLASS) {
4575 		/* Overrides the class part of flowinfo */
4576 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4577 		    ipp->ipp_tclass);
4578 	}
4579 
4580 	if (ipp->ipp_fields & IPPF_HOPLIMIT)
4581 		ip6h->ip6_hops = ipp->ipp_hoplimit;
4582 	else
4583 		ip6h->ip6_hops = ipp->ipp_unicast_hops;
4584 
4585 	if ((ipp->ipp_fields & IPPF_ADDR) &&
4586 	    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4587 		ip6h->ip6_src = ipp->ipp_addr;
4588 
4589 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4590 	cp = (uint8_t *)&ip6h[1];
4591 	/*
4592 	 * Here's where we have to start stringing together
4593 	 * any extension headers in the right order:
4594 	 * Hop-by-hop, destination, routing, and final destination opts.
4595 	 */
4596 	/*
4597 	 * If there's a security label here, then we ignore any hop-by-hop
4598 	 * options the user may try to set.
4599 	 */
4600 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
4601 		/*
4602 		 * Hop-by-hop options with the label.
4603 		 * Note that ipp_label_v6 is just the option - not
4604 		 * the hopopts extension header. It also needs to be padded
4605 		 * to a multiple of 8 bytes.
4606 		 */
4607 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4608 		uint_t hopoptslen;
4609 		uint_t padlen;
4610 
4611 		padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4612 		hopoptslen = (padlen + 7)/8 * 8;
4613 		padlen = hopoptslen - padlen;
4614 
4615 		*nxthdr_ptr = IPPROTO_HOPOPTS;
4616 		nxthdr_ptr = &hbh->ip6h_nxt;
4617 		hbh->ip6h_len = hopoptslen/8 - 1;
4618 		cp += sizeof (ip6_hbh_t);
4619 		bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4620 		cp += ipp->ipp_label_len_v6;
4621 
4622 		ASSERT(padlen <= 7);
4623 		switch (padlen) {
4624 		case 0:
4625 			break;
4626 		case 1:
4627 			cp[0] = IP6OPT_PAD1;
4628 			break;
4629 		default:
4630 			cp[0] = IP6OPT_PADN;
4631 			cp[1] = padlen - 2;
4632 			bzero(&cp[2], padlen - 2);
4633 			break;
4634 		}
4635 		cp += padlen;
4636 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4637 		/* Hop-by-hop options */
4638 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4639 
4640 		*nxthdr_ptr = IPPROTO_HOPOPTS;
4641 		nxthdr_ptr = &hbh->ip6h_nxt;
4642 
4643 		bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4644 		cp += ipp->ipp_hopoptslen;
4645 	}
4646 	/*
4647 	 * En-route destination options
4648 	 * Only do them if there's a routing header as well
4649 	 */
4650 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4651 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4652 		ip6_dest_t *dst = (ip6_dest_t *)cp;
4653 
4654 		*nxthdr_ptr = IPPROTO_DSTOPTS;
4655 		nxthdr_ptr = &dst->ip6d_nxt;
4656 
4657 		bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4658 		cp += ipp->ipp_rthdrdstoptslen;
4659 	}
4660 	/*
4661 	 * Routing header next
4662 	 */
4663 	if (ipp->ipp_fields & IPPF_RTHDR) {
4664 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4665 
4666 		*nxthdr_ptr = IPPROTO_ROUTING;
4667 		nxthdr_ptr = &rt->ip6r_nxt;
4668 
4669 		bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4670 		cp += ipp->ipp_rthdrlen;
4671 	}
4672 	/*
4673 	 * Do ultimate destination options
4674 	 */
4675 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
4676 		ip6_dest_t *dest = (ip6_dest_t *)cp;
4677 
4678 		*nxthdr_ptr = IPPROTO_DSTOPTS;
4679 		nxthdr_ptr = &dest->ip6d_nxt;
4680 
4681 		bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4682 		cp += ipp->ipp_dstoptslen;
4683 	}
4684 	/*
4685 	 * Now set the last header pointer to the proto passed in
4686 	 */
4687 	*nxthdr_ptr = protocol;
4688 	ASSERT((int)(cp - buf) == buf_len);
4689 }
4690 
4691 /*
4692  * Return a pointer to the routing header extension header
4693  * in the IPv6 header(s) chain passed in.
4694  * If none found, return NULL
4695  * Assumes that all extension headers are in same mblk as the v6 header
4696  */
4697 ip6_rthdr_t *
4698 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4699 {
4700 	ip6_dest_t	*desthdr;
4701 	ip6_frag_t	*fraghdr;
4702 	uint_t		hdrlen;
4703 	uint8_t		nexthdr;
4704 	uint8_t		*ptr = (uint8_t *)&ip6h[1];
4705 
4706 	if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4707 		return ((ip6_rthdr_t *)ptr);
4708 
4709 	/*
4710 	 * The routing header will precede all extension headers
4711 	 * other than the hop-by-hop and destination options
4712 	 * extension headers, so if we see anything other than those,
4713 	 * we're done and didn't find it.
4714 	 * We could see a destination options header alone but no
4715 	 * routing header, in which case we'll return NULL as soon as
4716 	 * we see anything after that.
4717 	 * Hop-by-hop and destination option headers are identical,
4718 	 * so we can use either one we want as a template.
4719 	 */
4720 	nexthdr = ip6h->ip6_nxt;
4721 	while (ptr < endptr) {
4722 		/* Is there enough left for len + nexthdr? */
4723 		if (ptr + MIN_EHDR_LEN > endptr)
4724 			return (NULL);
4725 
4726 		switch (nexthdr) {
4727 		case IPPROTO_HOPOPTS:
4728 		case IPPROTO_DSTOPTS:
4729 			/* Assumes the headers are identical for hbh and dst */
4730 			desthdr = (ip6_dest_t *)ptr;
4731 			hdrlen = 8 * (desthdr->ip6d_len + 1);
4732 			nexthdr = desthdr->ip6d_nxt;
4733 			break;
4734 
4735 		case IPPROTO_ROUTING:
4736 			return ((ip6_rthdr_t *)ptr);
4737 
4738 		case IPPROTO_FRAGMENT:
4739 			fraghdr = (ip6_frag_t *)ptr;
4740 			hdrlen = sizeof (ip6_frag_t);
4741 			nexthdr = fraghdr->ip6f_nxt;
4742 			break;
4743 
4744 		default:
4745 			return (NULL);
4746 		}
4747 		ptr += hdrlen;
4748 	}
4749 	return (NULL);
4750 }
4751 
4752 /*
4753  * Called for source-routed packets originating on this node.
4754  * Manipulates the original routing header by moving every entry up
4755  * one slot, placing the first entry in the v6 header's v6_dst field,
4756  * and placing the ultimate destination in the routing header's last
4757  * slot.
4758  *
4759  * Returns the checksum diference between the ultimate destination
4760  * (last hop in the routing header when the packet is sent) and
4761  * the first hop (ip6_dst when the packet is sent)
4762  */
4763 /* ARGSUSED2 */
4764 uint32_t
4765 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4766 {
4767 	uint_t		numaddr;
4768 	uint_t		i;
4769 	in6_addr_t	*addrptr;
4770 	in6_addr_t	tmp;
4771 	ip6_rthdr0_t	*rthdr = (ip6_rthdr0_t *)rth;
4772 	uint32_t	cksm;
4773 	uint32_t	addrsum = 0;
4774 	uint16_t	*ptr;
4775 
4776 	/*
4777 	 * Perform any processing needed for source routing.
4778 	 * We know that all extension headers will be in the same mblk
4779 	 * as the IPv6 header.
4780 	 */
4781 
4782 	/*
4783 	 * If no segments left in header, or the header length field is zero,
4784 	 * don't move hop addresses around;
4785 	 * Checksum difference is zero.
4786 	 */
4787 	if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4788 		return (0);
4789 
4790 	ptr = (uint16_t *)&ip6h->ip6_dst;
4791 	cksm = 0;
4792 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4793 		cksm += ptr[i];
4794 	}
4795 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
4796 
4797 	/*
4798 	 * Here's where the fun begins - we have to
4799 	 * move all addresses up one spot, take the
4800 	 * first hop and make it our first ip6_dst,
4801 	 * and place the ultimate destination in the
4802 	 * newly-opened last slot.
4803 	 */
4804 	addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4805 	numaddr = rthdr->ip6r0_len / 2;
4806 	tmp = *addrptr;
4807 	for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4808 		*addrptr = addrptr[1];
4809 	}
4810 	*addrptr = ip6h->ip6_dst;
4811 	ip6h->ip6_dst = tmp;
4812 
4813 	/*
4814 	 * From the checksummed ultimate destination subtract the checksummed
4815 	 * current ip6_dst (the first hop address). Return that number.
4816 	 * (In the v4 case, the second part of this is done in each routine
4817 	 *  that calls ip_massage_options(). We do it all in this one place
4818 	 *  for v6).
4819 	 */
4820 	ptr = (uint16_t *)&ip6h->ip6_dst;
4821 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4822 		addrsum += ptr[i];
4823 	}
4824 	cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4825 	if ((int)cksm < 0)
4826 		cksm--;
4827 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
4828 
4829 	return (cksm);
4830 }
4831 
4832 void
4833 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4834 {
4835 	kstat_t *ksp;
4836 
4837 	ip6_stat_t template = {
4838 		{ "ip6_udp_fannorm", 	KSTAT_DATA_UINT64 },
4839 		{ "ip6_udp_fanmb", 	KSTAT_DATA_UINT64 },
4840 		{ "ip6_recv_pullup", 		KSTAT_DATA_UINT64 },
4841 		{ "ip6_db_ref",			KSTAT_DATA_UINT64 },
4842 		{ "ip6_notaligned",		KSTAT_DATA_UINT64 },
4843 		{ "ip6_multimblk",		KSTAT_DATA_UINT64 },
4844 		{ "ipsec_proto_ahesp",		KSTAT_DATA_UINT64 },
4845 		{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
4846 		{ "ip6_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
4847 		{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
4848 		{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
4849 		{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
4850 		{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
4851 		{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
4852 		{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
4853 		{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
4854 	};
4855 	ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4856 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4857 	    KSTAT_FLAG_VIRTUAL, stackid);
4858 
4859 	if (ksp == NULL)
4860 		return (NULL);
4861 
4862 	bcopy(&template, ip6_statisticsp, sizeof (template));
4863 	ksp->ks_data = (void *)ip6_statisticsp;
4864 	ksp->ks_private = (void *)(uintptr_t)stackid;
4865 
4866 	kstat_install(ksp);
4867 	return (ksp);
4868 }
4869 
4870 void
4871 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4872 {
4873 	if (ksp != NULL) {
4874 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4875 		kstat_delete_netstack(ksp, stackid);
4876 	}
4877 }
4878 
4879 /*
4880  * The following two functions set and get the value for the
4881  * IPV6_SRC_PREFERENCES socket option.
4882  */
4883 int
4884 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4885 {
4886 	/*
4887 	 * We only support preferences that are covered by
4888 	 * IPV6_PREFER_SRC_MASK.
4889 	 */
4890 	if (prefs & ~IPV6_PREFER_SRC_MASK)
4891 		return (EINVAL);
4892 
4893 	/*
4894 	 * Look for conflicting preferences or default preferences.  If
4895 	 * both bits of a related pair are clear, the application wants the
4896 	 * system's default value for that pair.  Both bits in a pair can't
4897 	 * be set.
4898 	 */
4899 	if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4900 		prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4901 	} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4902 	    IPV6_PREFER_SRC_MIPMASK) {
4903 		return (EINVAL);
4904 	}
4905 	if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4906 		prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4907 	} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4908 	    IPV6_PREFER_SRC_TMPMASK) {
4909 		return (EINVAL);
4910 	}
4911 	if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4912 		prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4913 	} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4914 	    IPV6_PREFER_SRC_CGAMASK) {
4915 		return (EINVAL);
4916 	}
4917 
4918 	ixa->ixa_src_preferences = prefs;
4919 	return (0);
4920 }
4921 
4922 size_t
4923 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4924 {
4925 	*val = ixa->ixa_src_preferences;
4926 	return (sizeof (ixa->ixa_src_preferences));
4927 }
4928 
4929 /*
4930  * Get the size of the IP options (including the IP headers size)
4931  * without including the AH header's size. If till_ah is B_FALSE,
4932  * and if AH header is present, dest options beyond AH header will
4933  * also be included in the returned size.
4934  */
4935 int
4936 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4937 {
4938 	ip6_t *ip6h;
4939 	uint8_t nexthdr;
4940 	uint8_t *whereptr;
4941 	ip6_hbh_t *hbhhdr;
4942 	ip6_dest_t *dsthdr;
4943 	ip6_rthdr_t *rthdr;
4944 	int ehdrlen;
4945 	int size;
4946 	ah_t *ah;
4947 
4948 	ip6h = (ip6_t *)mp->b_rptr;
4949 	size = IPV6_HDR_LEN;
4950 	nexthdr = ip6h->ip6_nxt;
4951 	whereptr = (uint8_t *)&ip6h[1];
4952 	for (;;) {
4953 		/* Assume IP has already stripped it */
4954 		ASSERT(nexthdr != IPPROTO_FRAGMENT);
4955 		switch (nexthdr) {
4956 		case IPPROTO_HOPOPTS:
4957 			hbhhdr = (ip6_hbh_t *)whereptr;
4958 			nexthdr = hbhhdr->ip6h_nxt;
4959 			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4960 			break;
4961 		case IPPROTO_DSTOPTS:
4962 			dsthdr = (ip6_dest_t *)whereptr;
4963 			nexthdr = dsthdr->ip6d_nxt;
4964 			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4965 			break;
4966 		case IPPROTO_ROUTING:
4967 			rthdr = (ip6_rthdr_t *)whereptr;
4968 			nexthdr = rthdr->ip6r_nxt;
4969 			ehdrlen = 8 * (rthdr->ip6r_len + 1);
4970 			break;
4971 		default :
4972 			if (till_ah) {
4973 				ASSERT(nexthdr == IPPROTO_AH);
4974 				return (size);
4975 			}
4976 			/*
4977 			 * If we don't have a AH header to traverse,
4978 			 * return now. This happens normally for
4979 			 * outbound datagrams where we have not inserted
4980 			 * the AH header.
4981 			 */
4982 			if (nexthdr != IPPROTO_AH) {
4983 				return (size);
4984 			}
4985 
4986 			/*
4987 			 * We don't include the AH header's size
4988 			 * to be symmetrical with other cases where
4989 			 * we either don't have a AH header (outbound)
4990 			 * or peek into the AH header yet (inbound and
4991 			 * not pulled up yet).
4992 			 */
4993 			ah = (ah_t *)whereptr;
4994 			nexthdr = ah->ah_nexthdr;
4995 			ehdrlen = (ah->ah_length << 2) + 8;
4996 
4997 			if (nexthdr == IPPROTO_DSTOPTS) {
4998 				if (whereptr + ehdrlen >= mp->b_wptr) {
4999 					/*
5000 					 * The destination options header
5001 					 * is not part of the first mblk.
5002 					 */
5003 					whereptr = mp->b_cont->b_rptr;
5004 				} else {
5005 					whereptr += ehdrlen;
5006 				}
5007 
5008 				dsthdr = (ip6_dest_t *)whereptr;
5009 				ehdrlen = 8 * (dsthdr->ip6d_len + 1);
5010 				size += ehdrlen;
5011 			}
5012 			return (size);
5013 		}
5014 		whereptr += ehdrlen;
5015 		size += ehdrlen;
5016 	}
5017 }
5018 
5019 /*
5020  * Utility routine that checks if `v6srcp' is a valid address on underlying
5021  * interface `ill'.  If `ipifp' is non-NULL, it's set to a held ipif
5022  * associated with `v6srcp' on success.  NOTE: if this is not called from
5023  * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
5024  * group during or after this lookup.
5025  */
5026 boolean_t
5027 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
5028 {
5029 	ipif_t *ipif;
5030 
5031 
5032 	ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
5033 	if (ipif != NULL) {
5034 		if (ipifp != NULL)
5035 			*ipifp = ipif;
5036 		else
5037 			ipif_refrele(ipif);
5038 		return (B_TRUE);
5039 	}
5040 
5041 	if (ip_debug > 2) {
5042 		pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
5043 		    "src %s\n", AF_INET6, v6srcp);
5044 	}
5045 	return (B_FALSE);
5046 }
5047