xref: /freebsd/sys/netinet6/ip6_output.c (revision 145992504973bd16cf3518af9ba5ce185fefa82a)
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
30  */
31 
32 /*-
33  * Copyright (c) 1982, 1986, 1988, 1990, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
61  */
62 
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65 
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68 #include "opt_ipfw.h"
69 #include "opt_ipsec.h"
70 #include "opt_sctp.h"
71 #include "opt_route.h"
72 
73 #include <sys/param.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/errno.h>
78 #include <sys/priv.h>
79 #include <sys/proc.h>
80 #include <sys/protosw.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/syslog.h>
84 #include <sys/ucred.h>
85 
86 #include <machine/in_cksum.h>
87 
88 #include <net/if.h>
89 #include <net/netisr.h>
90 #include <net/route.h>
91 #include <net/pfil.h>
92 #include <net/vnet.h>
93 
94 #include <netinet/in.h>
95 #include <netinet/in_var.h>
96 #include <netinet/ip_var.h>
97 #include <netinet6/in6_var.h>
98 #include <netinet/ip6.h>
99 #include <netinet/icmp6.h>
100 #include <netinet6/ip6_var.h>
101 #include <netinet/in_pcb.h>
102 #include <netinet/tcp_var.h>
103 #include <netinet6/nd6.h>
104 
105 #ifdef IPSEC
106 #include <netipsec/ipsec.h>
107 #include <netipsec/ipsec6.h>
108 #include <netipsec/key.h>
109 #include <netinet6/ip6_ipsec.h>
110 #endif /* IPSEC */
111 #ifdef SCTP
112 #include <netinet/sctp.h>
113 #include <netinet/sctp_crc32.h>
114 #endif
115 
116 #include <netinet6/ip6protosw.h>
117 #include <netinet6/scope6_var.h>
118 
119 #ifdef FLOWTABLE
120 #include <net/flowtable.h>
121 #endif
122 
123 extern int in6_mcast_loop;
124 
/*
 * Set of mbufs ip6_output() works with while it assembles the extension
 * header chain of an outgoing packet.  Each member is either NULL (that
 * header is not present) or points to the mbuf holding that header.
 */
struct ip6_exthdrs {
	struct mbuf	*ip6e_ip6;	/* mbuf holding the IPv6 header */
	struct mbuf	*ip6e_hbh;	/* Hop-by-Hop options header */
	struct mbuf	*ip6e_dest1;	/* Destination options, 1st part */
	struct mbuf	*ip6e_rthdr;	/* Routing header */
	struct mbuf	*ip6e_dest2;	/* Destination options, 2nd part */
};
132 
133 static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
134 			   struct ucred *, int));
135 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
136 	struct socket *, struct sockopt *));
137 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
138 static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *,
139 	struct ucred *, int, int, int));
140 
141 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
142 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
143 	struct ip6_frag **));
144 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
145 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
146 static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
147 	struct ifnet *, struct in6_addr *, u_long *, int *, u_int));
148 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
149 
150 
/*
 * Build an extension header mbuf from option data.
 *   hp - source: pointer to the option data, which starts with a
 *        struct ip6_ext.  A NULL hp makes the macro a no-op.
 *   mp - destination: passed to ip6_copyexthdr() as the place for the
 *        new mbuf pointer.
 * The number of bytes copied comes from the header's own length field,
 * (ip6e_len + 1) * 8.  The macro assigns to the caller's `error' variable
 * and jumps to the caller's `freehdrs' label if the copy fails.
 */
#define MAKE_EXTHDR(hp, mp)						\
    do {								\
	struct ip6_ext *ext_ = (struct ip6_ext *)(hp);			\
									\
	if (ext_ != NULL) {						\
		int extlen_ = (ext_->ip6e_len + 1) << 3;		\
									\
		error = ip6_copyexthdr((mp), (caddr_t)ext_, extlen_);	\
		if (error != 0)						\
			goto freehdrs;					\
	}								\
    } while (/*CONSTCOND*/ 0)
165 
/*
 * Link one extension header mbuf into the chain being formed.
 *   m  - the extension header mbuf to insert; NULL makes the macro a no-op.
 *   mp - the mbuf after which m is inserted; updated to m afterwards.
 *   p  - pointer to the "next header" byte of the preceding header;
 *        updated to point at the first byte of m's data afterwards.
 *   i  - the value stored through p, i.e. the type of the inserted header.
 * The value previously found at *p is moved into the first byte of m, so
 * the inserted header inherits what used to follow its predecessor.
 * Uses the caller's `hdrsplit' variable and panics if it is zero.
 */
#define MAKE_CHAIN(m, mp, p, i)						\
    do {								\
	if ((m) != NULL) {						\
		u_char *nxt_;						\
									\
		if (hdrsplit == 0)					\
			panic("assumption failed: hdr not split");	\
		nxt_ = mtod((m), u_char *);				\
		*nxt_ = *(p);						\
		*(p) = (i);						\
		(p) = nxt_;						\
		(m)->m_next = (mp)->m_next;				\
		(mp)->m_next = (m);					\
		(mp) = (m);						\
	}								\
    } while (/*CONSTCOND*/ 0)
186 
187 static void
188 in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
189 {
190 	u_short csum;
191 
192 	csum = in_cksum_skip(m, offset + plen, offset);
193 	if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0)
194 		csum = 0xffff;
195 	offset += m->m_pkthdr.csum_data;	/* checksum offset */
196 
197 	if (offset + sizeof(u_short) > m->m_len) {
198 		printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
199 		    "csum_flags=0x%04x\n", __func__, m->m_len, plen, offset,
200 		    m->m_pkthdr.csum_flags);
201 		/*
202 		 * XXX this should not happen, but if it does, the correct
203 		 * behavior may be to insert the checksum in the appropriate
204 		 * next mbuf in the chain.
205 		 */
206 		return;
207 	}
208 	*(u_short *)(m->m_data + offset) = csum;
209 }
210 
211 /*
212  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
213  * header (with pri, len, nxt, hlim, src, dst).
214  * This function may modify ver and hlim only.
215  * The mbuf chain containing the packet will be freed.
216  * The mbuf opt, if present, will not be freed.
217  * If route_in6 ro is present and has ro_rt initialized, route lookup would be
218  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
219  * then result of route lookup is stored in ro->ro_rt.
220  *
221  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
222  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
223  * which is rt_rmx.rmx_mtu.
224  *
225  * ifpp - XXX: just for statistics
226  */
227 int
228 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
229     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
230     struct ifnet **ifpp, struct inpcb *inp)
231 {
232 	struct ip6_hdr *ip6, *mhip6;
233 	struct ifnet *ifp, *origifp;
234 	struct mbuf *m = m0;
235 	struct mbuf *mprev = NULL;
236 	int hlen, tlen, len, off;
237 	struct route_in6 ip6route;
238 	struct rtentry *rt = NULL;
239 	struct sockaddr_in6 *dst, src_sa, dst_sa;
240 	struct in6_addr odst;
241 	int error = 0;
242 	struct in6_ifaddr *ia = NULL;
243 	u_long mtu;
244 	int alwaysfrag, dontfrag;
245 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
246 	struct ip6_exthdrs exthdrs;
247 	struct in6_addr finaldst, src0, dst0;
248 	u_int32_t zone;
249 	struct route_in6 *ro_pmtu = NULL;
250 	int hdrsplit = 0;
251 	int needipsec = 0;
252 	int sw_csum, tso;
253 #ifdef IPSEC
254 	struct ipsec_output_state state;
255 	struct ip6_rthdr *rh = NULL;
256 	int needipsectun = 0;
257 	int segleft_org = 0;
258 	struct secpolicy *sp = NULL;
259 #endif /* IPSEC */
260 #ifdef IPFIREWALL_FORWARD
261 	struct m_tag *fwd_tag;
262 #endif
263 
264 	ip6 = mtod(m, struct ip6_hdr *);
265 	if (ip6 == NULL) {
266 		printf ("ip6 is NULL");
267 		goto bad;
268 	}
269 
270 	if (inp != NULL)
271 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
272 
273 	finaldst = ip6->ip6_dst;
274 	bzero(&exthdrs, sizeof(exthdrs));
275 	if (opt) {
276 		/* Hop-by-Hop options header */
277 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
278 		/* Destination options header(1st part) */
279 		if (opt->ip6po_rthdr) {
280 			/*
281 			 * Destination options header(1st part)
282 			 * This only makes sense with a routing header.
283 			 * See Section 9.2 of RFC 3542.
284 			 * Disabling this part just for MIP6 convenience is
285 			 * a bad idea.  We need to think carefully about a
286 			 * way to make the advanced API coexist with MIP6
287 			 * options, which might automatically be inserted in
288 			 * the kernel.
289 			 */
290 			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
291 		}
292 		/* Routing header */
293 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
294 		/* Destination options header(2nd part) */
295 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
296 	}
297 
298 #ifdef IPSEC
299 	/*
300 	 * IPSec checking which handles several cases.
301 	 * FAST IPSEC: We re-injected the packet.
302 	 */
303 	switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
304 	{
305 	case 1:                 /* Bad packet */
306 		goto freehdrs;
307 	case -1:                /* Do IPSec */
308 		needipsec = 1;
309 		/*
310 		 * Do delayed checksums now, as we may send before returning.
311 		 */
312 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
313 			plen = m->m_pkthdr.len - sizeof(*ip6);
314 			in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
315 			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
316 		}
317 #ifdef SCTP
318 		if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
319 			sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
320 			m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
321 		}
322 #endif
323 	case 0:                 /* No IPSec */
324 	default:
325 		break;
326 	}
327 #endif /* IPSEC */
328 
329 	/*
330 	 * Calculate the total length of the extension header chain.
331 	 * Keep the length of the unfragmentable part for fragmentation.
332 	 */
333 	optlen = 0;
334 	if (exthdrs.ip6e_hbh)
335 		optlen += exthdrs.ip6e_hbh->m_len;
336 	if (exthdrs.ip6e_dest1)
337 		optlen += exthdrs.ip6e_dest1->m_len;
338 	if (exthdrs.ip6e_rthdr)
339 		optlen += exthdrs.ip6e_rthdr->m_len;
340 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
341 
342 	/* NOTE: we don't add AH/ESP length here. do that later. */
343 	if (exthdrs.ip6e_dest2)
344 		optlen += exthdrs.ip6e_dest2->m_len;
345 
346 	/*
347 	 * If we need IPsec, or there is at least one extension header,
348 	 * separate IP6 header from the payload.
349 	 */
350 	if ((needipsec || optlen) && !hdrsplit) {
351 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
352 			m = NULL;
353 			goto freehdrs;
354 		}
355 		m = exthdrs.ip6e_ip6;
356 		hdrsplit++;
357 	}
358 
359 	/* adjust pointer */
360 	ip6 = mtod(m, struct ip6_hdr *);
361 
362 	/* adjust mbuf packet header length */
363 	m->m_pkthdr.len += optlen;
364 	plen = m->m_pkthdr.len - sizeof(*ip6);
365 
366 	/* If this is a jumbo payload, insert a jumbo payload option. */
367 	if (plen > IPV6_MAXPACKET) {
368 		if (!hdrsplit) {
369 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
370 				m = NULL;
371 				goto freehdrs;
372 			}
373 			m = exthdrs.ip6e_ip6;
374 			hdrsplit++;
375 		}
376 		/* adjust pointer */
377 		ip6 = mtod(m, struct ip6_hdr *);
378 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
379 			goto freehdrs;
380 		ip6->ip6_plen = 0;
381 	} else
382 		ip6->ip6_plen = htons(plen);
383 
384 	/*
385 	 * Concatenate headers and fill in next header fields.
386 	 * Here we have, on "m"
387 	 *	IPv6 payload
388 	 * and we insert headers accordingly.  Finally, we should be getting:
389 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
390 	 *
391 	 * during the header composing process, "m" points to IPv6 header.
392 	 * "mprev" points to an extension header prior to esp.
393 	 */
394 	u_char *nexthdrp = &ip6->ip6_nxt;
395 	mprev = m;
396 
397 	/*
398 	 * we treat dest2 specially.  this makes IPsec processing
399 	 * much easier.  the goal here is to make mprev point the
400 	 * mbuf prior to dest2.
401 	 *
402 	 * result: IPv6 dest2 payload
403 	 * m and mprev will point to IPv6 header.
404 	 */
405 	if (exthdrs.ip6e_dest2) {
406 		if (!hdrsplit)
407 			panic("assumption failed: hdr not split");
408 		exthdrs.ip6e_dest2->m_next = m->m_next;
409 		m->m_next = exthdrs.ip6e_dest2;
410 		*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
411 		ip6->ip6_nxt = IPPROTO_DSTOPTS;
412 	}
413 
414 	/*
415 	 * result: IPv6 hbh dest1 rthdr dest2 payload
416 	 * m will point to IPv6 header.  mprev will point to the
417 	 * extension header prior to dest2 (rthdr in the above case).
418 	 */
419 	MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
420 	MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
421 		   IPPROTO_DSTOPTS);
422 	MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
423 		   IPPROTO_ROUTING);
424 
425 #ifdef IPSEC
426 	if (!needipsec)
427 		goto skip_ipsec2;
428 
429 	/*
430 	 * pointers after IPsec headers are not valid any more.
431 	 * other pointers need a great care too.
432 	 * (IPsec routines should not mangle mbufs prior to AH/ESP)
433 	 */
434 	exthdrs.ip6e_dest2 = NULL;
435 
436 	if (exthdrs.ip6e_rthdr) {
437 		rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
438 		segleft_org = rh->ip6r_segleft;
439 		rh->ip6r_segleft = 0;
440 	}
441 
442 	bzero(&state, sizeof(state));
443 	state.m = m;
444 	error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
445 				    &needipsectun);
446 	m = state.m;
447 	if (error == EJUSTRETURN) {
448 		/*
449 		 * We had a SP with a level of 'use' and no SA. We
450 		 * will just continue to process the packet without
451 		 * IPsec processing.
452 		 */
453 		;
454 	} else if (error) {
455 		/* mbuf is already reclaimed in ipsec6_output_trans. */
456 		m = NULL;
457 		switch (error) {
458 		case EHOSTUNREACH:
459 		case ENETUNREACH:
460 		case EMSGSIZE:
461 		case ENOBUFS:
462 		case ENOMEM:
463 			break;
464 		default:
465 			printf("[%s:%d] (ipsec): error code %d\n",
466 			    __func__, __LINE__, error);
467 			/* FALLTHROUGH */
468 		case ENOENT:
469 			/* don't show these error codes to the user */
470 			error = 0;
471 			break;
472 		}
473 		goto bad;
474 	} else if (!needipsectun) {
475 		/*
476 		 * In the FAST IPSec case we have already
477 		 * re-injected the packet and it has been freed
478 		 * by the ipsec_done() function.  So, just clean
479 		 * up after ourselves.
480 		 */
481 		m = NULL;
482 		goto done;
483 	}
484 	if (exthdrs.ip6e_rthdr) {
485 		/* ah6_output doesn't modify mbuf chain */
486 		rh->ip6r_segleft = segleft_org;
487 	}
488 skip_ipsec2:;
489 #endif /* IPSEC */
490 
491 	/*
492 	 * If there is a routing header, discard the packet.
493 	 */
494 	if (exthdrs.ip6e_rthdr) {
495 		 error = EINVAL;
496 		 goto bad;
497 	}
498 
499 	/* Source address validation */
500 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
501 	    (flags & IPV6_UNSPECSRC) == 0) {
502 		error = EOPNOTSUPP;
503 		V_ip6stat.ip6s_badscope++;
504 		goto bad;
505 	}
506 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
507 		error = EOPNOTSUPP;
508 		V_ip6stat.ip6s_badscope++;
509 		goto bad;
510 	}
511 
512 	V_ip6stat.ip6s_localout++;
513 
514 	/*
515 	 * Route packet.
516 	 */
517 	if (ro == 0) {
518 		ro = &ip6route;
519 		bzero((caddr_t)ro, sizeof(*ro));
520 	}
521 	ro_pmtu = ro;
522 	if (opt && opt->ip6po_rthdr)
523 		ro = &opt->ip6po_route;
524 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
525 #ifdef FLOWTABLE
526 	if (ro->ro_rt == NULL) {
527 		struct flentry *fle;
528 
529 		/*
530 		 * The flow table returns route entries valid for up to 30
531 		 * seconds; we rely on the remainder of ip_output() taking no
532 		 * longer than that long for the stability of ro_rt.  The
533 		 * flow ID assignment must have happened before this point.
534 		 */
535 		fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
536 		if (fle != NULL)
537 			flow_to_route_in6(fle, ro);
538 	}
539 #endif
540 again:
541 	/*
542 	 * if specified, try to fill in the traffic class field.
543 	 * do not override if a non-zero value is already set.
544 	 * we check the diffserv field and the ecn field separately.
545 	 */
546 	if (opt && opt->ip6po_tclass >= 0) {
547 		int mask = 0;
548 
549 		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
550 			mask |= 0xfc;
551 		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
552 			mask |= 0x03;
553 		if (mask != 0)
554 			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
555 	}
556 
557 	/* fill in or override the hop limit field, if necessary. */
558 	if (opt && opt->ip6po_hlim != -1)
559 		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
560 	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
561 		if (im6o != NULL)
562 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
563 		else
564 			ip6->ip6_hlim = V_ip6_defmcasthlim;
565 	}
566 
567 #ifdef IPSEC
568 	/*
569 	 * We may re-inject packets into the stack here.
570 	 */
571 	if (needipsec && needipsectun) {
572 		struct ipsec_output_state state;
573 
574 		/*
575 		 * All the extension headers will become inaccessible
576 		 * (since they can be encrypted).
577 		 * Don't panic, we need no more updates to extension headers
578 		 * on inner IPv6 packet (since they are now encapsulated).
579 		 *
580 		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
581 		 */
582 		bzero(&exthdrs, sizeof(exthdrs));
583 		exthdrs.ip6e_ip6 = m;
584 
585 		bzero(&state, sizeof(state));
586 		state.m = m;
587 		state.ro = (struct route *)ro;
588 		state.dst = (struct sockaddr *)dst;
589 
590 		error = ipsec6_output_tunnel(&state, sp, flags);
591 
592 		m = state.m;
593 		ro = (struct route_in6 *)state.ro;
594 		dst = (struct sockaddr_in6 *)state.dst;
595 		if (error == EJUSTRETURN) {
596 			/*
597 			 * We had a SP with a level of 'use' and no SA. We
598 			 * will just continue to process the packet without
599 			 * IPsec processing.
600 			 */
601 			;
602 		} else if (error) {
603 			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
604 			m0 = m = NULL;
605 			m = NULL;
606 			switch (error) {
607 			case EHOSTUNREACH:
608 			case ENETUNREACH:
609 			case EMSGSIZE:
610 			case ENOBUFS:
611 			case ENOMEM:
612 				break;
613 			default:
614 				printf("[%s:%d] (ipsec): error code %d\n",
615 				    __func__, __LINE__, error);
616 				/* FALLTHROUGH */
617 			case ENOENT:
618 				/* don't show these error codes to the user */
619 				error = 0;
620 				break;
621 			}
622 			goto bad;
623 		} else {
624 			/*
625 			 * In the FAST IPSec case we have already
626 			 * re-injected the packet and it has been freed
627 			 * by the ipsec_done() function.  So, just clean
628 			 * up after ourselves.
629 			 */
630 			m = NULL;
631 			goto done;
632 		}
633 
634 		exthdrs.ip6e_ip6 = m;
635 	}
636 #endif /* IPSEC */
637 
638 	/* adjust pointer */
639 	ip6 = mtod(m, struct ip6_hdr *);
640 
641 	bzero(&dst_sa, sizeof(dst_sa));
642 	dst_sa.sin6_family = AF_INET6;
643 	dst_sa.sin6_len = sizeof(dst_sa);
644 	dst_sa.sin6_addr = ip6->ip6_dst;
645 	if (ro->ro_rt) {
646 		rt = ro->ro_rt;
647 		ifp = ro->ro_rt->rt_ifp;
648 	} else if ((error = in6_selectroute_fib(&dst_sa, opt, im6o, ro,
649 	    &ifp, &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0) {
650 		switch (error) {
651 		case EHOSTUNREACH:
652 			V_ip6stat.ip6s_noroute++;
653 			break;
654 		case EADDRNOTAVAIL:
655 		default:
656 			break; /* XXX statistics? */
657 		}
658 		if (ifp != NULL)
659 			in6_ifstat_inc(ifp, ifs6_out_discard);
660 		goto bad;
661 	}
662 	if (rt == NULL) {
663 		/*
664 		 * If in6_selectroute() does not return a route entry,
665 		 * dst may not have been updated.
666 		 */
667 		*dst = dst_sa;	/* XXX */
668 	}
669 
670 	/*
671 	 * then rt (for unicast) and ifp must be non-NULL valid values.
672 	 */
673 	if ((flags & IPV6_FORWARDING) == 0) {
674 		/* XXX: the FORWARDING flag can be set for mrouting. */
675 		in6_ifstat_inc(ifp, ifs6_out_request);
676 	}
677 	if (rt != NULL) {
678 		ia = (struct in6_ifaddr *)(rt->rt_ifa);
679 		rt->rt_use++;
680 	}
681 
682 
683 	/*
684 	 * The outgoing interface must be in the zone of source and
685 	 * destination addresses.
686 	 */
687 	origifp = ifp;
688 
689 	src0 = ip6->ip6_src;
690 	if (in6_setscope(&src0, origifp, &zone))
691 		goto badscope;
692 	bzero(&src_sa, sizeof(src_sa));
693 	src_sa.sin6_family = AF_INET6;
694 	src_sa.sin6_len = sizeof(src_sa);
695 	src_sa.sin6_addr = ip6->ip6_src;
696 	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
697 		goto badscope;
698 
699 	dst0 = ip6->ip6_dst;
700 	if (in6_setscope(&dst0, origifp, &zone))
701 		goto badscope;
702 	/* re-initialize to be sure */
703 	bzero(&dst_sa, sizeof(dst_sa));
704 	dst_sa.sin6_family = AF_INET6;
705 	dst_sa.sin6_len = sizeof(dst_sa);
706 	dst_sa.sin6_addr = ip6->ip6_dst;
707 	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
708 		goto badscope;
709 	}
710 
711 	/* We should use ia_ifp to support the case of
712 	 * sending packets to an address of our own.
713 	 */
714 	if (ia != NULL && ia->ia_ifp)
715 		ifp = ia->ia_ifp;
716 
717 	/* scope check is done. */
718 	goto routefound;
719 
720   badscope:
721 	V_ip6stat.ip6s_badscope++;
722 	in6_ifstat_inc(origifp, ifs6_out_discard);
723 	if (error == 0)
724 		error = EHOSTUNREACH; /* XXX */
725 	goto bad;
726 
727   routefound:
728 	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
729 		if (opt && opt->ip6po_nextroute.ro_rt) {
730 			/*
731 			 * The nexthop is explicitly specified by the
732 			 * application.  We assume the next hop is an IPv6
733 			 * address.
734 			 */
735 			dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
736 		}
737 		else if ((rt->rt_flags & RTF_GATEWAY))
738 			dst = (struct sockaddr_in6 *)rt->rt_gateway;
739 	}
740 
741 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
742 		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
743 	} else {
744 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
745 		in6_ifstat_inc(ifp, ifs6_out_mcast);
746 		/*
747 		 * Confirm that the outgoing interface supports multicast.
748 		 */
749 		if (!(ifp->if_flags & IFF_MULTICAST)) {
750 			V_ip6stat.ip6s_noroute++;
751 			in6_ifstat_inc(ifp, ifs6_out_discard);
752 			error = ENETUNREACH;
753 			goto bad;
754 		}
755 		if ((im6o == NULL && in6_mcast_loop) ||
756 		    (im6o && im6o->im6o_multicast_loop)) {
757 			/*
758 			 * Loop back multicast datagram if not expressly
759 			 * forbidden to do so, even if we have not joined
760 			 * the address; protocols will filter it later,
761 			 * thus deferring a hash lookup and lock acquisition
762 			 * at the expense of an m_copym().
763 			 */
764 			ip6_mloopback(ifp, m, dst);
765 		} else {
766 			/*
767 			 * If we are acting as a multicast router, perform
768 			 * multicast forwarding as if the packet had just
769 			 * arrived on the interface to which we are about
770 			 * to send.  The multicast forwarding function
771 			 * recursively calls this function, using the
772 			 * IPV6_FORWARDING flag to prevent infinite recursion.
773 			 *
774 			 * Multicasts that are looped back by ip6_mloopback(),
775 			 * above, will be forwarded by the ip6_input() routine,
776 			 * if necessary.
777 			 */
778 			if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
779 				/*
780 				 * XXX: ip6_mforward expects that rcvif is NULL
781 				 * when it is called from the originating path.
782 				 * However, it is not always the case, since
783 				 * some versions of MGETHDR() do not
784 				 * initialize the field.
785 				 */
786 				m->m_pkthdr.rcvif = NULL;
787 				if (ip6_mforward(ip6, ifp, m) != 0) {
788 					m_freem(m);
789 					goto done;
790 				}
791 			}
792 		}
793 		/*
794 		 * Multicasts with a hoplimit of zero may be looped back,
795 		 * above, but must not be transmitted on a network.
796 		 * Also, multicasts addressed to the loopback interface
797 		 * are not sent -- the above call to ip6_mloopback() will
798 		 * loop back a copy if this host actually belongs to the
799 		 * destination group on the loopback interface.
800 		 */
801 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
802 		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
803 			m_freem(m);
804 			goto done;
805 		}
806 	}
807 
808 	/*
809 	 * Fill the outgoing interface to tell the upper layer
810 	 * to increment per-interface statistics.
811 	 */
812 	if (ifpp)
813 		*ifpp = ifp;
814 
815 	/* Determine path MTU. */
816 	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
817 	    &alwaysfrag, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0)
818 		goto bad;
819 
820 	/*
821 	 * The caller of this function may specify to use the minimum MTU
822 	 * in some cases.
823 	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
824 	 * setting.  The logic is a bit complicated; by default, unicast
825 	 * packets will follow path MTU while multicast packets will be sent at
826 	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
827 	 * including unicast ones will be sent at the minimum MTU.  Multicast
828 	 * packets will always be sent at the minimum MTU unless
829 	 * IP6PO_MINMTU_DISABLE is explicitly specified.
830 	 * See RFC 3542 for more details.
831 	 */
832 	if (mtu > IPV6_MMTU) {
833 		if ((flags & IPV6_MINMTU))
834 			mtu = IPV6_MMTU;
835 		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
836 			mtu = IPV6_MMTU;
837 		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
838 			 (opt == NULL ||
839 			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
840 			mtu = IPV6_MMTU;
841 		}
842 	}
843 
844 	/*
845 	 * clear embedded scope identifiers if necessary.
846 	 * in6_clearscope will touch the addresses only when necessary.
847 	 */
848 	in6_clearscope(&ip6->ip6_src);
849 	in6_clearscope(&ip6->ip6_dst);
850 
851 	/*
852 	 * If the outgoing packet contains a hop-by-hop options header,
853 	 * it must be examined and processed even by the source node.
854 	 * (RFC 2460, section 4.)
855 	 */
856 	if (exthdrs.ip6e_hbh) {
857 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
858 		u_int32_t dummy; /* XXX unused */
859 		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
860 
861 #ifdef DIAGNOSTIC
862 		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
863 			panic("ip6e_hbh is not contiguous");
864 #endif
865 		/*
866 		 *  XXX: if we have to send an ICMPv6 error to the sender,
867 		 *       we need the M_LOOP flag since icmp6_error() expects
868 		 *       the IPv6 and the hop-by-hop options header are
869 		 *       contiguous unless the flag is set.
870 		 */
871 		m->m_flags |= M_LOOP;
872 		m->m_pkthdr.rcvif = ifp;
873 		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
874 		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
875 		    &dummy, &plen) < 0) {
876 			/* m was already freed at this point */
877 			error = EINVAL;/* better error? */
878 			goto done;
879 		}
880 		m->m_flags &= ~M_LOOP; /* XXX */
881 		m->m_pkthdr.rcvif = NULL;
882 	}
883 
884 	/* Jump over all PFIL processing if hooks are not active. */
885 	if (!PFIL_HOOKED(&V_inet6_pfil_hook))
886 		goto passout;
887 
888 	odst = ip6->ip6_dst;
889 	/* Run through list of hooks for output packets. */
890 	error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
891 	if (error != 0 || m == NULL)
892 		goto done;
893 	ip6 = mtod(m, struct ip6_hdr *);
894 
895 	/* See if destination IP address was changed by packet filter. */
896 	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
897 		m->m_flags |= M_SKIP_FIREWALL;
898 		/* If destination is now ourself drop to ip6_input(). */
899 		if (in6_localip(&ip6->ip6_dst)) {
900 			m->m_flags |= M_FASTFWD_OURS;
901 			if (m->m_pkthdr.rcvif == NULL)
902 				m->m_pkthdr.rcvif = V_loif;
903 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
904 				m->m_pkthdr.csum_flags |=
905 				    CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
906 				m->m_pkthdr.csum_data = 0xffff;
907 			}
908 #ifdef SCTP
909 			if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
910 				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
911 #endif
912 			error = netisr_queue(NETISR_IPV6, m);
913 			goto done;
914 		} else
915 			goto again;	/* Redo the routing table lookup. */
916 	}
917 
918 #ifdef IPFIREWALL_FORWARD
919 	/* See if local, if yes, send it to netisr. */
920 	if (m->m_flags & M_FASTFWD_OURS) {
921 		if (m->m_pkthdr.rcvif == NULL)
922 			m->m_pkthdr.rcvif = V_loif;
923 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
924 			m->m_pkthdr.csum_flags |=
925 			    CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
926 			m->m_pkthdr.csum_data = 0xffff;
927 		}
928 #ifdef SCTP
929 		if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
930 			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
931 #endif
932 		error = netisr_queue(NETISR_IPV6, m);
933 		goto done;
934 	}
935 	/* Or forward to some other address? */
936 	fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
937 	if (fwd_tag) {
938 		dst = (struct sockaddr_in6 *)&ro->ro_dst;
939 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in6));
940 		m->m_flags |= M_SKIP_FIREWALL;
941 		m_tag_delete(m, fwd_tag);
942 		goto again;
943 	}
944 #endif /* IPFIREWALL_FORWARD */
945 
946 passout:
947 	/*
948 	 * Send the packet to the outgoing interface.
949 	 * If necessary, do IPv6 fragmentation before sending.
950 	 *
951 	 * the logic here is rather complex:
952 	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
953 	 * 1-a:	send as is if tlen <= path mtu
954 	 * 1-b:	fragment if tlen > path mtu
955 	 *
956 	 * 2: if user asks us not to fragment (dontfrag == 1)
957 	 * 2-a:	send as is if tlen <= interface mtu
958 	 * 2-b:	error if tlen > interface mtu
959 	 *
960 	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
961 	 *	always fragment
962 	 *
963 	 * 4: if dontfrag == 1 && alwaysfrag == 1
964 	 *	error, as we cannot handle this conflicting request
965 	 */
966 	sw_csum = m->m_pkthdr.csum_flags;
967 	if (!hdrsplit) {
968 		tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0;
969 		sw_csum &= ~ifp->if_hwassist;
970 	} else
971 		tso = 0;
972 	/*
973 	 * If we added extension headers, we will not do TSO and calculate the
974 	 * checksums ourselves for now.
975 	 * XXX-BZ  Need a framework to know when the NIC can handle it, even
976 	 * with ext. hdrs.
977 	 */
978 	if (sw_csum & CSUM_DELAY_DATA_IPV6) {
979 		sw_csum &= ~CSUM_DELAY_DATA_IPV6;
980 		in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
981 	}
982 #ifdef SCTP
983 	if (sw_csum & CSUM_SCTP_IPV6) {
984 		sw_csum &= ~CSUM_SCTP_IPV6;
985 		sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
986 	}
987 #endif
988 	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
989 	tlen = m->m_pkthdr.len;
990 
991 	if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso)
992 		dontfrag = 1;
993 	else
994 		dontfrag = 0;
995 	if (dontfrag && alwaysfrag) {	/* case 4 */
996 		/* conflicting request - can't transmit */
997 		error = EMSGSIZE;
998 		goto bad;
999 	}
1000 	if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) {	/* case 2-b */
1001 		/*
1002 		 * Even if the DONTFRAG option is specified, we cannot send the
1003 		 * packet when the data length is larger than the MTU of the
1004 		 * outgoing interface.
1005 		 * Notify the error by sending IPV6_PATHMTU ancillary data as
1006 		 * well as returning an error code (the latter is not described
1007 		 * in the API spec.)
1008 		 */
1009 		u_int32_t mtu32;
1010 		struct ip6ctlparam ip6cp;
1011 
1012 		mtu32 = (u_int32_t)mtu;
1013 		bzero(&ip6cp, sizeof(ip6cp));
1014 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1015 		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1016 		    (void *)&ip6cp);
1017 
1018 		error = EMSGSIZE;
1019 		goto bad;
1020 	}
1021 
1022 	/*
1023 	 * transmit packet without fragmentation
1024 	 */
1025 	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
1026 		struct in6_ifaddr *ia6;
1027 
1028 		ip6 = mtod(m, struct ip6_hdr *);
1029 		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1030 		if (ia6) {
1031 			/* Record statistics for this interface address. */
1032 			ia6->ia_ifa.if_opackets++;
1033 			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
1034 			ifa_free(&ia6->ia_ifa);
1035 		}
1036 		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1037 		goto done;
1038 	}
1039 
1040 	/*
1041 	 * try to fragment the packet.  case 1-b and 3
1042 	 */
1043 	if (mtu < IPV6_MMTU) {
1044 		/* path MTU cannot be less than IPV6_MMTU */
1045 		error = EMSGSIZE;
1046 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1047 		goto bad;
1048 	} else if (ip6->ip6_plen == 0) {
1049 		/* jumbo payload cannot be fragmented */
1050 		error = EMSGSIZE;
1051 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1052 		goto bad;
1053 	} else {
1054 		struct mbuf **mnext, *m_frgpart;
1055 		struct ip6_frag *ip6f;
1056 		u_int32_t id = htonl(ip6_randomid());
1057 		u_char nextproto;
1058 
1059 		int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
1060 
1061 		/*
1062 		 * Too large for the destination or interface;
1063 		 * fragment if possible.
1064 		 * Must be able to put at least 8 bytes per fragment.
1065 		 */
1066 		hlen = unfragpartlen;
1067 		if (mtu > IPV6_MAXPACKET)
1068 			mtu = IPV6_MAXPACKET;
1069 
1070 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1071 		if (len < 8) {
1072 			error = EMSGSIZE;
1073 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1074 			goto bad;
1075 		}
1076 
1077 		/*
1078 		 * Verify that we have any chance at all of being able to queue
1079 		 *      the packet or packet fragments
1080 		 */
1081 		if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1082 		    < tlen  /* - hlen */)) {
1083 			error = ENOBUFS;
1084 			V_ip6stat.ip6s_odropped++;
1085 			goto bad;
1086 		}
1087 
1088 
1089 		/*
1090 		 * If the interface will not calculate checksums on
1091 		 * fragmented packets, then do it here.
1092 		 * XXX-BZ handle the hw offloading case.  Need flags.
1093 		 */
1094 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1095 			in6_delayed_cksum(m, plen, hlen);
1096 			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
1097 		}
1098 #ifdef SCTP
1099 		if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
1100 			sctp_delayed_cksum(m, hlen);
1101 			m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
1102 		}
1103 #endif
1104 		mnext = &m->m_nextpkt;
1105 
1106 		/*
1107 		 * Change the next header field of the last header in the
1108 		 * unfragmentable part.
1109 		 */
1110 		if (exthdrs.ip6e_rthdr) {
1111 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1112 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1113 		} else if (exthdrs.ip6e_dest1) {
1114 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1115 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1116 		} else if (exthdrs.ip6e_hbh) {
1117 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1118 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1119 		} else {
1120 			nextproto = ip6->ip6_nxt;
1121 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1122 		}
1123 
1124 		/*
1125 		 * Loop through length of segment after first fragment,
1126 		 * make new header and copy data of each part and link onto
1127 		 * chain.
1128 		 */
1129 		m0 = m;
1130 		for (off = hlen; off < tlen; off += len) {
1131 			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1132 			if (!m) {
1133 				error = ENOBUFS;
1134 				V_ip6stat.ip6s_odropped++;
1135 				goto sendorfree;
1136 			}
1137 			m->m_pkthdr.rcvif = NULL;
1138 			m->m_flags = m0->m_flags & M_COPYFLAGS;	/* incl. FIB */
1139 			*mnext = m;
1140 			mnext = &m->m_nextpkt;
1141 			m->m_data += max_linkhdr;
1142 			mhip6 = mtod(m, struct ip6_hdr *);
1143 			*mhip6 = *ip6;
1144 			m->m_len = sizeof(*mhip6);
1145 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1146 			if (error) {
1147 				V_ip6stat.ip6s_odropped++;
1148 				goto sendorfree;
1149 			}
1150 			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1151 			if (off + len >= tlen)
1152 				len = tlen - off;
1153 			else
1154 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1155 			mhip6->ip6_plen = htons((u_short)(len + hlen +
1156 			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1157 			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1158 				error = ENOBUFS;
1159 				V_ip6stat.ip6s_odropped++;
1160 				goto sendorfree;
1161 			}
1162 			m_cat(m, m_frgpart);
1163 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1164 			m->m_pkthdr.rcvif = NULL;
1165 			ip6f->ip6f_reserved = 0;
1166 			ip6f->ip6f_ident = id;
1167 			ip6f->ip6f_nxt = nextproto;
1168 			V_ip6stat.ip6s_ofragments++;
1169 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1170 		}
1171 
1172 		in6_ifstat_inc(ifp, ifs6_out_fragok);
1173 	}
1174 
1175 	/*
1176 	 * Remove leading garbages.
1177 	 */
1178 sendorfree:
1179 	m = m0->m_nextpkt;
1180 	m0->m_nextpkt = 0;
1181 	m_freem(m0);
1182 	for (m0 = m; m; m = m0) {
1183 		m0 = m->m_nextpkt;
1184 		m->m_nextpkt = 0;
1185 		if (error == 0) {
1186 			/* Record statistics for this interface address. */
1187 			if (ia) {
1188 				ia->ia_ifa.if_opackets++;
1189 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1190 			}
1191 			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1192 		} else
1193 			m_freem(m);
1194 	}
1195 
1196 	if (error == 0)
1197 		V_ip6stat.ip6s_fragmented++;
1198 
1199 done:
1200 	if (ro == &ip6route)
1201 		RO_RTFREE(ro);
1202 	if (ro_pmtu == &ip6route)
1203 		RO_RTFREE(ro_pmtu);
1204 #ifdef IPSEC
1205 	if (sp != NULL)
1206 		KEY_FREESP(&sp);
1207 #endif
1208 
1209 	return (error);
1210 
1211 freehdrs:
1212 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1213 	m_freem(exthdrs.ip6e_dest1);
1214 	m_freem(exthdrs.ip6e_rthdr);
1215 	m_freem(exthdrs.ip6e_dest2);
1216 	/* FALLTHROUGH */
1217 bad:
1218 	if (m)
1219 		m_freem(m);
1220 	goto done;
1221 }
1222 
1223 static int
1224 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1225 {
1226 	struct mbuf *m;
1227 
1228 	if (hlen > MCLBYTES)
1229 		return (ENOBUFS); /* XXX */
1230 
1231 	MGET(m, M_DONTWAIT, MT_DATA);
1232 	if (!m)
1233 		return (ENOBUFS);
1234 
1235 	if (hlen > MLEN) {
1236 		MCLGET(m, M_DONTWAIT);
1237 		if ((m->m_flags & M_EXT) == 0) {
1238 			m_free(m);
1239 			return (ENOBUFS);
1240 		}
1241 	}
1242 	m->m_len = hlen;
1243 	if (hdr)
1244 		bcopy(hdr, mtod(m, caddr_t), hlen);
1245 
1246 	*mp = m;
1247 	return (0);
1248 }
1249 
1250 /*
1251  * Insert jumbo payload option.
1252  */
1253 static int
1254 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1255 {
1256 	struct mbuf *mopt;
1257 	u_char *optbuf;
1258 	u_int32_t v;
1259 
1260 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1261 
1262 	/*
1263 	 * If there is no hop-by-hop options header, allocate new one.
1264 	 * If there is one but it doesn't have enough space to store the
1265 	 * jumbo payload option, allocate a cluster to store the whole options.
1266 	 * Otherwise, use it to store the options.
1267 	 */
1268 	if (exthdrs->ip6e_hbh == 0) {
1269 		MGET(mopt, M_DONTWAIT, MT_DATA);
1270 		if (mopt == 0)
1271 			return (ENOBUFS);
1272 		mopt->m_len = JUMBOOPTLEN;
1273 		optbuf = mtod(mopt, u_char *);
1274 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1275 		exthdrs->ip6e_hbh = mopt;
1276 	} else {
1277 		struct ip6_hbh *hbh;
1278 
1279 		mopt = exthdrs->ip6e_hbh;
1280 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1281 			/*
1282 			 * XXX assumption:
1283 			 * - exthdrs->ip6e_hbh is not referenced from places
1284 			 *   other than exthdrs.
1285 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1286 			 */
1287 			int oldoptlen = mopt->m_len;
1288 			struct mbuf *n;
1289 
1290 			/*
1291 			 * XXX: give up if the whole (new) hbh header does
1292 			 * not fit even in an mbuf cluster.
1293 			 */
1294 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1295 				return (ENOBUFS);
1296 
1297 			/*
1298 			 * As a consequence, we must always prepare a cluster
1299 			 * at this point.
1300 			 */
1301 			MGET(n, M_DONTWAIT, MT_DATA);
1302 			if (n) {
1303 				MCLGET(n, M_DONTWAIT);
1304 				if ((n->m_flags & M_EXT) == 0) {
1305 					m_freem(n);
1306 					n = NULL;
1307 				}
1308 			}
1309 			if (!n)
1310 				return (ENOBUFS);
1311 			n->m_len = oldoptlen + JUMBOOPTLEN;
1312 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1313 			    oldoptlen);
1314 			optbuf = mtod(n, caddr_t) + oldoptlen;
1315 			m_freem(mopt);
1316 			mopt = exthdrs->ip6e_hbh = n;
1317 		} else {
1318 			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1319 			mopt->m_len += JUMBOOPTLEN;
1320 		}
1321 		optbuf[0] = IP6OPT_PADN;
1322 		optbuf[1] = 1;
1323 
1324 		/*
1325 		 * Adjust the header length according to the pad and
1326 		 * the jumbo payload option.
1327 		 */
1328 		hbh = mtod(mopt, struct ip6_hbh *);
1329 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1330 	}
1331 
1332 	/* fill in the option. */
1333 	optbuf[2] = IP6OPT_JUMBO;
1334 	optbuf[3] = 4;
1335 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1336 	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1337 
1338 	/* finally, adjust the packet header length */
1339 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1340 
1341 	return (0);
1342 #undef JUMBOOPTLEN
1343 }
1344 
1345 /*
1346  * Insert fragment header and copy unfragmentable header portions.
1347  */
1348 static int
1349 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1350     struct ip6_frag **frghdrp)
1351 {
1352 	struct mbuf *n, *mlast;
1353 
1354 	if (hlen > sizeof(struct ip6_hdr)) {
1355 		n = m_copym(m0, sizeof(struct ip6_hdr),
1356 		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1357 		if (n == 0)
1358 			return (ENOBUFS);
1359 		m->m_next = n;
1360 	} else
1361 		n = m;
1362 
1363 	/* Search for the last mbuf of unfragmentable part. */
1364 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1365 		;
1366 
1367 	if ((mlast->m_flags & M_EXT) == 0 &&
1368 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1369 		/* use the trailing space of the last mbuf for the fragment hdr */
1370 		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1371 		    mlast->m_len);
1372 		mlast->m_len += sizeof(struct ip6_frag);
1373 		m->m_pkthdr.len += sizeof(struct ip6_frag);
1374 	} else {
1375 		/* allocate a new mbuf for the fragment header */
1376 		struct mbuf *mfrg;
1377 
1378 		MGET(mfrg, M_DONTWAIT, MT_DATA);
1379 		if (mfrg == 0)
1380 			return (ENOBUFS);
1381 		mfrg->m_len = sizeof(struct ip6_frag);
1382 		*frghdrp = mtod(mfrg, struct ip6_frag *);
1383 		mlast->m_next = mfrg;
1384 	}
1385 
1386 	return (0);
1387 }
1388 
1389 static int
1390 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1391     struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
1392     int *alwaysfragp, u_int fibnum)
1393 {
1394 	u_int32_t mtu = 0;
1395 	int alwaysfrag = 0;
1396 	int error = 0;
1397 
1398 	if (ro_pmtu != ro) {
1399 		/* The first hop and the final destination may differ. */
1400 		struct sockaddr_in6 *sa6_dst =
1401 		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1402 		if (ro_pmtu->ro_rt &&
1403 		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1404 		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1405 			RTFREE(ro_pmtu->ro_rt);
1406 			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1407 		}
1408 		if (ro_pmtu->ro_rt == NULL) {
1409 			bzero(sa6_dst, sizeof(*sa6_dst));
1410 			sa6_dst->sin6_family = AF_INET6;
1411 			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1412 			sa6_dst->sin6_addr = *dst;
1413 
1414 			in6_rtalloc(ro_pmtu, fibnum);
1415 		}
1416 	}
1417 	if (ro_pmtu->ro_rt) {
1418 		u_int32_t ifmtu;
1419 		struct in_conninfo inc;
1420 
1421 		bzero(&inc, sizeof(inc));
1422 		inc.inc_flags |= INC_ISIPV6;
1423 		inc.inc6_faddr = *dst;
1424 
1425 		if (ifp == NULL)
1426 			ifp = ro_pmtu->ro_rt->rt_ifp;
1427 		ifmtu = IN6_LINKMTU(ifp);
1428 		mtu = tcp_hc_getmtu(&inc);
1429 		if (mtu)
1430 			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1431 		else
1432 			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1433 		if (mtu == 0)
1434 			mtu = ifmtu;
1435 		else if (mtu < IPV6_MMTU) {
1436 			/*
1437 			 * RFC2460 section 5, last paragraph:
1438 			 * if we record ICMPv6 too big message with
1439 			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1440 			 * or smaller, with framgent header attached.
1441 			 * (fragment header is needed regardless from the
1442 			 * packet size, for translators to identify packets)
1443 			 */
1444 			alwaysfrag = 1;
1445 			mtu = IPV6_MMTU;
1446 		} else if (mtu > ifmtu) {
1447 			/*
1448 			 * The MTU on the route is larger than the MTU on
1449 			 * the interface!  This shouldn't happen, unless the
1450 			 * MTU of the interface has been changed after the
1451 			 * interface was brought up.  Change the MTU in the
1452 			 * route to match the interface MTU (as long as the
1453 			 * field isn't locked).
1454 			 */
1455 			mtu = ifmtu;
1456 			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1457 		}
1458 	} else if (ifp) {
1459 		mtu = IN6_LINKMTU(ifp);
1460 	} else
1461 		error = EHOSTUNREACH; /* XXX */
1462 
1463 	*mtup = mtu;
1464 	if (alwaysfragp)
1465 		*alwaysfragp = alwaysfrag;
1466 	return (error);
1467 }
1468 
1469 /*
1470  * IP6 socket option processing.
 *
 * Dispatches a socket option request on the option level, the direction
 * (SOPT_SET / SOPT_GET) and the option name, all taken from sopt.
 *
 * For levels other than IPPROTO_IPV6 the result is EINVAL, except that a
 * SOL_SOCKET set of SO_REUSEADDR, SO_REUSEPORT or SO_SETFIB updates the
 * inpcb (INP_REUSEPORT in inp_flags2, or inc_fibnum) from the socket and
 * returns 0.
 *
 * For IPPROTO_IPV6, option names that are not handled yield ENOPROTOOPT.
 *
 * Returns 0 on success or an errno value.
1471  */
1472 int
1473 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1474 {
1475 	int optdatalen, uproto;
1476 	void *optdata;
1477 	struct inpcb *in6p = sotoinpcb(so);
1478 	int error, optval;
1479 	int level, op, optname;
1480 	int optlen;
1481 	struct thread *td;
1482 
1483 	level = sopt->sopt_level;
1484 	op = sopt->sopt_dir;
1485 	optname = sopt->sopt_name;
1486 	optlen = sopt->sopt_valsize;
1487 	td = sopt->sopt_td;
1488 	error = 0;
1489 	optval = 0;
1490 	uproto = (int)so->so_proto->pr_protocol;
1491 
1492 	if (level != IPPROTO_IPV6) {
		/* Default for foreign levels; cleared by the cases below. */
1493 		error = EINVAL;
1494 
1495 		if (sopt->sopt_level == SOL_SOCKET &&
1496 		    sopt->sopt_dir == SOPT_SET) {
1497 			switch (sopt->sopt_name) {
1498 			case SO_REUSEADDR:
1499 				INP_WLOCK(in6p);
				/*
				 * NOTE(review): this tests the IPv4 local
				 * address field with IN_MULTICAST although
				 * this is the IPv6 option path -- confirm
				 * that is intended.
				 */
1500 				if (IN_MULTICAST(ntohl(in6p->inp_laddr.s_addr))) {
1501 					if ((so->so_options &
1502 					    (SO_REUSEADDR | SO_REUSEPORT)) != 0)
1503 						in6p->inp_flags2 |= INP_REUSEPORT;
1504 					else
1505 						in6p->inp_flags2 &= ~INP_REUSEPORT;
1506 				}
1507 				INP_WUNLOCK(in6p);
1508 				error = 0;
1509 				break;
1510 			case SO_REUSEPORT:
1511 				INP_WLOCK(in6p);
1512 				if ((so->so_options & SO_REUSEPORT) != 0)
1513 					in6p->inp_flags2 |= INP_REUSEPORT;
1514 				else
1515 					in6p->inp_flags2 &= ~INP_REUSEPORT;
1516 				INP_WUNLOCK(in6p);
1517 				error = 0;
1518 				break;
1519 			case SO_SETFIB:
1520 				INP_WLOCK(in6p);
1521 				in6p->inp_inc.inc_fibnum = so->so_fibnum;
1522 				INP_WUNLOCK(in6p);
1523 				error = 0;
1524 				break;
1525 			default:
1526 				break;
1527 			}
1528 		}
1529 	} else {		/* level == IPPROTO_IPV6 */
1530 		switch (op) {
1531 
1532 		case SOPT_SET:
1533 			switch (optname) {
1534 			case IPV6_2292PKTOPTIONS:
1535 #ifdef IPV6_PKTOPTIONS
1536 			case IPV6_PKTOPTIONS:
1537 #endif
1538 			{
1539 				struct mbuf *m;
1540 
				/*
				 * m is not freed here on the two early
				 * breaks; presumably the soopt_* helpers
				 * release it on failure -- TODO confirm.
				 */
1541 				error = soopt_getm(sopt, &m); /* XXX */
1542 				if (error != 0)
1543 					break;
1544 				error = soopt_mcopyin(sopt, m); /* XXX */
1545 				if (error != 0)
1546 					break;
1547 				error = ip6_pcbopts(&in6p->in6p_outputopts,
1548 						    m, so, sopt);
1549 				m_freem(m); /* XXX */
1550 				break;
1551 			}
1552 
1553 			/*
1554 			 * Use of some Hop-by-Hop options or some
1555 			 * Destination options, might require special
1556 			 * privilege.  That is, normal applications
1557 			 * (without special privilege) might be forbidden
1558 			 * from setting certain options in outgoing packets,
1559 			 * and might never see certain options in received
1560 			 * packets. [RFC 2292 Section 6]
1561 			 * KAME specific note:
1562 			 *  KAME prevents non-privileged users from sending or
1563 			 *  receiving ANY hbh/dst options in order to avoid
1564 			 *  overhead of parsing options in the kernel.
1565 			 */
1566 			case IPV6_RECVHOPOPTS:
1567 			case IPV6_RECVDSTOPTS:
1568 			case IPV6_RECVRTHDRDSTOPTS:
1569 				if (td != NULL) {
1570 					error = priv_check(td,
1571 					    PRIV_NETINET_SETHDROPTS);
1572 					if (error)
1573 						break;
1574 				}
1575 				/* FALLTHROUGH */
1576 			case IPV6_UNICAST_HOPS:
1577 			case IPV6_HOPLIMIT:
1578 			case IPV6_FAITH:
1579 
1580 			case IPV6_RECVPKTINFO:
1581 			case IPV6_RECVHOPLIMIT:
1582 			case IPV6_RECVRTHDR:
1583 			case IPV6_RECVPATHMTU:
1584 			case IPV6_RECVTCLASS:
1585 			case IPV6_V6ONLY:
1586 			case IPV6_AUTOFLOWLABEL:
1587 			case IPV6_BINDANY:
				/*
				 * Every option in this group takes exactly
				 * one int, copied into optval below.
				 */
1588 				if (optname == IPV6_BINDANY && td != NULL) {
1589 					error = priv_check(td,
1590 					    PRIV_NETINET_BINDANY);
1591 					if (error)
1592 						break;
1593 				}
1594 
1595 				if (optlen != sizeof(int)) {
1596 					error = EINVAL;
1597 					break;
1598 				}
1599 				error = sooptcopyin(sopt, &optval,
1600 					sizeof optval, sizeof optval);
1601 				if (error)
1602 					break;
1603 				switch (optname) {
1604 
1605 				case IPV6_UNICAST_HOPS:
1606 					if (optval < -1 || optval >= 256)
1607 						error = EINVAL;
1608 					else {
1609 						/* -1 = kernel default */
1610 						in6p->in6p_hops = optval;
1611 						if ((in6p->inp_vflag &
1612 						     INP_IPV4) != 0)
1613 							in6p->inp_ip_ttl = optval;
1614 					}
1615 					break;
/*
 * OPTSET sets or clears (bit) in inp_flags according to optval, under the
 * inpcb write lock.  OPTSET2292 does the same and additionally marks the
 * pcb with IN6P_RFC2292.  OPTBIT reads a flag as 0/1 without locking.
 */
1616 #define OPTSET(bit) \
1617 do { \
1618 	INP_WLOCK(in6p); \
1619 	if (optval) \
1620 		in6p->inp_flags |= (bit); \
1621 	else \
1622 		in6p->inp_flags &= ~(bit); \
1623 	INP_WUNLOCK(in6p); \
1624 } while (/*CONSTCOND*/ 0)
1625 #define OPTSET2292(bit) \
1626 do { \
1627 	INP_WLOCK(in6p); \
1628 	in6p->inp_flags |= IN6P_RFC2292; \
1629 	if (optval) \
1630 		in6p->inp_flags |= (bit); \
1631 	else \
1632 		in6p->inp_flags &= ~(bit); \
1633 	INP_WUNLOCK(in6p); \
1634 } while (/*CONSTCOND*/ 0)
1635 #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
1636 
1637 				case IPV6_RECVPKTINFO:
1638 					/* cannot mix with RFC2292 */
1639 					if (OPTBIT(IN6P_RFC2292)) {
1640 						error = EINVAL;
1641 						break;
1642 					}
1643 					OPTSET(IN6P_PKTINFO);
1644 					break;
1645 
1646 				case IPV6_HOPLIMIT:
1647 				{
1648 					struct ip6_pktopts **optp;
1649 
1650 					/* cannot mix with RFC2292 */
1651 					if (OPTBIT(IN6P_RFC2292)) {
1652 						error = EINVAL;
1653 						break;
1654 					}
1655 					optp = &in6p->in6p_outputopts;
1656 					error = ip6_pcbopt(IPV6_HOPLIMIT,
1657 					    (u_char *)&optval, sizeof(optval),
1658 					    optp, (td != NULL) ? td->td_ucred :
1659 					    NULL, uproto);
1660 					break;
1661 				}
1662 
1663 				case IPV6_RECVHOPLIMIT:
1664 					/* cannot mix with RFC2292 */
1665 					if (OPTBIT(IN6P_RFC2292)) {
1666 						error = EINVAL;
1667 						break;
1668 					}
1669 					OPTSET(IN6P_HOPLIMIT);
1670 					break;
1671 
1672 				case IPV6_RECVHOPOPTS:
1673 					/* cannot mix with RFC2292 */
1674 					if (OPTBIT(IN6P_RFC2292)) {
1675 						error = EINVAL;
1676 						break;
1677 					}
1678 					OPTSET(IN6P_HOPOPTS);
1679 					break;
1680 
1681 				case IPV6_RECVDSTOPTS:
1682 					/* cannot mix with RFC2292 */
1683 					if (OPTBIT(IN6P_RFC2292)) {
1684 						error = EINVAL;
1685 						break;
1686 					}
1687 					OPTSET(IN6P_DSTOPTS);
1688 					break;
1689 
1690 				case IPV6_RECVRTHDRDSTOPTS:
1691 					/* cannot mix with RFC2292 */
1692 					if (OPTBIT(IN6P_RFC2292)) {
1693 						error = EINVAL;
1694 						break;
1695 					}
1696 					OPTSET(IN6P_RTHDRDSTOPTS);
1697 					break;
1698 
1699 				case IPV6_RECVRTHDR:
1700 					/* cannot mix with RFC2292 */
1701 					if (OPTBIT(IN6P_RFC2292)) {
1702 						error = EINVAL;
1703 						break;
1704 					}
1705 					OPTSET(IN6P_RTHDR);
1706 					break;
1707 
1708 				case IPV6_FAITH:
1709 					OPTSET(INP_FAITH);
1710 					break;
1711 
1712 				case IPV6_RECVPATHMTU:
1713 					/*
1714 					 * We ignore this option for TCP
1715 					 * sockets.
1716 					 * (RFC3542 leaves this case
1717 					 * unspecified.)
1718 					 */
1719 					if (uproto != IPPROTO_TCP)
1720 						OPTSET(IN6P_MTU);
1721 					break;
1722 
1723 				case IPV6_V6ONLY:
1724 					/*
1725 					 * make setsockopt(IPV6_V6ONLY)
1726 					 * available only prior to bind(2).
1727 					 * see ipng mailing list, Jun 22 2001.
1728 					 */
1729 					if (in6p->inp_lport ||
1730 					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1731 						error = EINVAL;
1732 						break;
1733 					}
1734 					OPTSET(IN6P_IPV6_V6ONLY);
					/*
					 * Keep inp_vflag in step with the
					 * flag; note this update happens
					 * after OPTSET dropped the lock.
					 */
1735 					if (optval)
1736 						in6p->inp_vflag &= ~INP_IPV4;
1737 					else
1738 						in6p->inp_vflag |= INP_IPV4;
1739 					break;
1740 				case IPV6_RECVTCLASS:
1741 					/* cannot mix with RFC2292 XXX */
1742 					if (OPTBIT(IN6P_RFC2292)) {
1743 						error = EINVAL;
1744 						break;
1745 					}
1746 					OPTSET(IN6P_TCLASS);
1747 					break;
1748 				case IPV6_AUTOFLOWLABEL:
1749 					OPTSET(IN6P_AUTOFLOWLABEL);
1750 					break;
1751 
1752 				case IPV6_BINDANY:
1753 					OPTSET(INP_BINDANY);
1754 					break;
1755 				}
1756 				break;
1757 
1758 			case IPV6_TCLASS:
1759 			case IPV6_DONTFRAG:
1760 			case IPV6_USE_MIN_MTU:
1761 			case IPV6_PREFER_TEMPADDR:
				/*
				 * int-valued options stored in the pcb's
				 * output options via ip6_pcbopt().
				 */
1762 				if (optlen != sizeof(optval)) {
1763 					error = EINVAL;
1764 					break;
1765 				}
1766 				error = sooptcopyin(sopt, &optval,
1767 					sizeof optval, sizeof optval);
1768 				if (error)
1769 					break;
1770 				{
1771 					struct ip6_pktopts **optp;
1772 					optp = &in6p->in6p_outputopts;
1773 					error = ip6_pcbopt(optname,
1774 					    (u_char *)&optval, sizeof(optval),
1775 					    optp, (td != NULL) ? td->td_ucred :
1776 					    NULL, uproto);
1777 					break;
1778 				}
1779 
1780 			case IPV6_2292PKTINFO:
1781 			case IPV6_2292HOPLIMIT:
1782 			case IPV6_2292HOPOPTS:
1783 			case IPV6_2292DSTOPTS:
1784 			case IPV6_2292RTHDR:
1785 				/* RFC 2292 */
1786 				if (optlen != sizeof(int)) {
1787 					error = EINVAL;
1788 					break;
1789 				}
1790 				error = sooptcopyin(sopt, &optval,
1791 					sizeof optval, sizeof optval);
1792 				if (error)
1793 					break;
1794 				switch (optname) {
1795 				case IPV6_2292PKTINFO:
1796 					OPTSET2292(IN6P_PKTINFO);
1797 					break;
1798 				case IPV6_2292HOPLIMIT:
1799 					OPTSET2292(IN6P_HOPLIMIT);
1800 					break;
1801 				case IPV6_2292HOPOPTS:
1802 					/*
1803 					 * Check super-user privilege.
1804 					 * See comments for IPV6_RECVHOPOPTS.
1805 					 */
1806 					if (td != NULL) {
1807 						error = priv_check(td,
1808 						    PRIV_NETINET_SETHDROPTS);
						/* No lock is held yet, so a direct return is safe. */
1809 						if (error)
1810 							return (error);
1811 					}
1812 					OPTSET2292(IN6P_HOPOPTS);
1813 					break;
1814 				case IPV6_2292DSTOPTS:
1815 					if (td != NULL) {
1816 						error = priv_check(td,
1817 						    PRIV_NETINET_SETHDROPTS);
1818 						if (error)
1819 							return (error);
1820 					}
1821 					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1822 					break;
1823 				case IPV6_2292RTHDR:
1824 					OPTSET2292(IN6P_RTHDR);
1825 					break;
1826 				}
1827 				break;
1828 			case IPV6_PKTINFO:
1829 			case IPV6_HOPOPTS:
1830 			case IPV6_RTHDR:
1831 			case IPV6_DSTOPTS:
1832 			case IPV6_RTHDRDSTOPTS:
1833 			case IPV6_NEXTHOP:
1834 			{
1835 				/* new advanced API (RFC3542) */
1836 				u_char *optbuf;
				/* NOTE(review): MCLBYTES-sized buffer lives on the stack. */
1837 				u_char optbuf_storage[MCLBYTES];
				/* Shadows the function-level optlen. */
1838 				int optlen;
1839 				struct ip6_pktopts **optp;
1840 
1841 				/* cannot mix with RFC2292 */
1842 				if (OPTBIT(IN6P_RFC2292)) {
1843 					error = EINVAL;
1844 					break;
1845 				}
1846 
1847 				/*
1848 				 * We only ensure valsize is not too large
1849 				 * here.  Further validation will be done
1850 				 * later.
1851 				 */
1852 				error = sooptcopyin(sopt, optbuf_storage,
1853 				    sizeof(optbuf_storage), 0);
1854 				if (error)
1855 					break;
1856 				optlen = sopt->sopt_valsize;
1857 				optbuf = optbuf_storage;
1858 				optp = &in6p->in6p_outputopts;
1859 				error = ip6_pcbopt(optname, optbuf, optlen,
1860 				    optp, (td != NULL) ? td->td_ucred : NULL,
1861 				    uproto);
1862 				break;
1863 			}
1864 #undef OPTSET
1865 
1866 			case IPV6_MULTICAST_IF:
1867 			case IPV6_MULTICAST_HOPS:
1868 			case IPV6_MULTICAST_LOOP:
1869 			case IPV6_JOIN_GROUP:
1870 			case IPV6_LEAVE_GROUP:
1871 			case IPV6_MSFILTER:
1872 			case MCAST_BLOCK_SOURCE:
1873 			case MCAST_UNBLOCK_SOURCE:
1874 			case MCAST_JOIN_GROUP:
1875 			case MCAST_LEAVE_GROUP:
1876 			case MCAST_JOIN_SOURCE_GROUP:
1877 			case MCAST_LEAVE_SOURCE_GROUP:
				/* Multicast options are handled by ip6_setmoptions(). */
1878 				error = ip6_setmoptions(in6p, sopt);
1879 				break;
1880 
1881 			case IPV6_PORTRANGE:
1882 				error = sooptcopyin(sopt, &optval,
1883 				    sizeof optval, sizeof optval);
1884 				if (error)
1885 					break;
1886 
				/* INP_LOWPORT and INP_HIGHPORT are kept mutually exclusive. */
1887 				INP_WLOCK(in6p);
1888 				switch (optval) {
1889 				case IPV6_PORTRANGE_DEFAULT:
1890 					in6p->inp_flags &= ~(INP_LOWPORT);
1891 					in6p->inp_flags &= ~(INP_HIGHPORT);
1892 					break;
1893 
1894 				case IPV6_PORTRANGE_HIGH:
1895 					in6p->inp_flags &= ~(INP_LOWPORT);
1896 					in6p->inp_flags |= INP_HIGHPORT;
1897 					break;
1898 
1899 				case IPV6_PORTRANGE_LOW:
1900 					in6p->inp_flags &= ~(INP_HIGHPORT);
1901 					in6p->inp_flags |= INP_LOWPORT;
1902 					break;
1903 
1904 				default:
1905 					error = EINVAL;
1906 					break;
1907 				}
1908 				INP_WUNLOCK(in6p);
1909 				break;
1910 
1911 #ifdef IPSEC
1912 			case IPV6_IPSEC_POLICY:
1913 			{
1914 				caddr_t req;
1915 				struct mbuf *m;
1916 
1917 				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1918 					break;
1919 				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1920 					break;
1921 				req = mtod(m, caddr_t);
1922 				error = ipsec_set_policy(in6p, optname, req,
1923 				    m->m_len, (sopt->sopt_td != NULL) ?
1924 				    sopt->sopt_td->td_ucred : NULL);
1925 				m_freem(m);
1926 				break;
1927 			}
1928 #endif /* IPSEC */
1929 
1930 			default:
1931 				error = ENOPROTOOPT;
1932 				break;
1933 			}
1934 			break;
1935 
1936 		case SOPT_GET:
1937 			switch (optname) {
1938 
1939 			case IPV6_2292PKTOPTIONS:
1940 #ifdef IPV6_PKTOPTIONS
1941 			case IPV6_PKTOPTIONS:
1942 #endif
1943 				/*
1944 				 * RFC3542 (effectively) deprecated the
1945 				 * semantics of the 2292-style pktoptions.
1946 				 * Since it was not reliable in nature (i.e.,
1947 				 * applications had to expect the lack of some
1948 				 * information after all), it would make sense
1949 				 * to simplify this part by always returning
1950 				 * empty data.
1951 				 */
1952 				sopt->sopt_valsize = 0;
1953 				break;
1954 
1955 			case IPV6_RECVHOPOPTS:
1956 			case IPV6_RECVDSTOPTS:
1957 			case IPV6_RECVRTHDRDSTOPTS:
1958 			case IPV6_UNICAST_HOPS:
1959 			case IPV6_RECVPKTINFO:
1960 			case IPV6_RECVHOPLIMIT:
1961 			case IPV6_RECVRTHDR:
1962 			case IPV6_RECVPATHMTU:
1963 
1964 			case IPV6_FAITH:
1965 			case IPV6_V6ONLY:
1966 			case IPV6_PORTRANGE:
1967 			case IPV6_RECVTCLASS:
1968 			case IPV6_AUTOFLOWLABEL:
1969 			case IPV6_BINDANY:
				/*
				 * int-valued options: compute optval from the
				 * pcb, then copy it out once below.
				 */
1970 				switch (optname) {
1971 
1972 				case IPV6_RECVHOPOPTS:
1973 					optval = OPTBIT(IN6P_HOPOPTS);
1974 					break;
1975 
1976 				case IPV6_RECVDSTOPTS:
1977 					optval = OPTBIT(IN6P_DSTOPTS);
1978 					break;
1979 
1980 				case IPV6_RECVRTHDRDSTOPTS:
1981 					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1982 					break;
1983 
1984 				case IPV6_UNICAST_HOPS:
1985 					optval = in6p->in6p_hops;
1986 					break;
1987 
1988 				case IPV6_RECVPKTINFO:
1989 					optval = OPTBIT(IN6P_PKTINFO);
1990 					break;
1991 
1992 				case IPV6_RECVHOPLIMIT:
1993 					optval = OPTBIT(IN6P_HOPLIMIT);
1994 					break;
1995 
1996 				case IPV6_RECVRTHDR:
1997 					optval = OPTBIT(IN6P_RTHDR);
1998 					break;
1999 
2000 				case IPV6_RECVPATHMTU:
2001 					optval = OPTBIT(IN6P_MTU);
2002 					break;
2003 
2004 				case IPV6_FAITH:
2005 					optval = OPTBIT(INP_FAITH);
2006 					break;
2007 
2008 				case IPV6_V6ONLY:
2009 					optval = OPTBIT(IN6P_IPV6_V6ONLY);
2010 					break;
2011 
2012 				case IPV6_PORTRANGE:
2013 				    {
2014 					int flags;
2015 					flags = in6p->inp_flags;
2016 					if (flags & INP_HIGHPORT)
2017 						optval = IPV6_PORTRANGE_HIGH;
2018 					else if (flags & INP_LOWPORT)
2019 						optval = IPV6_PORTRANGE_LOW;
2020 					else
2021 						optval = 0;
2022 					break;
2023 				    }
2024 				case IPV6_RECVTCLASS:
2025 					optval = OPTBIT(IN6P_TCLASS);
2026 					break;
2027 
2028 				case IPV6_AUTOFLOWLABEL:
2029 					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2030 					break;
2031 
2032 				case IPV6_BINDANY:
2033 					optval = OPTBIT(INP_BINDANY);
2034 					break;
2035 				}
				/* None of the cases above assigns error. */
2036 				if (error)
2037 					break;
2038 				error = sooptcopyout(sopt, &optval,
2039 					sizeof optval);
2040 				break;
2041 
2042 			case IPV6_PATHMTU:
2043 			{
2044 				u_long pmtu = 0;
2045 				struct ip6_mtuinfo mtuinfo;
2046 				struct route_in6 sro;
2047 
2048 				bzero(&sro, sizeof(sro));
2049 
				/* Only meaningful once a peer address is known. */
2050 				if (!(so->so_state & SS_ISCONNECTED))
2051 					return (ENOTCONN);
2052 				/*
2053 				 * XXX: we do not consider the case of source
2054 				 * routing, or optional information to specify
2055 				 * the outgoing interface.
2056 				 */
2057 				error = ip6_getpmtu(&sro, NULL, NULL,
2058 				    &in6p->in6p_faddr, &pmtu, NULL,
2059 				    so->so_fibnum);
				/* Drop the route reference taken by the lookup. */
2060 				if (sro.ro_rt)
2061 					RTFREE(sro.ro_rt);
2062 				if (error)
2063 					break;
2064 				if (pmtu > IPV6_MAXPACKET)
2065 					pmtu = IPV6_MAXPACKET;
2066 
2067 				bzero(&mtuinfo, sizeof(mtuinfo));
2068 				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2069 				optdata = (void *)&mtuinfo;
2070 				optdatalen = sizeof(mtuinfo);
2071 				error = sooptcopyout(sopt, optdata,
2072 				    optdatalen);
2073 				break;
2074 			}
2075 
2076 			case IPV6_2292PKTINFO:
2077 			case IPV6_2292HOPLIMIT:
2078 			case IPV6_2292HOPOPTS:
2079 			case IPV6_2292RTHDR:
2080 			case IPV6_2292DSTOPTS:
2081 				switch (optname) {
2082 				case IPV6_2292PKTINFO:
2083 					optval = OPTBIT(IN6P_PKTINFO);
2084 					break;
2085 				case IPV6_2292HOPLIMIT:
2086 					optval = OPTBIT(IN6P_HOPLIMIT);
2087 					break;
2088 				case IPV6_2292HOPOPTS:
2089 					optval = OPTBIT(IN6P_HOPOPTS);
2090 					break;
2091 				case IPV6_2292RTHDR:
2092 					optval = OPTBIT(IN6P_RTHDR);
2093 					break;
2094 				case IPV6_2292DSTOPTS:
					/* 1 if either of the two flags is set. */
2095 					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2096 					break;
2097 				}
2098 				error = sooptcopyout(sopt, &optval,
2099 				    sizeof optval);
2100 				break;
2101 			case IPV6_PKTINFO:
2102 			case IPV6_HOPOPTS:
2103 			case IPV6_RTHDR:
2104 			case IPV6_DSTOPTS:
2105 			case IPV6_RTHDRDSTOPTS:
2106 			case IPV6_NEXTHOP:
2107 			case IPV6_TCLASS:
2108 			case IPV6_DONTFRAG:
2109 			case IPV6_USE_MIN_MTU:
2110 			case IPV6_PREFER_TEMPADDR:
2111 				error = ip6_getpcbopt(in6p->in6p_outputopts,
2112 				    optname, sopt);
2113 				break;
2114 
2115 			case IPV6_MULTICAST_IF:
2116 			case IPV6_MULTICAST_HOPS:
2117 			case IPV6_MULTICAST_LOOP:
2118 			case IPV6_MSFILTER:
2119 				error = ip6_getmoptions(in6p, sopt);
2120 				break;
2121 
2122 #ifdef IPSEC
2123 			case IPV6_IPSEC_POLICY:
2124 			  {
2125 				caddr_t req = NULL;
2126 				size_t len = 0;
2127 				struct mbuf *m = NULL;
2128 				struct mbuf **mp = &m;
2129 				size_t ovalsize = sopt->sopt_valsize;
2130 				caddr_t oval = (caddr_t)sopt->sopt_val;
2131 
2132 				error = soopt_getm(sopt, &m); /* XXX */
2133 				if (error != 0)
2134 					break;
2135 				error = soopt_mcopyin(sopt, m); /* XXX */
2136 				if (error != 0)
2137 					break;
				/*
				 * Restore the user buffer description saved
				 * above (presumably soopt_mcopyin() changes
				 * it -- TODO confirm) for the copy-out below.
				 */
2138 				sopt->sopt_valsize = ovalsize;
2139 				sopt->sopt_val = oval;
2140 				if (m) {
2141 					req = mtod(m, caddr_t);
2142 					len = m->m_len;
2143 				}
2144 				error = ipsec_get_policy(in6p, req, len, mp);
2145 				if (error == 0)
2146 					error = soopt_mcopyout(sopt, m); /* XXX */
				/* m is freed here only when everything succeeded. */
2147 				if (error == 0 && m)
2148 					m_freem(m);
2149 				break;
2150 			  }
2151 #endif /* IPSEC */
2152 
2153 			default:
2154 				error = ENOPROTOOPT;
2155 				break;
2156 			}
2157 			break;
2158 		}
2159 	}
2160 	return (error);
2161 }
2162 
2163 int
2164 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2165 {
2166 	int error = 0, optval, optlen;
2167 	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2168 	struct inpcb *in6p = sotoinpcb(so);
2169 	int level, op, optname;
2170 
2171 	level = sopt->sopt_level;
2172 	op = sopt->sopt_dir;
2173 	optname = sopt->sopt_name;
2174 	optlen = sopt->sopt_valsize;
2175 
2176 	if (level != IPPROTO_IPV6) {
2177 		return (EINVAL);
2178 	}
2179 
2180 	switch (optname) {
2181 	case IPV6_CHECKSUM:
2182 		/*
2183 		 * For ICMPv6 sockets, no modification allowed for checksum
2184 		 * offset, permit "no change" values to help existing apps.
2185 		 *
2186 		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2187 		 * for an ICMPv6 socket will fail."
2188 		 * The current behavior does not meet RFC3542.
2189 		 */
2190 		switch (op) {
2191 		case SOPT_SET:
2192 			if (optlen != sizeof(int)) {
2193 				error = EINVAL;
2194 				break;
2195 			}
2196 			error = sooptcopyin(sopt, &optval, sizeof(optval),
2197 					    sizeof(optval));
2198 			if (error)
2199 				break;
2200 			if ((optval % 2) != 0) {
2201 				/* the API assumes even offset values */
2202 				error = EINVAL;
2203 			} else if (so->so_proto->pr_protocol ==
2204 			    IPPROTO_ICMPV6) {
2205 				if (optval != icmp6off)
2206 					error = EINVAL;
2207 			} else
2208 				in6p->in6p_cksum = optval;
2209 			break;
2210 
2211 		case SOPT_GET:
2212 			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2213 				optval = icmp6off;
2214 			else
2215 				optval = in6p->in6p_cksum;
2216 
2217 			error = sooptcopyout(sopt, &optval, sizeof(optval));
2218 			break;
2219 
2220 		default:
2221 			error = EINVAL;
2222 			break;
2223 		}
2224 		break;
2225 
2226 	default:
2227 		error = ENOPROTOOPT;
2228 		break;
2229 	}
2230 
2231 	return (error);
2232 }
2233 
2234 /*
2235  * Set up IP6 options in pcb for insertion in output packets or
2236  * specifying behavior of outgoing packets.
2237  */
2238 static int
2239 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2240     struct socket *so, struct sockopt *sopt)
2241 {
2242 	struct ip6_pktopts *opt = *pktopt;
2243 	int error = 0;
2244 	struct thread *td = sopt->sopt_td;
2245 
2246 	/* turn off any old options. */
2247 	if (opt) {
2248 #ifdef DIAGNOSTIC
2249 		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2250 		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2251 		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2252 			printf("ip6_pcbopts: all specified options are cleared.\n");
2253 #endif
2254 		ip6_clearpktopts(opt, -1);
2255 	} else
2256 		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2257 	*pktopt = NULL;
2258 
2259 	if (!m || m->m_len == 0) {
2260 		/*
2261 		 * Only turning off any previous options, regardless of
2262 		 * whether the opt is just created or given.
2263 		 */
2264 		free(opt, M_IP6OPT);
2265 		return (0);
2266 	}
2267 
2268 	/*  set options specified by user. */
2269 	if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
2270 	    td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
2271 		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2272 		free(opt, M_IP6OPT);
2273 		return (error);
2274 	}
2275 	*pktopt = opt;
2276 	return (0);
2277 }
2278 
2279 /*
2280  * initialize ip6_pktopts.  beware that there are non-zero default values in
2281  * the struct.
2282  */
2283 void
2284 ip6_initpktopts(struct ip6_pktopts *opt)
2285 {
2286 
2287 	bzero(opt, sizeof(*opt));
2288 	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2289 	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2290 	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2291 	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2292 }
2293 
2294 static int
2295 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2296     struct ucred *cred, int uproto)
2297 {
2298 	struct ip6_pktopts *opt;
2299 
2300 	if (*pktopt == NULL) {
2301 		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2302 		    M_WAITOK);
2303 		ip6_initpktopts(*pktopt);
2304 	}
2305 	opt = *pktopt;
2306 
2307 	return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2308 }
2309 
2310 static int
2311 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2312 {
2313 	void *optdata = NULL;
2314 	int optdatalen = 0;
2315 	struct ip6_ext *ip6e;
2316 	int error = 0;
2317 	struct in6_pktinfo null_pktinfo;
2318 	int deftclass = 0, on;
2319 	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2320 	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2321 
2322 	switch (optname) {
2323 	case IPV6_PKTINFO:
2324 		if (pktopt && pktopt->ip6po_pktinfo)
2325 			optdata = (void *)pktopt->ip6po_pktinfo;
2326 		else {
2327 			/* XXX: we don't have to do this every time... */
2328 			bzero(&null_pktinfo, sizeof(null_pktinfo));
2329 			optdata = (void *)&null_pktinfo;
2330 		}
2331 		optdatalen = sizeof(struct in6_pktinfo);
2332 		break;
2333 	case IPV6_TCLASS:
2334 		if (pktopt && pktopt->ip6po_tclass >= 0)
2335 			optdata = (void *)&pktopt->ip6po_tclass;
2336 		else
2337 			optdata = (void *)&deftclass;
2338 		optdatalen = sizeof(int);
2339 		break;
2340 	case IPV6_HOPOPTS:
2341 		if (pktopt && pktopt->ip6po_hbh) {
2342 			optdata = (void *)pktopt->ip6po_hbh;
2343 			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2344 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2345 		}
2346 		break;
2347 	case IPV6_RTHDR:
2348 		if (pktopt && pktopt->ip6po_rthdr) {
2349 			optdata = (void *)pktopt->ip6po_rthdr;
2350 			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2351 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2352 		}
2353 		break;
2354 	case IPV6_RTHDRDSTOPTS:
2355 		if (pktopt && pktopt->ip6po_dest1) {
2356 			optdata = (void *)pktopt->ip6po_dest1;
2357 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2358 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2359 		}
2360 		break;
2361 	case IPV6_DSTOPTS:
2362 		if (pktopt && pktopt->ip6po_dest2) {
2363 			optdata = (void *)pktopt->ip6po_dest2;
2364 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2365 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2366 		}
2367 		break;
2368 	case IPV6_NEXTHOP:
2369 		if (pktopt && pktopt->ip6po_nexthop) {
2370 			optdata = (void *)pktopt->ip6po_nexthop;
2371 			optdatalen = pktopt->ip6po_nexthop->sa_len;
2372 		}
2373 		break;
2374 	case IPV6_USE_MIN_MTU:
2375 		if (pktopt)
2376 			optdata = (void *)&pktopt->ip6po_minmtu;
2377 		else
2378 			optdata = (void *)&defminmtu;
2379 		optdatalen = sizeof(int);
2380 		break;
2381 	case IPV6_DONTFRAG:
2382 		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2383 			on = 1;
2384 		else
2385 			on = 0;
2386 		optdata = (void *)&on;
2387 		optdatalen = sizeof(on);
2388 		break;
2389 	case IPV6_PREFER_TEMPADDR:
2390 		if (pktopt)
2391 			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2392 		else
2393 			optdata = (void *)&defpreftemp;
2394 		optdatalen = sizeof(int);
2395 		break;
2396 	default:		/* should not happen */
2397 #ifdef DIAGNOSTIC
2398 		panic("ip6_getpcbopt: unexpected option\n");
2399 #endif
2400 		return (ENOPROTOOPT);
2401 	}
2402 
2403 	error = sooptcopyout(sopt, optdata, optdatalen);
2404 
2405 	return (error);
2406 }
2407 
2408 void
2409 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2410 {
2411 	if (pktopt == NULL)
2412 		return;
2413 
2414 	if (optname == -1 || optname == IPV6_PKTINFO) {
2415 		if (pktopt->ip6po_pktinfo)
2416 			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2417 		pktopt->ip6po_pktinfo = NULL;
2418 	}
2419 	if (optname == -1 || optname == IPV6_HOPLIMIT)
2420 		pktopt->ip6po_hlim = -1;
2421 	if (optname == -1 || optname == IPV6_TCLASS)
2422 		pktopt->ip6po_tclass = -1;
2423 	if (optname == -1 || optname == IPV6_NEXTHOP) {
2424 		if (pktopt->ip6po_nextroute.ro_rt) {
2425 			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2426 			pktopt->ip6po_nextroute.ro_rt = NULL;
2427 		}
2428 		if (pktopt->ip6po_nexthop)
2429 			free(pktopt->ip6po_nexthop, M_IP6OPT);
2430 		pktopt->ip6po_nexthop = NULL;
2431 	}
2432 	if (optname == -1 || optname == IPV6_HOPOPTS) {
2433 		if (pktopt->ip6po_hbh)
2434 			free(pktopt->ip6po_hbh, M_IP6OPT);
2435 		pktopt->ip6po_hbh = NULL;
2436 	}
2437 	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2438 		if (pktopt->ip6po_dest1)
2439 			free(pktopt->ip6po_dest1, M_IP6OPT);
2440 		pktopt->ip6po_dest1 = NULL;
2441 	}
2442 	if (optname == -1 || optname == IPV6_RTHDR) {
2443 		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2444 			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2445 		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2446 		if (pktopt->ip6po_route.ro_rt) {
2447 			RTFREE(pktopt->ip6po_route.ro_rt);
2448 			pktopt->ip6po_route.ro_rt = NULL;
2449 		}
2450 	}
2451 	if (optname == -1 || optname == IPV6_DSTOPTS) {
2452 		if (pktopt->ip6po_dest2)
2453 			free(pktopt->ip6po_dest2, M_IP6OPT);
2454 		pktopt->ip6po_dest2 = NULL;
2455 	}
2456 }
2457 
2458 #define PKTOPT_EXTHDRCPY(type) \
2459 do {\
2460 	if (src->type) {\
2461 		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2462 		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2463 		if (dst->type == NULL && canwait == M_NOWAIT)\
2464 			goto bad;\
2465 		bcopy(src->type, dst->type, hlen);\
2466 	}\
2467 } while (/*CONSTCOND*/ 0)
2468 
2469 static int
2470 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2471 {
2472 	if (dst == NULL || src == NULL)  {
2473 		printf("ip6_clearpktopts: invalid argument\n");
2474 		return (EINVAL);
2475 	}
2476 
2477 	dst->ip6po_hlim = src->ip6po_hlim;
2478 	dst->ip6po_tclass = src->ip6po_tclass;
2479 	dst->ip6po_flags = src->ip6po_flags;
2480 	dst->ip6po_minmtu = src->ip6po_minmtu;
2481 	dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
2482 	if (src->ip6po_pktinfo) {
2483 		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2484 		    M_IP6OPT, canwait);
2485 		if (dst->ip6po_pktinfo == NULL)
2486 			goto bad;
2487 		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2488 	}
2489 	if (src->ip6po_nexthop) {
2490 		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2491 		    M_IP6OPT, canwait);
2492 		if (dst->ip6po_nexthop == NULL)
2493 			goto bad;
2494 		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2495 		    src->ip6po_nexthop->sa_len);
2496 	}
2497 	PKTOPT_EXTHDRCPY(ip6po_hbh);
2498 	PKTOPT_EXTHDRCPY(ip6po_dest1);
2499 	PKTOPT_EXTHDRCPY(ip6po_dest2);
2500 	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2501 	return (0);
2502 
2503   bad:
2504 	ip6_clearpktopts(dst, -1);
2505 	return (ENOBUFS);
2506 }
2507 #undef PKTOPT_EXTHDRCPY
2508 
2509 struct ip6_pktopts *
2510 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2511 {
2512 	int error;
2513 	struct ip6_pktopts *dst;
2514 
2515 	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2516 	if (dst == NULL)
2517 		return (NULL);
2518 	ip6_initpktopts(dst);
2519 
2520 	if ((error = copypktopts(dst, src, canwait)) != 0) {
2521 		free(dst, M_IP6OPT);
2522 		return (NULL);
2523 	}
2524 
2525 	return (dst);
2526 }
2527 
2528 void
2529 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2530 {
2531 	if (pktopt == NULL)
2532 		return;
2533 
2534 	ip6_clearpktopts(pktopt, -1);
2535 
2536 	free(pktopt, M_IP6OPT);
2537 }
2538 
2539 /*
2540  * Set IPv6 outgoing packet options based on advanced API.
2541  */
2542 int
2543 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2544     struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
2545 {
2546 	struct cmsghdr *cm = 0;
2547 
2548 	if (control == NULL || opt == NULL)
2549 		return (EINVAL);
2550 
2551 	ip6_initpktopts(opt);
2552 	if (stickyopt) {
2553 		int error;
2554 
2555 		/*
2556 		 * If stickyopt is provided, make a local copy of the options
2557 		 * for this particular packet, then override them by ancillary
2558 		 * objects.
2559 		 * XXX: copypktopts() does not copy the cached route to a next
2560 		 * hop (if any).  This is not very good in terms of efficiency,
2561 		 * but we can allow this since this option should be rarely
2562 		 * used.
2563 		 */
2564 		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2565 			return (error);
2566 	}
2567 
2568 	/*
2569 	 * XXX: Currently, we assume all the optional information is stored
2570 	 * in a single mbuf.
2571 	 */
2572 	if (control->m_next)
2573 		return (EINVAL);
2574 
2575 	for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2576 	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2577 		int error;
2578 
2579 		if (control->m_len < CMSG_LEN(0))
2580 			return (EINVAL);
2581 
2582 		cm = mtod(control, struct cmsghdr *);
2583 		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2584 			return (EINVAL);
2585 		if (cm->cmsg_level != IPPROTO_IPV6)
2586 			continue;
2587 
2588 		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2589 		    cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2590 		if (error)
2591 			return (error);
2592 	}
2593 
2594 	return (0);
2595 }
2596 
2597 /*
2598  * Set a particular packet option, as a sticky option or an ancillary data
2599  * item.  "len" can be 0 only when it's a sticky option.
2600  * We have 4 cases of combination of "sticky" and "cmsg":
2601  * "sticky=0, cmsg=0": impossible
2602  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2603  * "sticky=1, cmsg=0": RFC3542 socket option
2604  * "sticky=1, cmsg=1": RFC2292 socket option
2605  */
2606 static int
2607 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2608     struct ucred *cred, int sticky, int cmsg, int uproto)
2609 {
2610 	int minmtupolicy, preftemp;
2611 	int error;
2612 
2613 	if (!sticky && !cmsg) {
2614 #ifdef DIAGNOSTIC
2615 		printf("ip6_setpktopt: impossible case\n");
2616 #endif
2617 		return (EINVAL);
2618 	}
2619 
2620 	/*
2621 	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2622 	 * not be specified in the context of RFC3542.  Conversely,
2623 	 * RFC3542 types should not be specified in the context of RFC2292.
2624 	 */
2625 	if (!cmsg) {
2626 		switch (optname) {
2627 		case IPV6_2292PKTINFO:
2628 		case IPV6_2292HOPLIMIT:
2629 		case IPV6_2292NEXTHOP:
2630 		case IPV6_2292HOPOPTS:
2631 		case IPV6_2292DSTOPTS:
2632 		case IPV6_2292RTHDR:
2633 		case IPV6_2292PKTOPTIONS:
2634 			return (ENOPROTOOPT);
2635 		}
2636 	}
2637 	if (sticky && cmsg) {
2638 		switch (optname) {
2639 		case IPV6_PKTINFO:
2640 		case IPV6_HOPLIMIT:
2641 		case IPV6_NEXTHOP:
2642 		case IPV6_HOPOPTS:
2643 		case IPV6_DSTOPTS:
2644 		case IPV6_RTHDRDSTOPTS:
2645 		case IPV6_RTHDR:
2646 		case IPV6_USE_MIN_MTU:
2647 		case IPV6_DONTFRAG:
2648 		case IPV6_TCLASS:
2649 		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2650 			return (ENOPROTOOPT);
2651 		}
2652 	}
2653 
2654 	switch (optname) {
2655 	case IPV6_2292PKTINFO:
2656 	case IPV6_PKTINFO:
2657 	{
2658 		struct ifnet *ifp = NULL;
2659 		struct in6_pktinfo *pktinfo;
2660 
2661 		if (len != sizeof(struct in6_pktinfo))
2662 			return (EINVAL);
2663 
2664 		pktinfo = (struct in6_pktinfo *)buf;
2665 
2666 		/*
2667 		 * An application can clear any sticky IPV6_PKTINFO option by
2668 		 * doing a "regular" setsockopt with ipi6_addr being
2669 		 * in6addr_any and ipi6_ifindex being zero.
2670 		 * [RFC 3542, Section 6]
2671 		 */
2672 		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2673 		    pktinfo->ipi6_ifindex == 0 &&
2674 		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2675 			ip6_clearpktopts(opt, optname);
2676 			break;
2677 		}
2678 
2679 		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2680 		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2681 			return (EINVAL);
2682 		}
2683 
2684 		/* validate the interface index if specified. */
2685 		if (pktinfo->ipi6_ifindex > V_if_index ||
2686 		    pktinfo->ipi6_ifindex < 0) {
2687 			 return (ENXIO);
2688 		}
2689 		if (pktinfo->ipi6_ifindex) {
2690 			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2691 			if (ifp == NULL)
2692 				return (ENXIO);
2693 		}
2694 
2695 		/*
2696 		 * We store the address anyway, and let in6_selectsrc()
2697 		 * validate the specified address.  This is because ipi6_addr
2698 		 * may not have enough information about its scope zone, and
2699 		 * we may need additional information (such as outgoing
2700 		 * interface or the scope zone of a destination address) to
2701 		 * disambiguate the scope.
2702 		 * XXX: the delay of the validation may confuse the
2703 		 * application when it is used as a sticky option.
2704 		 */
2705 		if (opt->ip6po_pktinfo == NULL) {
2706 			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2707 			    M_IP6OPT, M_NOWAIT);
2708 			if (opt->ip6po_pktinfo == NULL)
2709 				return (ENOBUFS);
2710 		}
2711 		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2712 		break;
2713 	}
2714 
2715 	case IPV6_2292HOPLIMIT:
2716 	case IPV6_HOPLIMIT:
2717 	{
2718 		int *hlimp;
2719 
2720 		/*
2721 		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2722 		 * to simplify the ordering among hoplimit options.
2723 		 */
2724 		if (optname == IPV6_HOPLIMIT && sticky)
2725 			return (ENOPROTOOPT);
2726 
2727 		if (len != sizeof(int))
2728 			return (EINVAL);
2729 		hlimp = (int *)buf;
2730 		if (*hlimp < -1 || *hlimp > 255)
2731 			return (EINVAL);
2732 
2733 		opt->ip6po_hlim = *hlimp;
2734 		break;
2735 	}
2736 
2737 	case IPV6_TCLASS:
2738 	{
2739 		int tclass;
2740 
2741 		if (len != sizeof(int))
2742 			return (EINVAL);
2743 		tclass = *(int *)buf;
2744 		if (tclass < -1 || tclass > 255)
2745 			return (EINVAL);
2746 
2747 		opt->ip6po_tclass = tclass;
2748 		break;
2749 	}
2750 
2751 	case IPV6_2292NEXTHOP:
2752 	case IPV6_NEXTHOP:
2753 		if (cred != NULL) {
2754 			error = priv_check_cred(cred,
2755 			    PRIV_NETINET_SETHDROPTS, 0);
2756 			if (error)
2757 				return (error);
2758 		}
2759 
2760 		if (len == 0) {	/* just remove the option */
2761 			ip6_clearpktopts(opt, IPV6_NEXTHOP);
2762 			break;
2763 		}
2764 
2765 		/* check if cmsg_len is large enough for sa_len */
2766 		if (len < sizeof(struct sockaddr) || len < *buf)
2767 			return (EINVAL);
2768 
2769 		switch (((struct sockaddr *)buf)->sa_family) {
2770 		case AF_INET6:
2771 		{
2772 			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2773 			int error;
2774 
2775 			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2776 				return (EINVAL);
2777 
2778 			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
2779 			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
2780 				return (EINVAL);
2781 			}
2782 			if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
2783 			    != 0) {
2784 				return (error);
2785 			}
2786 			break;
2787 		}
2788 		case AF_LINK:	/* should eventually be supported */
2789 		default:
2790 			return (EAFNOSUPPORT);
2791 		}
2792 
2793 		/* turn off the previous option, then set the new option. */
2794 		ip6_clearpktopts(opt, IPV6_NEXTHOP);
2795 		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
2796 		if (opt->ip6po_nexthop == NULL)
2797 			return (ENOBUFS);
2798 		bcopy(buf, opt->ip6po_nexthop, *buf);
2799 		break;
2800 
2801 	case IPV6_2292HOPOPTS:
2802 	case IPV6_HOPOPTS:
2803 	{
2804 		struct ip6_hbh *hbh;
2805 		int hbhlen;
2806 
2807 		/*
2808 		 * XXX: We don't allow a non-privileged user to set ANY HbH
2809 		 * options, since per-option restriction has too much
2810 		 * overhead.
2811 		 */
2812 		if (cred != NULL) {
2813 			error = priv_check_cred(cred,
2814 			    PRIV_NETINET_SETHDROPTS, 0);
2815 			if (error)
2816 				return (error);
2817 		}
2818 
2819 		if (len == 0) {
2820 			ip6_clearpktopts(opt, IPV6_HOPOPTS);
2821 			break;	/* just remove the option */
2822 		}
2823 
2824 		/* message length validation */
2825 		if (len < sizeof(struct ip6_hbh))
2826 			return (EINVAL);
2827 		hbh = (struct ip6_hbh *)buf;
2828 		hbhlen = (hbh->ip6h_len + 1) << 3;
2829 		if (len != hbhlen)
2830 			return (EINVAL);
2831 
2832 		/* turn off the previous option, then set the new option. */
2833 		ip6_clearpktopts(opt, IPV6_HOPOPTS);
2834 		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
2835 		if (opt->ip6po_hbh == NULL)
2836 			return (ENOBUFS);
2837 		bcopy(hbh, opt->ip6po_hbh, hbhlen);
2838 
2839 		break;
2840 	}
2841 
2842 	case IPV6_2292DSTOPTS:
2843 	case IPV6_DSTOPTS:
2844 	case IPV6_RTHDRDSTOPTS:
2845 	{
2846 		struct ip6_dest *dest, **newdest = NULL;
2847 		int destlen;
2848 
2849 		if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
2850 			error = priv_check_cred(cred,
2851 			    PRIV_NETINET_SETHDROPTS, 0);
2852 			if (error)
2853 				return (error);
2854 		}
2855 
2856 		if (len == 0) {
2857 			ip6_clearpktopts(opt, optname);
2858 			break;	/* just remove the option */
2859 		}
2860 
2861 		/* message length validation */
2862 		if (len < sizeof(struct ip6_dest))
2863 			return (EINVAL);
2864 		dest = (struct ip6_dest *)buf;
2865 		destlen = (dest->ip6d_len + 1) << 3;
2866 		if (len != destlen)
2867 			return (EINVAL);
2868 
2869 		/*
2870 		 * Determine the position that the destination options header
2871 		 * should be inserted; before or after the routing header.
2872 		 */
2873 		switch (optname) {
2874 		case IPV6_2292DSTOPTS:
2875 			/*
2876 			 * The old advacned API is ambiguous on this point.
2877 			 * Our approach is to determine the position based
2878 			 * according to the existence of a routing header.
2879 			 * Note, however, that this depends on the order of the
2880 			 * extension headers in the ancillary data; the 1st
2881 			 * part of the destination options header must appear
2882 			 * before the routing header in the ancillary data,
2883 			 * too.
2884 			 * RFC3542 solved the ambiguity by introducing
2885 			 * separate ancillary data or option types.
2886 			 */
2887 			if (opt->ip6po_rthdr == NULL)
2888 				newdest = &opt->ip6po_dest1;
2889 			else
2890 				newdest = &opt->ip6po_dest2;
2891 			break;
2892 		case IPV6_RTHDRDSTOPTS:
2893 			newdest = &opt->ip6po_dest1;
2894 			break;
2895 		case IPV6_DSTOPTS:
2896 			newdest = &opt->ip6po_dest2;
2897 			break;
2898 		}
2899 
2900 		/* turn off the previous option, then set the new option. */
2901 		ip6_clearpktopts(opt, optname);
2902 		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
2903 		if (*newdest == NULL)
2904 			return (ENOBUFS);
2905 		bcopy(dest, *newdest, destlen);
2906 
2907 		break;
2908 	}
2909 
2910 	case IPV6_2292RTHDR:
2911 	case IPV6_RTHDR:
2912 	{
2913 		struct ip6_rthdr *rth;
2914 		int rthlen;
2915 
2916 		if (len == 0) {
2917 			ip6_clearpktopts(opt, IPV6_RTHDR);
2918 			break;	/* just remove the option */
2919 		}
2920 
2921 		/* message length validation */
2922 		if (len < sizeof(struct ip6_rthdr))
2923 			return (EINVAL);
2924 		rth = (struct ip6_rthdr *)buf;
2925 		rthlen = (rth->ip6r_len + 1) << 3;
2926 		if (len != rthlen)
2927 			return (EINVAL);
2928 
2929 		switch (rth->ip6r_type) {
2930 		case IPV6_RTHDR_TYPE_0:
2931 			if (rth->ip6r_len == 0)	/* must contain one addr */
2932 				return (EINVAL);
2933 			if (rth->ip6r_len % 2) /* length must be even */
2934 				return (EINVAL);
2935 			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
2936 				return (EINVAL);
2937 			break;
2938 		default:
2939 			return (EINVAL);	/* not supported */
2940 		}
2941 
2942 		/* turn off the previous option */
2943 		ip6_clearpktopts(opt, IPV6_RTHDR);
2944 		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
2945 		if (opt->ip6po_rthdr == NULL)
2946 			return (ENOBUFS);
2947 		bcopy(rth, opt->ip6po_rthdr, rthlen);
2948 
2949 		break;
2950 	}
2951 
2952 	case IPV6_USE_MIN_MTU:
2953 		if (len != sizeof(int))
2954 			return (EINVAL);
2955 		minmtupolicy = *(int *)buf;
2956 		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
2957 		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
2958 		    minmtupolicy != IP6PO_MINMTU_ALL) {
2959 			return (EINVAL);
2960 		}
2961 		opt->ip6po_minmtu = minmtupolicy;
2962 		break;
2963 
2964 	case IPV6_DONTFRAG:
2965 		if (len != sizeof(int))
2966 			return (EINVAL);
2967 
2968 		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
2969 			/*
2970 			 * we ignore this option for TCP sockets.
2971 			 * (RFC3542 leaves this case unspecified.)
2972 			 */
2973 			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
2974 		} else
2975 			opt->ip6po_flags |= IP6PO_DONTFRAG;
2976 		break;
2977 
2978 	case IPV6_PREFER_TEMPADDR:
2979 		if (len != sizeof(int))
2980 			return (EINVAL);
2981 		preftemp = *(int *)buf;
2982 		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
2983 		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
2984 		    preftemp != IP6PO_TEMPADDR_PREFER) {
2985 			return (EINVAL);
2986 		}
2987 		opt->ip6po_prefer_tempaddr = preftemp;
2988 		break;
2989 
2990 	default:
2991 		return (ENOPROTOOPT);
2992 	} /* end of switch */
2993 
2994 	return (0);
2995 }
2996 
2997 /*
2998  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
2999  * packet to the input queue of a specified interface.  Note that this
3000  * calls the output routine of the loopback "driver", but with an interface
3001  * pointer that might NOT be &loif -- easier than replicating that code here.
3002  */
3003 void
3004 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
3005 {
3006 	struct mbuf *copym;
3007 	struct ip6_hdr *ip6;
3008 
3009 	copym = m_copy(m, 0, M_COPYALL);
3010 	if (copym == NULL)
3011 		return;
3012 
3013 	/*
3014 	 * Make sure to deep-copy IPv6 header portion in case the data
3015 	 * is in an mbuf cluster, so that we can safely override the IPv6
3016 	 * header portion later.
3017 	 */
3018 	if ((copym->m_flags & M_EXT) != 0 ||
3019 	    copym->m_len < sizeof(struct ip6_hdr)) {
3020 		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3021 		if (copym == NULL)
3022 			return;
3023 	}
3024 
3025 #ifdef DIAGNOSTIC
3026 	if (copym->m_len < sizeof(*ip6)) {
3027 		m_freem(copym);
3028 		return;
3029 	}
3030 #endif
3031 
3032 	ip6 = mtod(copym, struct ip6_hdr *);
3033 	/*
3034 	 * clear embedded scope identifiers if necessary.
3035 	 * in6_clearscope will touch the addresses only when necessary.
3036 	 */
3037 	in6_clearscope(&ip6->ip6_src);
3038 	in6_clearscope(&ip6->ip6_dst);
3039 
3040 	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3041 }
3042 
3043 /*
3044  * Chop IPv6 header off from the payload.
3045  */
3046 static int
3047 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3048 {
3049 	struct mbuf *mh;
3050 	struct ip6_hdr *ip6;
3051 
3052 	ip6 = mtod(m, struct ip6_hdr *);
3053 	if (m->m_len > sizeof(*ip6)) {
3054 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3055 		if (mh == 0) {
3056 			m_freem(m);
3057 			return ENOBUFS;
3058 		}
3059 		M_MOVE_PKTHDR(mh, m);
3060 		MH_ALIGN(mh, sizeof(*ip6));
3061 		m->m_len -= sizeof(*ip6);
3062 		m->m_data += sizeof(*ip6);
3063 		mh->m_next = m;
3064 		m = mh;
3065 		m->m_len = sizeof(*ip6);
3066 		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3067 	}
3068 	exthdrs->ip6e_ip6 = m;
3069 	return 0;
3070 }
3071 
3072 /*
3073  * Compute IPv6 extension header length.
3074  */
3075 int
3076 ip6_optlen(struct inpcb *in6p)
3077 {
3078 	int len;
3079 
3080 	if (!in6p->in6p_outputopts)
3081 		return 0;
3082 
3083 	len = 0;
3084 #define elen(x) \
3085     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3086 
3087 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3088 	if (in6p->in6p_outputopts->ip6po_rthdr)
3089 		/* dest1 is valid with rthdr only */
3090 		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3091 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3092 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3093 	return len;
3094 #undef elen
3095 }
3096