xref: /freebsd/sys/netinet6/ip6_output.c (revision 0f8f86b71f022b803e99151c19db81b280f245dc)
1 /*	$FreeBSD$	*/
2 /*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
66  */
67 
68 #include "opt_ip6fw.h"
69 #include "opt_inet.h"
70 #include "opt_inet6.h"
71 #include "opt_ipsec.h"
72 #include "opt_pfil_hooks.h"
73 #include "opt_random_ip_id.h"
74 
75 #include <sys/param.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/proc.h>
79 #include <sys/errno.h>
80 #include <sys/protosw.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/systm.h>
84 #include <sys/kernel.h>
85 
86 #include <net/if.h>
87 #include <net/route.h>
88 #ifdef PFIL_HOOKS
89 #include <net/pfil.h>
90 #endif
91 
92 #include <netinet/in.h>
93 #include <netinet/in_var.h>
94 #include <netinet6/in6_var.h>
95 #include <netinet/ip6.h>
96 #include <netinet/icmp6.h>
97 #include <netinet6/ip6_var.h>
98 #include <netinet/in_pcb.h>
99 #include <netinet/tcp_var.h>
100 #include <netinet6/nd6.h>
101 
102 #ifdef IPSEC
103 #include <netinet6/ipsec.h>
104 #ifdef INET6
105 #include <netinet6/ipsec6.h>
106 #endif
107 #include <netkey/key.h>
108 #endif /* IPSEC */
109 
110 #ifdef FAST_IPSEC
111 #include <netipsec/ipsec.h>
112 #include <netipsec/ipsec6.h>
113 #include <netipsec/key.h>
114 #endif /* FAST_IPSEC */
115 
116 #include <netinet6/ip6_fw.h>
117 
118 #include <net/net_osdep.h>
119 
120 #include <netinet6/ip6protosw.h>
121 
/*
 * File-local malloc type M_IPMOPTS (short name "ip6_moptions").
 * NOTE(review): no allocation using this type is visible in this part of
 * the file; presumably it tags struct ip6_moptions allocations -- confirm
 * against ip6_setmoptions().
 */
122 static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
123 
/*
 * Set of mbufs used by ip6_output() while composing an outgoing packet:
 * one slot per header that can precede the payload.  ip6_output() zeroes
 * the structure first and treats a NULL slot as "header not present".
 */
124 struct ip6_exthdrs {
125 	struct mbuf *ip6e_ip6;		/* IPv6 header itself */
126 	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
127 	struct mbuf *ip6e_dest1;	/* Destination options header (1st part), chained before the routing header */
128 	struct mbuf *ip6e_rthdr;	/* Routing header */
129 	struct mbuf *ip6e_dest2;	/* Destination options header (2nd part), chained just before the payload */
130 };
131 
/*
 * Forward declarations of file-local helpers operating on
 * struct ip6_pktopts (and, for two of them, struct sockopt).
 * NOTE(review): their definitions and callers are outside this part of the
 * file; presumably these are the per-socket / per-packet IPv6 option
 * set/get paths -- confirm against the definitions.
 */
132 static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
133 			   int, int));
134 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
135 	struct socket *, struct sockopt *));
136 static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct sockopt *));
137 static int ip6_setpktoption __P((int, u_char *, int, struct ip6_pktopts *, int,
138 	int, int, int));
139 
/*
 * Helpers taking struct ip6_moptions.
 * NOTE(review): not called in the visible part of the file; presumably the
 * multicast option set/get paths -- confirm against the definitions.
 */
140 static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
141 static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
/*
 * Called from ip6_output() (via MAKE_EXTHDR) with a source extension header
 * and its length in bytes; the resulting mbuf is stored through the first
 * argument.  A non-zero return is treated as an error.
 */
142 static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
/* NOTE(review): no caller visible here; presumably used when fragmenting. */
143 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
144 	struct ip6_frag **));
/*
 * Called from ip6_output() with the payload length when it exceeds
 * IPV6_MAXPACKET.  A non-zero return is treated as an error.
 */
145 static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
/*
 * Called from ip6_output() to separate the IPv6 header from the payload; on
 * success the header mbuf is read back from ip6e_ip6.  On failure
 * ip6_output() clears its own mbuf pointer, so the chain is presumably
 * consumed -- confirm against the definition.
 */
146 static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
/*
 * Called from ip6_output() to determine the path MTU; the u_long * receives
 * the MTU and the int * receives the "always fragment" flag.  A non-zero
 * return is treated as an error.
 */
147 static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
148 	struct ifnet *, struct in6_addr *, u_long *, int *));
149 
150 
151 /*
152  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
153  * header (with pri, len, nxt, hlim, src, dst).
154  * This function may modify ver and hlim only.
155  * The mbuf chain containing the packet will be freed.
156  * The mbuf opt, if present, will not be freed.
157  *
158  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
159  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
160  * which is rt_rmx.rmx_mtu.
161  */
162 int
163 ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
164 	struct mbuf *m0;
165 	struct ip6_pktopts *opt;
166 	struct route_in6 *ro;
167 	int flags;
168 	struct ip6_moptions *im6o;
169 	struct ifnet **ifpp;		/* XXX: just for statistics */
170 	struct inpcb *inp;
171 {
172 	struct ip6_hdr *ip6, *mhip6;
173 	struct ifnet *ifp, *origifp;
174 	struct mbuf *m = m0;
175 	int hlen, tlen, len, off;
176 	struct route_in6 ip6route;
177 	struct sockaddr_in6 *dst;
178 	int error = 0;
179 	struct in6_ifaddr *ia = NULL;
180 	u_long mtu;
181 	int alwaysfrag, dontfrag;
182 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
183 	struct ip6_exthdrs exthdrs;
184 	struct in6_addr finaldst;
185 	struct route_in6 *ro_pmtu = NULL;
186 	int hdrsplit = 0;
187 	int needipsec = 0;
188 #if defined(IPSEC) || defined(FAST_IPSEC)
189 	int needipsectun = 0;
190 	struct secpolicy *sp = NULL;
191 #endif /*IPSEC || FAST_IPSEC*/
192 
193 	ip6 = mtod(m, struct ip6_hdr *);
194 	finaldst = ip6->ip6_dst;
195 
196 #define MAKE_EXTHDR(hp, mp)						\
197     do {								\
198 	if (hp) {							\
199 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
200 		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
201 		    ((eh)->ip6e_len + 1) << 3);				\
202 		if (error)						\
203 			goto freehdrs;					\
204 	}								\
205     } while (/*CONSTCOND*/ 0)
206 
207 	bzero(&exthdrs, sizeof(exthdrs));
208 
209 	if (opt) {
210 		/* Hop-by-Hop options header */
211 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
212 		/* Destination options header(1st part) */
213 		if (opt->ip6po_rthdr) {
214 			/*
215 			 * Destination options header(1st part)
216 			 * This only makes sense with a routing header.
217 			 * See Section 9.2 of RFC 3542.
218 			 * Disabling this part just for MIP6 convenience is
219 			 * a bad idea.  We need to think carefully about a
220 			 * way to make the advanced API coexist with MIP6
221 			 * options, which might automatically be inserted in
222 			 * the kernel.
223 			 */
224 			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
225 		}
226 		/* Routing header */
227 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
228 		/* Destination options header(2nd part) */
229 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
230 	}
231 
232 #ifdef IPSEC
233 	/* get a security policy for this packet */
234 	if (inp == NULL)
235 		sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
236 	else
237 		sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
238 
239 	if (sp == NULL) {
240 		ipsec6stat.out_inval++;
241 		goto freehdrs;
242 	}
243 
244 	error = 0;
245 
246 	/* check policy */
247 	switch (sp->policy) {
248 	case IPSEC_POLICY_DISCARD:
249 		/*
250 		 * This packet is just discarded.
251 		 */
252 		ipsec6stat.out_polvio++;
253 		goto freehdrs;
254 
255 	case IPSEC_POLICY_BYPASS:
256 	case IPSEC_POLICY_NONE:
257 		/* no need to do IPsec. */
258 		needipsec = 0;
259 		break;
260 
261 	case IPSEC_POLICY_IPSEC:
262 		if (sp->req == NULL) {
263 			/* acquire a policy */
264 			error = key_spdacquire(sp);
265 			goto freehdrs;
266 		}
267 		needipsec = 1;
268 		break;
269 
270 	case IPSEC_POLICY_ENTRUST:
271 	default:
272 		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
273 	}
274 #endif /* IPSEC */
275 #ifdef FAST_IPSEC
276 	/* get a security policy for this packet */
277 	if (inp == NULL)
278 		sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
279 	else
280 		sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
281 
282 	if (sp == NULL) {
283 		newipsecstat.ips_out_inval++;
284 		goto freehdrs;
285 	}
286 
287 	error = 0;
288 
289 	/* check policy */
290 	switch (sp->policy) {
291 	case IPSEC_POLICY_DISCARD:
292 		/*
293 		 * This packet is just discarded.
294 		 */
295 		newipsecstat.ips_out_polvio++;
296 		goto freehdrs;
297 
298 	case IPSEC_POLICY_BYPASS:
299 	case IPSEC_POLICY_NONE:
300 		/* no need to do IPsec. */
301 		needipsec = 0;
302 		break;
303 
304 	case IPSEC_POLICY_IPSEC:
305 		if (sp->req == NULL) {
306 			/* acquire a policy */
307 			error = key_spdacquire(sp);
308 			goto freehdrs;
309 		}
310 		needipsec = 1;
311 		break;
312 
313 	case IPSEC_POLICY_ENTRUST:
314 	default:
315 		printf("ip6_output: Invalid policy found. %d\n", sp->policy);
316 	}
317 #endif /* FAST_IPSEC */
318 
319 	/*
320 	 * Calculate the total length of the extension header chain.
321 	 * Keep the length of the unfragmentable part for fragmentation.
322 	 */
323 	optlen = 0;
324 	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
325 	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
326 	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
327 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
328 	/* NOTE: we don't add AH/ESP length here. do that later. */
329 	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
330 
331 	/*
332 	 * If we need IPsec, or there is at least one extension header,
333 	 * separate IP6 header from the payload.
334 	 */
335 	if ((needipsec || optlen) && !hdrsplit) {
336 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
337 			m = NULL;
338 			goto freehdrs;
339 		}
340 		m = exthdrs.ip6e_ip6;
341 		hdrsplit++;
342 	}
343 
344 	/* adjust pointer */
345 	ip6 = mtod(m, struct ip6_hdr *);
346 
347 	/* adjust mbuf packet header length */
348 	m->m_pkthdr.len += optlen;
349 	plen = m->m_pkthdr.len - sizeof(*ip6);
350 
351 	/* If this is a jumbo payload, insert a jumbo payload option. */
352 	if (plen > IPV6_MAXPACKET) {
353 		if (!hdrsplit) {
354 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
355 				m = NULL;
356 				goto freehdrs;
357 			}
358 			m = exthdrs.ip6e_ip6;
359 			hdrsplit++;
360 		}
361 		/* adjust pointer */
362 		ip6 = mtod(m, struct ip6_hdr *);
363 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
364 			goto freehdrs;
365 		ip6->ip6_plen = 0;
366 	} else
367 		ip6->ip6_plen = htons(plen);
368 
369 	/*
370 	 * Concatenate headers and fill in next header fields.
371 	 * Here we have, on "m"
372 	 *	IPv6 payload
373 	 * and we insert headers accordingly.  Finally, we should be getting:
374 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
375 	 *
376 	 * during the header composing process, "m" points to IPv6 header.
377 	 * "mprev" points to an extension header prior to esp.
378 	 */
379 	{
380 		u_char *nexthdrp = &ip6->ip6_nxt;
381 		struct mbuf *mprev = m;
382 
383 		/*
384 		 * we treat dest2 specially.  this makes IPsec processing
385 		 * much easier.  the goal here is to make mprev point the
386 		 * mbuf prior to dest2.
387 		 *
388 		 * result: IPv6 dest2 payload
389 		 * m and mprev will point to IPv6 header.
390 		 */
391 		if (exthdrs.ip6e_dest2) {
392 			if (!hdrsplit)
393 				panic("assumption failed: hdr not split");
394 			exthdrs.ip6e_dest2->m_next = m->m_next;
395 			m->m_next = exthdrs.ip6e_dest2;
396 			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
397 			ip6->ip6_nxt = IPPROTO_DSTOPTS;
398 		}
399 
400 #define MAKE_CHAIN(m, mp, p, i)\
401     do {\
402 	if (m) {\
403 		if (!hdrsplit) \
404 			panic("assumption failed: hdr not split"); \
405 		*mtod((m), u_char *) = *(p);\
406 		*(p) = (i);\
407 		p = mtod((m), u_char *);\
408 		(m)->m_next = (mp)->m_next;\
409 		(mp)->m_next = (m);\
410 		(mp) = (m);\
411 	}\
412     } while (/*CONSTCOND*/ 0)
413 		/*
414 		 * result: IPv6 hbh dest1 rthdr dest2 payload
415 		 * m will point to IPv6 header.  mprev will point to the
416 		 * extension header prior to dest2 (rthdr in the above case).
417 		 */
418 		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
419 		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
420 		    IPPROTO_DSTOPTS);
421 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
422 		    IPPROTO_ROUTING);
423 
424 #if defined(IPSEC) || defined(FAST_IPSEC)
425 		if (!needipsec)
426 			goto skip_ipsec2;
427 
428 		/*
429 		 * pointers after IPsec headers are not valid any more.
430 		 * other pointers need a great care too.
431 		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
432 		 */
433 		exthdrs.ip6e_dest2 = NULL;
434 
435 	    {
436 		struct ip6_rthdr *rh = NULL;
437 		int segleft_org = 0;
438 		struct ipsec_output_state state;
439 
440 		if (exthdrs.ip6e_rthdr) {
441 			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
442 			segleft_org = rh->ip6r_segleft;
443 			rh->ip6r_segleft = 0;
444 		}
445 
446 		bzero(&state, sizeof(state));
447 		state.m = m;
448 		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
449 		    &needipsectun);
450 		m = state.m;
451 		if (error) {
452 			/* mbuf is already reclaimed in ipsec6_output_trans. */
453 			m = NULL;
454 			switch (error) {
455 			case EHOSTUNREACH:
456 			case ENETUNREACH:
457 			case EMSGSIZE:
458 			case ENOBUFS:
459 			case ENOMEM:
460 				break;
461 			default:
462 				printf("ip6_output (ipsec): error code %d\n", error);
463 				/* FALLTHROUGH */
464 			case ENOENT:
465 				/* don't show these error codes to the user */
466 				error = 0;
467 				break;
468 			}
469 			goto bad;
470 		}
471 		if (exthdrs.ip6e_rthdr) {
472 			/* ah6_output doesn't modify mbuf chain */
473 			rh->ip6r_segleft = segleft_org;
474 		}
475 	    }
476 skip_ipsec2:;
477 #endif
478 	}
479 
480 	/*
481 	 * If there is a routing header, replace the destination address field
482 	 * with the first hop of the routing header.
483 	 */
484 	if (exthdrs.ip6e_rthdr) {
485 		struct ip6_rthdr *rh =
486 			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
487 						  struct ip6_rthdr *));
488 		struct ip6_rthdr0 *rh0;
489 		struct in6_addr *addrs;
490 
491 		switch (rh->ip6r_type) {
492 		case IPV6_RTHDR_TYPE_0:
493 			 rh0 = (struct ip6_rthdr0 *)rh;
494 			 addrs = (struct in6_addr *)(rh0 + 1);
495 
496 			 ip6->ip6_dst = *addrs;
497 			 bcopy((caddr_t)(addrs + 1), (caddr_t)addrs,
498 			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
499 				 );
500 			 *(addrs + rh0->ip6r0_segleft - 1) = finaldst;
501 			 break;
502 		default:	/* is it possible? */
503 			 error = EINVAL;
504 			 goto bad;
505 		}
506 	}
507 
508 	/* Source address validation */
509 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
510 	    (flags & IPV6_DADOUTPUT) == 0) {
511 		error = EOPNOTSUPP;
512 		ip6stat.ip6s_badscope++;
513 		goto bad;
514 	}
515 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
516 		error = EOPNOTSUPP;
517 		ip6stat.ip6s_badscope++;
518 		goto bad;
519 	}
520 
521 	ip6stat.ip6s_localout++;
522 
523 	/*
524 	 * Route packet.
525 	 */
526 	if (ro == 0) {
527 		ro = &ip6route;
528 		bzero((caddr_t)ro, sizeof(*ro));
529 	}
530 	ro_pmtu = ro;
531 	if (opt && opt->ip6po_rthdr)
532 		ro = &opt->ip6po_route;
533 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
534 
535 	/*
536 	 * If there is a cached route,
537 	 * check that it is to the same destination
538 	 * and is still up. If not, free it and try again.
539 	 */
540 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
541 			 dst->sin6_family != AF_INET6 ||
542 			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
543 		RTFREE(ro->ro_rt);
544 		ro->ro_rt = (struct rtentry *)0;
545 	}
546 	if (ro->ro_rt == 0) {
547 		bzero(dst, sizeof(*dst));
548 		dst->sin6_family = AF_INET6;
549 		dst->sin6_len = sizeof(struct sockaddr_in6);
550 		dst->sin6_addr = ip6->ip6_dst;
551 	}
552 
553  	/*
554 	 * if specified, try to fill in the traffic class field.
555 	 * do not override if a non-zero value is already set.
556 	 * we check the diffserv field and the ecn field separately.
557 	 */
558 	if (opt && opt->ip6po_tclass >= 0) {
559 		int mask = 0;
560 
561 		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
562 			mask |= 0xfc;
563 		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
564 			mask |= 0x03;
565 		if (mask != 0)
566 			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
567 	}
568 
569 	/* fill in or override the hop limit field, if necessary. */
570 	if (opt && opt->ip6po_hlim != -1)
571 		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
572 	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
573 		if (im6o != NULL)
574 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
575 		else
576 			ip6->ip6_hlim = ip6_defmcasthlim;
577 	}
578 
579 #if defined(IPSEC) || defined(FAST_IPSEC)
580 	if (needipsec && needipsectun) {
581 		struct ipsec_output_state state;
582 
583 		/*
584 		 * All the extension headers will become inaccessible
585 		 * (since they can be encrypted).
586 		 * Don't panic, we need no more updates to extension headers
587 		 * on inner IPv6 packet (since they are now encapsulated).
588 		 *
589 		 * IPv6 [ESP|AH] IPv6 [extension headers] payload
590 		 */
591 		bzero(&exthdrs, sizeof(exthdrs));
592 		exthdrs.ip6e_ip6 = m;
593 
594 		bzero(&state, sizeof(state));
595 		state.m = m;
596 		state.ro = (struct route *)ro;
597 		state.dst = (struct sockaddr *)dst;
598 
599 		error = ipsec6_output_tunnel(&state, sp, flags);
600 
601 		m = state.m;
602 		ro = (struct route_in6 *)state.ro;
603 		dst = (struct sockaddr_in6 *)state.dst;
604 		if (error) {
605 			/* mbuf is already reclaimed in ipsec6_output_tunnel. */
606 			m0 = m = NULL;
607 			m = NULL;
608 			switch (error) {
609 			case EHOSTUNREACH:
610 			case ENETUNREACH:
611 			case EMSGSIZE:
612 			case ENOBUFS:
613 			case ENOMEM:
614 				break;
615 			default:
616 				printf("ip6_output (ipsec): error code %d\n", error);
617 				/* FALLTHROUGH */
618 			case ENOENT:
619 				/* don't show these error codes to the user */
620 				error = 0;
621 				break;
622 			}
623 			goto bad;
624 		}
625 
626 		exthdrs.ip6e_ip6 = m;
627 	}
628 #endif /* IPSEC */
629 
630 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
631 		/* Unicast */
632 
633 #define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
634 #define sin6tosa(sin6)	((struct sockaddr *)(sin6))
635 		/* xxx
636 		 * interface selection comes here
637 		 * if an interface is specified from an upper layer,
638 		 * ifp must point it.
639 		 */
640 		if (ro->ro_rt == 0) {
641 			/*
642 			 * non-bsdi always clone routes, if parent is
643 			 * PRF_CLONING.
644 			 */
645 			rtalloc((struct route *)ro);
646 		}
647 		if (ro->ro_rt == 0) {
648 			ip6stat.ip6s_noroute++;
649 			error = EHOSTUNREACH;
650 			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
651 			goto bad;
652 		}
653 		/* XXX rt not locked */
654 		ia = ifatoia6(ro->ro_rt->rt_ifa);
655 		ifp = ro->ro_rt->rt_ifp;
656 		ro->ro_rt->rt_rmx.rmx_pksent++;
657 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
658 			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
659 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
660 
661 		in6_ifstat_inc(ifp, ifs6_out_request);
662 
663 		/*
664 		 * Check if the outgoing interface conflicts with
665 		 * the interface specified by ifi6_ifindex (if specified).
666 		 * Note that loopback interface is always okay.
667 		 * (this may happen when we are sending a packet to one of
668 		 *  our own addresses.)
669 		 */
670 		if (opt && opt->ip6po_pktinfo
671 		 && opt->ip6po_pktinfo->ipi6_ifindex) {
672 			if (!(ifp->if_flags & IFF_LOOPBACK)
673 			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
674 				ip6stat.ip6s_noroute++;
675 				in6_ifstat_inc(ifp, ifs6_out_discard);
676 				error = EHOSTUNREACH;
677 				goto bad;
678 			}
679 		}
680 
681 		if (opt && opt->ip6po_hlim != -1)
682 			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
683 	} else {
684 		/* Multicast */
685 		struct	in6_multi *in6m;
686 
687 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
688 
689 		/*
690 		 * See if the caller provided any multicast options
691 		 */
692 		ifp = NULL;
693 		if (im6o != NULL) {
694 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
695 			if (im6o->im6o_multicast_ifp != NULL)
696 				ifp = im6o->im6o_multicast_ifp;
697 		} else
698 			ip6->ip6_hlim = ip6_defmcasthlim;
699 
700 		/*
701 		 * See if the caller provided the outgoing interface
702 		 * as an ancillary data.
703 		 * Boundary check for ifindex is assumed to be already done.
704 		 */
705 		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
706 			ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex);
707 
708 		/*
709 		 * If the destination is a node-local scope multicast,
710 		 * the packet should be loop-backed only.
711 		 */
712 		if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
713 			/*
714 			 * If the outgoing interface is already specified,
715 			 * it should be a loopback interface.
716 			 */
717 			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
718 				ip6stat.ip6s_badscope++;
719 				error = ENETUNREACH; /* XXX: better error? */
720 				/* XXX correct ifp? */
721 				in6_ifstat_inc(ifp, ifs6_out_discard);
722 				goto bad;
723 			} else {
724 				ifp = &loif[0];
725 			}
726 		}
727 
728 		if (opt && opt->ip6po_hlim != -1)
729 			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
730 
731 		/*
732 		 * If caller did not provide an interface lookup a
733 		 * default in the routing table.  This is either a
734 		 * default for the specified group (i.e. a host
735 		 * route), or a multicast default (a route for the
736 		 * ``net'' ff00::/8).
737 		 */
738 		if (ifp == NULL) {
739 			if (ro->ro_rt == 0)
740 				ro->ro_rt = rtalloc1((struct sockaddr *)
741 						&ro->ro_dst, 0, 0UL);
742 			else
743 				RT_LOCK(ro->ro_rt);
744 			if (ro->ro_rt == 0) {
745 				ip6stat.ip6s_noroute++;
746 				error = EHOSTUNREACH;
747 				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
748 				goto bad;
749 			}
750 			ia = ifatoia6(ro->ro_rt->rt_ifa);
751 			ifp = ro->ro_rt->rt_ifp;
752 			ro->ro_rt->rt_rmx.rmx_pksent++;
753 			RT_UNLOCK(ro->ro_rt);
754 		}
755 
756 		if ((flags & IPV6_FORWARDING) == 0)
757 			in6_ifstat_inc(ifp, ifs6_out_request);
758 		in6_ifstat_inc(ifp, ifs6_out_mcast);
759 
760 		/*
761 		 * Confirm that the outgoing interface supports multicast.
762 		 */
763 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
764 			ip6stat.ip6s_noroute++;
765 			in6_ifstat_inc(ifp, ifs6_out_discard);
766 			error = ENETUNREACH;
767 			goto bad;
768 		}
769 		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
770 		if (in6m != NULL &&
771 		   (im6o == NULL || im6o->im6o_multicast_loop)) {
772 			/*
773 			 * If we belong to the destination multicast group
774 			 * on the outgoing interface, and the caller did not
775 			 * forbid loopback, loop back a copy.
776 			 */
777 			ip6_mloopback(ifp, m, dst);
778 		} else {
779 			/*
780 			 * If we are acting as a multicast router, perform
781 			 * multicast forwarding as if the packet had just
782 			 * arrived on the interface to which we are about
783 			 * to send.  The multicast forwarding function
784 			 * recursively calls this function, using the
785 			 * IPV6_FORWARDING flag to prevent infinite recursion.
786 			 *
787 			 * Multicasts that are looped back by ip6_mloopback(),
788 			 * above, will be forwarded by the ip6_input() routine,
789 			 * if necessary.
790 			 */
791 			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
792 				if (ip6_mforward(ip6, ifp, m) != 0) {
793 					m_freem(m);
794 					goto done;
795 				}
796 			}
797 		}
798 		/*
799 		 * Multicasts with a hoplimit of zero may be looped back,
800 		 * above, but must not be transmitted on a network.
801 		 * Also, multicasts addressed to the loopback interface
802 		 * are not sent -- the above call to ip6_mloopback() will
803 		 * loop back a copy if this host actually belongs to the
804 		 * destination group on the loopback interface.
805 		 */
806 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
807 		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
808 			m_freem(m);
809 			goto done;
810 		}
811 	}
812 
813 	/*
814 	 * Fill the outgoing interface to tell the upper layer
815 	 * to increment per-interface statistics.
816 	 */
817 	if (ifpp)
818 		*ifpp = ifp;
819 
820 	/* Determine path MTU. */
821 	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
822 	    &alwaysfrag)) != 0)
823 		goto bad;
824 
825 	/*
826 	 * The caller of this function may specify to use the minimum MTU
827 	 * in some cases.
828 	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
829 	 * setting.  The logic is a bit complicated; by default, unicast
830 	 * packets will follow path MTU while multicast packets will be sent at
831 	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
832 	 * including unicast ones will be sent at the minimum MTU.  Multicast
833 	 * packets will always be sent at the minimum MTU unless
834 	 * IP6PO_MINMTU_DISABLE is explicitly specified.
835 	 * See RFC 3542 for more details.
836 	 */
837 	if (mtu > IPV6_MMTU) {
838 		if ((flags & IPV6_MINMTU))
839 			mtu = IPV6_MMTU;
840 		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
841 			mtu = IPV6_MMTU;
842 		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
843 			 (opt == NULL ||
844 			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
845 			mtu = IPV6_MMTU;
846 		}
847 	}
848 
849 	/* Fake scoped addresses */
850 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
851 		/*
852 		 * If source or destination address is a scoped address, and
853 		 * the packet is going to be sent to a loopback interface,
854 		 * we should keep the original interface.
855 		 */
856 
857 		/*
858 		 * XXX: this is a very experimental and temporary solution.
859 		 * We eventually have sockaddr_in6 and use the sin6_scope_id
860 		 * field of the structure here.
861 		 * We rely on the consistency between two scope zone ids
862 		 * of source and destination, which should already be assured.
863 		 * Larger scopes than link will be supported in the future.
864 		 */
865 		origifp = NULL;
866 		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
867 			origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1]));
868 		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
869 			origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1]));
870 		/*
871 		 * XXX: origifp can be NULL even in those two cases above.
872 		 * For example, if we remove the (only) link-local address
873 		 * from the loopback interface, and try to send a link-local
874 		 * address without link-id information.  Then the source
875 		 * address is ::1, and the destination address is the
876 		 * link-local address with its s6_addr16[1] being zero.
877 		 * What is worse, if the packet goes to the loopback interface
878 		 * by a default rejected route, the null pointer would be
879 		 * passed to looutput, and the kernel would hang.
880 		 * The following last resort would prevent such disaster.
881 		 */
882 		if (origifp == NULL)
883 			origifp = ifp;
884 	}
885 	else
886 		origifp = ifp;
887 	/*
888 	 * clear embedded scope identifiers if necessary.
889 	 * in6_clearscope will touch the addresses only when necessary.
890 	 */
891 	in6_clearscope(&ip6->ip6_src);
892 	in6_clearscope(&ip6->ip6_dst);
893 
894 	/*
895 	 * Check with the firewall...
896 	 */
897 	if (ip6_fw_enable && ip6_fw_chk_ptr) {
898 		u_short port = 0;
899 		m->m_pkthdr.rcvif = NULL;	/* XXX */
900 		/* If ipfw says divert, we have to just drop packet */
901 		if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
902 			m_freem(m);
903 			goto done;
904 		}
905 		if (!m) {
906 			error = EACCES;
907 			goto done;
908 		}
909 	}
910 
911 	/*
912 	 * If the outgoing packet contains a hop-by-hop options header,
913 	 * it must be examined and processed even by the source node.
914 	 * (RFC 2460, section 4.)
915 	 */
916 	if (exthdrs.ip6e_hbh) {
917 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
918 		u_int32_t dummy1; /* XXX unused */
919 		u_int32_t dummy2; /* XXX unused */
920 
921 #ifdef DIAGNOSTIC
922 		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
923 			panic("ip6e_hbh is not continuous");
924 #endif
925 		/*
926 		 *  XXX: if we have to send an ICMPv6 error to the sender,
927 		 *       we need the M_LOOP flag since icmp6_error() expects
928 		 *       the IPv6 and the hop-by-hop options header are
929 		 *       continuous unless the flag is set.
930 		 */
931 		m->m_flags |= M_LOOP;
932 		m->m_pkthdr.rcvif = ifp;
933 		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
934 		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
935 		    &dummy1, &dummy2) < 0) {
936 			/* m was already freed at this point */
937 			error = EINVAL;/* better error? */
938 			goto done;
939 		}
940 		m->m_flags &= ~M_LOOP; /* XXX */
941 		m->m_pkthdr.rcvif = NULL;
942 	}
943 
944 #ifdef PFIL_HOOKS
945 	/*
946 	 * Run through list of hooks for output packets.
947 	 */
948 	error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
949 	if (error != 0 || m == NULL)
950 		goto done;
951 	ip6 = mtod(m, struct ip6_hdr *);
952 #endif /* PFIL_HOOKS */
953 
954 	/*
955 	 * Send the packet to the outgoing interface.
956 	 * If necessary, do IPv6 fragmentation before sending.
957 	 *
958 	 * the logic here is rather complex:
959 	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
960 	 * 1-a:	send as is if tlen <= path mtu
961 	 * 1-b:	fragment if tlen > path mtu
962 	 *
963 	 * 2: if user asks us not to fragment (dontfrag == 1)
964 	 * 2-a:	send as is if tlen <= interface mtu
965 	 * 2-b:	error if tlen > interface mtu
966 	 *
967 	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
968 	 *	always fragment
969 	 *
970 	 * 4: if dontfrag == 1 && alwaysfrag == 1
971 	 *	error, as we cannot handle this conflicting request
972 	 */
973 	tlen = m->m_pkthdr.len;
974 
975 	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
976 		dontfrag = 1;
977 	else
978 		dontfrag = 0;
979 	if (dontfrag && alwaysfrag) {	/* case 4 */
980 		/* conflicting request - can't transmit */
981 		error = EMSGSIZE;
982 		goto bad;
983 	}
984 	if (dontfrag && tlen > IN6_LINKMTU(ifp)) {	/* case 2-b */
985 		/*
986 		 * Even if the DONTFRAG option is specified, we cannot send the
987 		 * packet when the data length is larger than the MTU of the
988 		 * outgoing interface.
989 		 * Notify the error by sending IPV6_PATHMTU ancillary data as
990 		 * well as returning an error code (the latter is not described
991 		 * in the API spec.)
992 		 */
993 		u_int32_t mtu32;
994 		struct ip6ctlparam ip6cp;
995 
996 		mtu32 = (u_int32_t)mtu;
997 		bzero(&ip6cp, sizeof(ip6cp));
998 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
999 		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1000 		    (void *)&ip6cp);
1001 
1002 		error = EMSGSIZE;
1003 		goto bad;
1004 	}
1005 
1006 	/*
1007 	 * transmit packet without fragmentation
1008 	 */
1009 	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
1010 		struct in6_ifaddr *ia6;
1011 
1012 		ip6 = mtod(m, struct ip6_hdr *);
1013 		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1014 		if (ia6) {
1015 			/* Record statistics for this interface address. */
1016 			ia6->ia_ifa.if_opackets++;
1017 			ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
1018 		}
1019 #ifdef IPSEC
1020 		/* clean ipsec history once it goes out of the node */
1021 		ipsec_delaux(m);
1022 #endif
1023 		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1024 		goto done;
1025 	}
1026 
1027 	/*
1028 	 * try to fragment the packet.  case 1-b and 3
1029 	 */
1030 	if (mtu < IPV6_MMTU) {
1031 		/* path MTU cannot be less than IPV6_MMTU */
1032 		error = EMSGSIZE;
1033 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1034 		goto bad;
1035 	} else if (ip6->ip6_plen == 0) {
1036 		/* jumbo payload cannot be fragmented */
1037 		error = EMSGSIZE;
1038 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
1039 		goto bad;
1040 	} else {
1041 		struct mbuf **mnext, *m_frgpart;
1042 		struct ip6_frag *ip6f;
1043 #ifdef RANDOM_IP_ID
1044 		u_int32_t id = htonl(ip6_randomid());
1045 #else
1046 		u_int32_t id = htonl(ip6_id++);
1047 #endif
1048 		u_char nextproto;
1049 		struct ip6ctlparam ip6cp;
1050 		u_int32_t mtu32;
1051 
1052 		/*
1053 		 * Too large for the destination or interface;
1054 		 * fragment if possible.
1055 		 * Must be able to put at least 8 bytes per fragment.
1056 		 */
1057 		hlen = unfragpartlen;
1058 		if (mtu > IPV6_MAXPACKET)
1059 			mtu = IPV6_MAXPACKET;
1060 
1061 		/* Notify a proper path MTU to applications. */
1062 		mtu32 = (u_int32_t)mtu;
1063 		bzero(&ip6cp, sizeof(ip6cp));
1064 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
1065 		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1066 		    (void *)&ip6cp);
1067 
1068 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1069 		if (len < 8) {
1070 			error = EMSGSIZE;
1071 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
1072 			goto bad;
1073 		}
1074 
1075 		mnext = &m->m_nextpkt;
1076 
1077 		/*
1078 		 * Change the next header field of the last header in the
1079 		 * unfragmentable part.
1080 		 */
1081 		if (exthdrs.ip6e_rthdr) {
1082 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1083 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1084 		} else if (exthdrs.ip6e_dest1) {
1085 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1086 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1087 		} else if (exthdrs.ip6e_hbh) {
1088 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1089 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1090 		} else {
1091 			nextproto = ip6->ip6_nxt;
1092 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
1093 		}
1094 
1095 		/*
1096 		 * Loop through length of segment after first fragment,
1097 		 * make new header and copy data of each part and link onto
1098 		 * chain.
1099 		 */
1100 		m0 = m;
1101 		for (off = hlen; off < tlen; off += len) {
1102 			MGETHDR(m, M_DONTWAIT, MT_HEADER);
1103 			if (!m) {
1104 				error = ENOBUFS;
1105 				ip6stat.ip6s_odropped++;
1106 				goto sendorfree;
1107 			}
1108 			m->m_pkthdr.rcvif = NULL;
1109 			m->m_flags = m0->m_flags & M_COPYFLAGS;
1110 			*mnext = m;
1111 			mnext = &m->m_nextpkt;
1112 			m->m_data += max_linkhdr;
1113 			mhip6 = mtod(m, struct ip6_hdr *);
1114 			*mhip6 = *ip6;
1115 			m->m_len = sizeof(*mhip6);
1116 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1117 			if (error) {
1118 				ip6stat.ip6s_odropped++;
1119 				goto sendorfree;
1120 			}
1121 			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1122 			if (off + len >= tlen)
1123 				len = tlen - off;
1124 			else
1125 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1126 			mhip6->ip6_plen = htons((u_short)(len + hlen +
1127 			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1128 			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1129 				error = ENOBUFS;
1130 				ip6stat.ip6s_odropped++;
1131 				goto sendorfree;
1132 			}
1133 			m_cat(m, m_frgpart);
1134 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1135 			m->m_pkthdr.rcvif = (struct ifnet *)0;
1136 			ip6f->ip6f_reserved = 0;
1137 			ip6f->ip6f_ident = id;
1138 			ip6f->ip6f_nxt = nextproto;
1139 			ip6stat.ip6s_ofragments++;
1140 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1141 		}
1142 
1143 		in6_ifstat_inc(ifp, ifs6_out_fragok);
1144 	}
1145 
1146 	/*
1147 	 * Remove leading garbages.
1148 	 */
1149 sendorfree:
1150 	m = m0->m_nextpkt;
1151 	m0->m_nextpkt = 0;
1152 	m_freem(m0);
1153 	for (m0 = m; m; m = m0) {
1154 		m0 = m->m_nextpkt;
1155 		m->m_nextpkt = 0;
1156 		if (error == 0) {
1157  			/* Record statistics for this interface address. */
1158  			if (ia) {
1159  				ia->ia_ifa.if_opackets++;
1160  				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1161  			}
1162 #ifdef IPSEC
1163 			/* clean ipsec history once it goes out of the node */
1164 			ipsec_delaux(m);
1165 #endif
1166 			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1167 		} else
1168 			m_freem(m);
1169 	}
1170 
1171 	if (error == 0)
1172 		ip6stat.ip6s_fragmented++;
1173 
1174 done:
1175 	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1176 		RTFREE(ro->ro_rt);
1177 	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1178 		RTFREE(ro_pmtu->ro_rt);
1179 	}
1180 
1181 #ifdef IPSEC
1182 	if (sp != NULL)
1183 		key_freesp(sp);
1184 #endif /* IPSEC */
1185 #ifdef FAST_IPSEC
1186 	if (sp != NULL)
1187 		KEY_FREESP(&sp);
1188 #endif /* FAST_IPSEC */
1189 
1190 	return (error);
1191 
1192 freehdrs:
1193 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1194 	m_freem(exthdrs.ip6e_dest1);
1195 	m_freem(exthdrs.ip6e_rthdr);
1196 	m_freem(exthdrs.ip6e_dest2);
1197 	/* FALLTHROUGH */
1198 bad:
1199 	m_freem(m);
1200 	goto done;
1201 }
1202 
1203 static int
1204 ip6_copyexthdr(mp, hdr, hlen)
1205 	struct mbuf **mp;
1206 	caddr_t hdr;
1207 	int hlen;
1208 {
1209 	struct mbuf *m;
1210 
1211 	if (hlen > MCLBYTES)
1212 		return (ENOBUFS); /* XXX */
1213 
1214 	MGET(m, M_DONTWAIT, MT_DATA);
1215 	if (!m)
1216 		return (ENOBUFS);
1217 
1218 	if (hlen > MLEN) {
1219 		MCLGET(m, M_DONTWAIT);
1220 		if ((m->m_flags & M_EXT) == 0) {
1221 			m_free(m);
1222 			return (ENOBUFS);
1223 		}
1224 	}
1225 	m->m_len = hlen;
1226 	if (hdr)
1227 		bcopy(hdr, mtod(m, caddr_t), hlen);
1228 
1229 	*mp = m;
1230 	return (0);
1231 }
1232 
1233 /*
1234  * Insert jumbo payload option.
1235  */
1236 static int
1237 ip6_insert_jumboopt(exthdrs, plen)
1238 	struct ip6_exthdrs *exthdrs;
1239 	u_int32_t plen;
1240 {
1241 	struct mbuf *mopt;
1242 	u_char *optbuf;
1243 	u_int32_t v;
1244 
1245 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1246 
1247 	/*
1248 	 * If there is no hop-by-hop options header, allocate new one.
1249 	 * If there is one but it doesn't have enough space to store the
1250 	 * jumbo payload option, allocate a cluster to store the whole options.
1251 	 * Otherwise, use it to store the options.
1252 	 */
1253 	if (exthdrs->ip6e_hbh == 0) {
1254 		MGET(mopt, M_DONTWAIT, MT_DATA);
1255 		if (mopt == 0)
1256 			return (ENOBUFS);
1257 		mopt->m_len = JUMBOOPTLEN;
1258 		optbuf = mtod(mopt, u_char *);
1259 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1260 		exthdrs->ip6e_hbh = mopt;
1261 	} else {
1262 		struct ip6_hbh *hbh;
1263 
1264 		mopt = exthdrs->ip6e_hbh;
1265 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1266 			/*
1267 			 * XXX assumption:
1268 			 * - exthdrs->ip6e_hbh is not referenced from places
1269 			 *   other than exthdrs.
1270 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1271 			 */
1272 			int oldoptlen = mopt->m_len;
1273 			struct mbuf *n;
1274 
1275 			/*
1276 			 * XXX: give up if the whole (new) hbh header does
1277 			 * not fit even in an mbuf cluster.
1278 			 */
1279 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1280 				return (ENOBUFS);
1281 
1282 			/*
1283 			 * As a consequence, we must always prepare a cluster
1284 			 * at this point.
1285 			 */
1286 			MGET(n, M_DONTWAIT, MT_DATA);
1287 			if (n) {
1288 				MCLGET(n, M_DONTWAIT);
1289 				if ((n->m_flags & M_EXT) == 0) {
1290 					m_freem(n);
1291 					n = NULL;
1292 				}
1293 			}
1294 			if (!n)
1295 				return (ENOBUFS);
1296 			n->m_len = oldoptlen + JUMBOOPTLEN;
1297 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1298 			    oldoptlen);
1299 			optbuf = mtod(n, caddr_t) + oldoptlen;
1300 			m_freem(mopt);
1301 			mopt = exthdrs->ip6e_hbh = n;
1302 		} else {
1303 			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1304 			mopt->m_len += JUMBOOPTLEN;
1305 		}
1306 		optbuf[0] = IP6OPT_PADN;
1307 		optbuf[1] = 1;
1308 
1309 		/*
1310 		 * Adjust the header length according to the pad and
1311 		 * the jumbo payload option.
1312 		 */
1313 		hbh = mtod(mopt, struct ip6_hbh *);
1314 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1315 	}
1316 
1317 	/* fill in the option. */
1318 	optbuf[2] = IP6OPT_JUMBO;
1319 	optbuf[3] = 4;
1320 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1321 	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1322 
1323 	/* finally, adjust the packet header length */
1324 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1325 
1326 	return (0);
1327 #undef JUMBOOPTLEN
1328 }
1329 
1330 /*
1331  * Insert fragment header and copy unfragmentable header portions.
1332  */
1333 static int
1334 ip6_insertfraghdr(m0, m, hlen, frghdrp)
1335 	struct mbuf *m0, *m;
1336 	int hlen;
1337 	struct ip6_frag **frghdrp;
1338 {
1339 	struct mbuf *n, *mlast;
1340 
1341 	if (hlen > sizeof(struct ip6_hdr)) {
1342 		n = m_copym(m0, sizeof(struct ip6_hdr),
1343 		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1344 		if (n == 0)
1345 			return (ENOBUFS);
1346 		m->m_next = n;
1347 	} else
1348 		n = m;
1349 
1350 	/* Search for the last mbuf of unfragmentable part. */
1351 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1352 		;
1353 
1354 	if ((mlast->m_flags & M_EXT) == 0 &&
1355 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1356 		/* use the trailing space of the last mbuf for the fragment hdr */
1357 		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1358 		    mlast->m_len);
1359 		mlast->m_len += sizeof(struct ip6_frag);
1360 		m->m_pkthdr.len += sizeof(struct ip6_frag);
1361 	} else {
1362 		/* allocate a new mbuf for the fragment header */
1363 		struct mbuf *mfrg;
1364 
1365 		MGET(mfrg, M_DONTWAIT, MT_DATA);
1366 		if (mfrg == 0)
1367 			return (ENOBUFS);
1368 		mfrg->m_len = sizeof(struct ip6_frag);
1369 		*frghdrp = mtod(mfrg, struct ip6_frag *);
1370 		mlast->m_next = mfrg;
1371 	}
1372 
1373 	return (0);
1374 }
1375 
1376 static int
1377 ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1378 	struct route_in6 *ro_pmtu, *ro;
1379 	struct ifnet *ifp;
1380 	struct in6_addr *dst;
1381 	u_long *mtup;
1382 	int *alwaysfragp;
1383 {
1384 	u_int32_t mtu = 0;
1385 	int alwaysfrag = 0;
1386 	int error = 0;
1387 
1388 	if (ro_pmtu != ro) {
1389 		/* The first hop and the final destination may differ. */
1390 		struct sockaddr_in6 *sa6_dst =
1391 		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1392 		if (ro_pmtu->ro_rt &&
1393 		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1394 		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1395 			RTFREE(ro_pmtu->ro_rt);
1396 			ro_pmtu->ro_rt = (struct rtentry *)NULL;
1397 		}
1398 		if (ro_pmtu->ro_rt == NULL) {
1399 			bzero(sa6_dst, sizeof(*sa6_dst));
1400 			sa6_dst->sin6_family = AF_INET6;
1401 			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1402 			sa6_dst->sin6_addr = *dst;
1403 
1404 			rtalloc((struct route *)ro_pmtu);
1405 		}
1406 	}
1407 	if (ro_pmtu->ro_rt) {
1408 		u_int32_t ifmtu;
1409 		struct in_conninfo inc;
1410 
1411 		bzero(&inc, sizeof(inc));
1412 		inc.inc_flags = 1; /* IPv6 */
1413 		inc.inc6_faddr = *dst;
1414 
1415 		if (ifp == NULL)
1416 			ifp = ro_pmtu->ro_rt->rt_ifp;
1417 		ifmtu = IN6_LINKMTU(ifp);
1418 		mtu = tcp_hc_getmtu(&inc);
1419 		if (mtu)
1420 			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1421 		else
1422 			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1423 		if (mtu == 0)
1424 			mtu = ifmtu;
1425 		else if (mtu < IPV6_MMTU) {
1426 			/*
1427 			 * RFC2460 section 5, last paragraph:
1428 			 * if we record ICMPv6 too big message with
1429 			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1430 			 * or smaller, with framgent header attached.
1431 			 * (fragment header is needed regardless from the
1432 			 * packet size, for translators to identify packets)
1433 			 */
1434 			alwaysfrag = 1;
1435 			mtu = IPV6_MMTU;
1436 		} else if (mtu > ifmtu) {
1437 			/*
1438 			 * The MTU on the route is larger than the MTU on
1439 			 * the interface!  This shouldn't happen, unless the
1440 			 * MTU of the interface has been changed after the
1441 			 * interface was brought up.  Change the MTU in the
1442 			 * route to match the interface MTU (as long as the
1443 			 * field isn't locked).
1444 			 */
1445 			mtu = ifmtu;
1446 			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1447 		}
1448 	} else if (ifp) {
1449 		mtu = IN6_LINKMTU(ifp);
1450 	} else
1451 		error = EHOSTUNREACH; /* XXX */
1452 
1453 	*mtup = mtu;
1454 	if (alwaysfragp)
1455 		*alwaysfragp = alwaysfrag;
1456 	return (error);
1457 }
1458 
1459 /*
1460  * IP6 socket option processing.
1461  */
1462 int
1463 ip6_ctloutput(so, sopt)
1464 	struct socket *so;
1465 	struct sockopt *sopt;
1466 {
1467 	int privileged, optdatalen, uproto;
1468 	void *optdata;
1469 	struct inpcb *in6p = sotoinpcb(so);
1470 	int error, optval;
1471 	int level, op, optname;
1472 	int optlen;
1473 	struct thread *td;
1474 
1475 	if (sopt) {
1476 		level = sopt->sopt_level;
1477 		op = sopt->sopt_dir;
1478 		optname = sopt->sopt_name;
1479 		optlen = sopt->sopt_valsize;
1480 		td = sopt->sopt_td;
1481 	} else {
1482 		panic("ip6_ctloutput: arg soopt is NULL");
1483 	}
1484 	error = optval = 0;
1485 
1486 	privileged = (td == 0 || suser(td)) ? 0 : 1;
1487 	uproto = (int)so->so_proto->pr_protocol;
1488 
1489 	if (level == IPPROTO_IPV6) {
1490 		switch (op) {
1491 
1492 		case SOPT_SET:
1493 			switch (optname) {
1494 			case IPV6_2292PKTOPTIONS:
1495 #ifdef IPV6_PKTOPTIONS
1496 			case IPV6_PKTOPTIONS:
1497 #endif
1498 			{
1499 				struct mbuf *m;
1500 
1501 				error = soopt_getm(sopt, &m); /* XXX */
1502 				if (error != 0)
1503 					break;
1504 				error = soopt_mcopyin(sopt, m); /* XXX */
1505 				if (error != 0)
1506 					break;
1507 				error = ip6_pcbopts(&in6p->in6p_outputopts,
1508 						    m, so, sopt);
1509 				m_freem(m); /* XXX */
1510 				break;
1511 			}
1512 
1513 			/*
1514 			 * Use of some Hop-by-Hop options or some
1515 			 * Destination options, might require special
1516 			 * privilege.  That is, normal applications
1517 			 * (without special privilege) might be forbidden
1518 			 * from setting certain options in outgoing packets,
1519 			 * and might never see certain options in received
1520 			 * packets. [RFC 2292 Section 6]
1521 			 * KAME specific note:
1522 			 *  KAME prevents non-privileged users from sending or
1523 			 *  receiving ANY hbh/dst options in order to avoid
1524 			 *  overhead of parsing options in the kernel.
1525 			 */
1526 			case IPV6_RECVHOPOPTS:
1527 			case IPV6_RECVDSTOPTS:
1528 			case IPV6_RECVRTHDRDSTOPTS:
1529 				if (!privileged) {
1530 					error = EPERM;
1531 					break;
1532 				}
1533 				/* FALLTHROUGH */
1534 			case IPV6_UNICAST_HOPS:
1535 			case IPV6_HOPLIMIT:
1536 			case IPV6_FAITH:
1537 
1538 			case IPV6_RECVPKTINFO:
1539 			case IPV6_RECVHOPLIMIT:
1540 			case IPV6_RECVRTHDR:
1541 			case IPV6_RECVPATHMTU:
1542 			case IPV6_RECVTCLASS:
1543 			case IPV6_V6ONLY:
1544 			case IPV6_AUTOFLOWLABEL:
1545 				if (optlen != sizeof(int)) {
1546 					error = EINVAL;
1547 					break;
1548 				}
1549 				error = sooptcopyin(sopt, &optval,
1550 					sizeof optval, sizeof optval);
1551 				if (error)
1552 					break;
1553 				switch (optname) {
1554 
1555 				case IPV6_UNICAST_HOPS:
1556 					if (optval < -1 || optval >= 256)
1557 						error = EINVAL;
1558 					else {
1559 						/* -1 = kernel default */
1560 						in6p->in6p_hops = optval;
1561 						if ((in6p->in6p_vflag &
1562 						     INP_IPV4) != 0)
1563 							in6p->inp_ip_ttl = optval;
1564 					}
1565 					break;
1566 #define OPTSET(bit) \
1567 do { \
1568 	if (optval) \
1569 		in6p->in6p_flags |= (bit); \
1570 	else \
1571 		in6p->in6p_flags &= ~(bit); \
1572 } while (/*CONSTCOND*/ 0)
1573 #define OPTSET2292(bit) \
1574 do { \
1575 	in6p->in6p_flags |= IN6P_RFC2292; \
1576 	if (optval) \
1577 		in6p->in6p_flags |= (bit); \
1578 	else \
1579 		in6p->in6p_flags &= ~(bit); \
1580 } while (/*CONSTCOND*/ 0)
1581 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1582 
1583 				case IPV6_RECVPKTINFO:
1584 					/* cannot mix with RFC2292 */
1585 					if (OPTBIT(IN6P_RFC2292)) {
1586 						error = EINVAL;
1587 						break;
1588 					}
1589 					OPTSET(IN6P_PKTINFO);
1590 					break;
1591 
1592 				case IPV6_HOPLIMIT:
1593 				{
1594 					struct ip6_pktopts **optp;
1595 
1596 					/* cannot mix with RFC2292 */
1597 					if (OPTBIT(IN6P_RFC2292)) {
1598 						error = EINVAL;
1599 						break;
1600 					}
1601 					optp = &in6p->in6p_outputopts;
1602 					error = ip6_pcbopt(IPV6_HOPLIMIT,
1603 							   (u_char *)&optval,
1604 							   sizeof(optval),
1605 							   optp,
1606 							   privileged, uproto);
1607 					break;
1608 				}
1609 
1610 				case IPV6_RECVHOPLIMIT:
1611 					/* cannot mix with RFC2292 */
1612 					if (OPTBIT(IN6P_RFC2292)) {
1613 						error = EINVAL;
1614 						break;
1615 					}
1616 					OPTSET(IN6P_HOPLIMIT);
1617 					break;
1618 
1619 				case IPV6_RECVHOPOPTS:
1620 					/* cannot mix with RFC2292 */
1621 					if (OPTBIT(IN6P_RFC2292)) {
1622 						error = EINVAL;
1623 						break;
1624 					}
1625 					OPTSET(IN6P_HOPOPTS);
1626 					break;
1627 
1628 				case IPV6_RECVDSTOPTS:
1629 					/* cannot mix with RFC2292 */
1630 					if (OPTBIT(IN6P_RFC2292)) {
1631 						error = EINVAL;
1632 						break;
1633 					}
1634 					OPTSET(IN6P_DSTOPTS);
1635 					break;
1636 
1637 				case IPV6_RECVRTHDRDSTOPTS:
1638 					/* cannot mix with RFC2292 */
1639 					if (OPTBIT(IN6P_RFC2292)) {
1640 						error = EINVAL;
1641 						break;
1642 					}
1643 					OPTSET(IN6P_RTHDRDSTOPTS);
1644 					break;
1645 
1646 				case IPV6_RECVRTHDR:
1647 					/* cannot mix with RFC2292 */
1648 					if (OPTBIT(IN6P_RFC2292)) {
1649 						error = EINVAL;
1650 						break;
1651 					}
1652 					OPTSET(IN6P_RTHDR);
1653 					break;
1654 
1655 				case IPV6_FAITH:
1656 					OPTSET(IN6P_FAITH);
1657 					break;
1658 
1659 				case IPV6_RECVPATHMTU:
1660 					/*
1661 					 * We ignore this option for TCP
1662 					 * sockets.
1663 					 * (rfc2292bis leaves this case
1664 					 * unspecified.)
1665 					 */
1666 					if (uproto != IPPROTO_TCP)
1667 						OPTSET(IN6P_MTU);
1668 					break;
1669 
1670 				case IPV6_V6ONLY:
1671 					/*
1672 					 * make setsockopt(IPV6_V6ONLY)
1673 					 * available only prior to bind(2).
1674 					 * see ipng mailing list, Jun 22 2001.
1675 					 */
1676 					if (in6p->in6p_lport ||
1677 					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1678 						error = EINVAL;
1679 						break;
1680 					}
1681 					OPTSET(IN6P_IPV6_V6ONLY);
1682 					if (optval)
1683 						in6p->in6p_vflag &= ~INP_IPV4;
1684 					else
1685 						in6p->in6p_vflag |= INP_IPV4;
1686 					break;
1687 				case IPV6_RECVTCLASS:
1688 					/* cannot mix with RFC2292 XXX */
1689 					if (OPTBIT(IN6P_RFC2292)) {
1690 						error = EINVAL;
1691 						break;
1692 					}
1693 					OPTSET(IN6P_TCLASS);
1694 					break;
1695 				case IPV6_AUTOFLOWLABEL:
1696 					OPTSET(IN6P_AUTOFLOWLABEL);
1697 					break;
1698 
1699 				}
1700 				break;
1701 
1702 			case IPV6_TCLASS:
1703 			case IPV6_DONTFRAG:
1704 			case IPV6_USE_MIN_MTU:
1705 			case IPV6_PREFER_TEMPADDR:
1706 				if (optlen != sizeof(optval)) {
1707 					error = EINVAL;
1708 					break;
1709 				}
1710 				error = sooptcopyin(sopt, &optval,
1711 					sizeof optval, sizeof optval);
1712 				if (error)
1713 					break;
1714 				{
1715 					struct ip6_pktopts **optp;
1716 					optp = &in6p->in6p_outputopts;
1717 					error = ip6_pcbopt(optname,
1718 							   (u_char *)&optval,
1719 							   sizeof(optval),
1720 							   optp,
1721 							   privileged, uproto);
1722 					break;
1723 				}
1724 
1725 			case IPV6_2292PKTINFO:
1726 			case IPV6_2292HOPLIMIT:
1727 			case IPV6_2292HOPOPTS:
1728 			case IPV6_2292DSTOPTS:
1729 			case IPV6_2292RTHDR:
1730 				/* RFC 2292 */
1731 				if (optlen != sizeof(int)) {
1732 					error = EINVAL;
1733 					break;
1734 				}
1735 				error = sooptcopyin(sopt, &optval,
1736 					sizeof optval, sizeof optval);
1737 				if (error)
1738 					break;
1739 				switch (optname) {
1740 				case IPV6_2292PKTINFO:
1741 					OPTSET2292(IN6P_PKTINFO);
1742 					break;
1743 				case IPV6_2292HOPLIMIT:
1744 					OPTSET2292(IN6P_HOPLIMIT);
1745 					break;
1746 				case IPV6_2292HOPOPTS:
1747 					/*
1748 					 * Check super-user privilege.
1749 					 * See comments for IPV6_RECVHOPOPTS.
1750 					 */
1751 					if (!privileged)
1752 						return (EPERM);
1753 					OPTSET2292(IN6P_HOPOPTS);
1754 					break;
1755 				case IPV6_2292DSTOPTS:
1756 					if (!privileged)
1757 						return (EPERM);
1758 					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1759 					break;
1760 				case IPV6_2292RTHDR:
1761 					OPTSET2292(IN6P_RTHDR);
1762 					break;
1763 				}
1764 				break;
1765 			case IPV6_PKTINFO:
1766 			case IPV6_HOPOPTS:
1767 			case IPV6_RTHDR:
1768 			case IPV6_DSTOPTS:
1769 			case IPV6_RTHDRDSTOPTS:
1770 			case IPV6_NEXTHOP:
1771 			{
1772 				/* new advanced API (2292bis) */
1773 				u_char *optbuf;
1774 				int optlen;
1775 				struct ip6_pktopts **optp;
1776 
1777 				/* cannot mix with RFC2292 */
1778 				if (OPTBIT(IN6P_RFC2292)) {
1779 					error = EINVAL;
1780 					break;
1781 				}
1782 
1783 				optbuf = sopt->sopt_val;
1784 				optlen = sopt->sopt_valsize;
1785 				optp = &in6p->in6p_outputopts;
1786 				error = ip6_pcbopt(optname,
1787 						   optbuf, optlen,
1788 						   optp, privileged, uproto);
1789 				break;
1790 			}
1791 #undef OPTSET
1792 
1793 			case IPV6_MULTICAST_IF:
1794 			case IPV6_MULTICAST_HOPS:
1795 			case IPV6_MULTICAST_LOOP:
1796 			case IPV6_JOIN_GROUP:
1797 			case IPV6_LEAVE_GROUP:
1798 			    {
1799 				if (sopt->sopt_valsize > MLEN) {
1800 					error = EMSGSIZE;
1801 					break;
1802 				}
1803 				/* XXX */
1804 			    }
1805 			    /* FALLTHROUGH */
1806 			    {
1807 				struct mbuf *m;
1808 
1809 				if (sopt->sopt_valsize > MCLBYTES) {
1810 					error = EMSGSIZE;
1811 					break;
1812 				}
1813 				/* XXX */
1814 				MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_HEADER);
1815 				if (m == 0) {
1816 					error = ENOBUFS;
1817 					break;
1818 				}
1819 				if (sopt->sopt_valsize > MLEN) {
1820 					MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1821 					if ((m->m_flags & M_EXT) == 0) {
1822 						m_free(m);
1823 						error = ENOBUFS;
1824 						break;
1825 					}
1826 				}
1827 				m->m_len = sopt->sopt_valsize;
1828 				error = sooptcopyin(sopt, mtod(m, char *),
1829 						    m->m_len, m->m_len);
1830 				if (error) {
1831 					(void)m_free(m);
1832 					break;
1833 				}
1834 				error =	ip6_setmoptions(sopt->sopt_name,
1835 							&in6p->in6p_moptions,
1836 							m);
1837 				(void)m_free(m);
1838 			    }
1839 				break;
1840 
1841 			case IPV6_PORTRANGE:
1842 				error = sooptcopyin(sopt, &optval,
1843 				    sizeof optval, sizeof optval);
1844 				if (error)
1845 					break;
1846 
1847 				switch (optval) {
1848 				case IPV6_PORTRANGE_DEFAULT:
1849 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1850 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1851 					break;
1852 
1853 				case IPV6_PORTRANGE_HIGH:
1854 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1855 					in6p->in6p_flags |= IN6P_HIGHPORT;
1856 					break;
1857 
1858 				case IPV6_PORTRANGE_LOW:
1859 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1860 					in6p->in6p_flags |= IN6P_LOWPORT;
1861 					break;
1862 
1863 				default:
1864 					error = EINVAL;
1865 					break;
1866 				}
1867 				break;
1868 
1869 #if defined(IPSEC) || defined(FAST_IPSEC)
1870 			case IPV6_IPSEC_POLICY:
1871 			    {
1872 				caddr_t req = NULL;
1873 				size_t len = 0;
1874 				struct mbuf *m;
1875 
1876 				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1877 					break;
1878 				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1879 					break;
1880 				if (m) {
1881 					req = mtod(m, caddr_t);
1882 					len = m->m_len;
1883 				}
1884 				error = ipsec6_set_policy(in6p, optname, req,
1885 							  len, privileged);
1886 				m_freem(m);
1887 			    }
1888 				break;
1889 #endif /* KAME IPSEC */
1890 
1891 			case IPV6_FW_ADD:
1892 			case IPV6_FW_DEL:
1893 			case IPV6_FW_FLUSH:
1894 			case IPV6_FW_ZERO:
1895 			    {
1896 				struct mbuf *m;
1897 				struct mbuf **mp = &m;
1898 
1899 				if (ip6_fw_ctl_ptr == NULL)
1900 					return EINVAL;
1901 				/* XXX */
1902 				if ((error = soopt_getm(sopt, &m)) != 0)
1903 					break;
1904 				/* XXX */
1905 				if ((error = soopt_mcopyin(sopt, m)) != 0)
1906 					break;
1907 				error = (*ip6_fw_ctl_ptr)(optname, mp);
1908 				m = *mp;
1909 			    }
1910 				break;
1911 
1912 			default:
1913 				error = ENOPROTOOPT;
1914 				break;
1915 			}
1916 			break;
1917 
1918 		case SOPT_GET:
1919 			switch (optname) {
1920 
1921 			case IPV6_2292PKTOPTIONS:
1922 #ifdef IPV6_PKTOPTIONS
1923 			case IPV6_PKTOPTIONS:
1924 #endif
1925 				/*
1926 				 * RFC3542 (effectively) deprecated the
1927 				 * semantics of the 2292-style pktoptions.
1928 				 * Since it was not reliable in nature (i.e.,
1929 				 * applications had to expect the lack of some
1930 				 * information after all), it would make sense
1931 				 * to simplify this part by always returning
1932 				 * empty data.
1933 				 */
1934 				sopt->sopt_valsize = 0;
1935 				break;
1936 
1937 			case IPV6_RECVHOPOPTS:
1938 			case IPV6_RECVDSTOPTS:
1939 			case IPV6_RECVRTHDRDSTOPTS:
1940 			case IPV6_UNICAST_HOPS:
1941 			case IPV6_RECVPKTINFO:
1942 			case IPV6_RECVHOPLIMIT:
1943 			case IPV6_RECVRTHDR:
1944 			case IPV6_RECVPATHMTU:
1945 
1946 			case IPV6_FAITH:
1947 			case IPV6_V6ONLY:
1948 			case IPV6_PORTRANGE:
1949 			case IPV6_RECVTCLASS:
1950 			case IPV6_AUTOFLOWLABEL:
1951 				switch (optname) {
1952 
1953 				case IPV6_RECVHOPOPTS:
1954 					optval = OPTBIT(IN6P_HOPOPTS);
1955 					break;
1956 
1957 				case IPV6_RECVDSTOPTS:
1958 					optval = OPTBIT(IN6P_DSTOPTS);
1959 					break;
1960 
1961 				case IPV6_RECVRTHDRDSTOPTS:
1962 					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1963 					break;
1964 
1965 				case IPV6_UNICAST_HOPS:
1966 					optval = in6p->in6p_hops;
1967 					break;
1968 
1969 				case IPV6_RECVPKTINFO:
1970 					optval = OPTBIT(IN6P_PKTINFO);
1971 					break;
1972 
1973 				case IPV6_RECVHOPLIMIT:
1974 					optval = OPTBIT(IN6P_HOPLIMIT);
1975 					break;
1976 
1977 				case IPV6_RECVRTHDR:
1978 					optval = OPTBIT(IN6P_RTHDR);
1979 					break;
1980 
1981 				case IPV6_RECVPATHMTU:
1982 					optval = OPTBIT(IN6P_MTU);
1983 					break;
1984 
1985 				case IPV6_FAITH:
1986 					optval = OPTBIT(IN6P_FAITH);
1987 					break;
1988 
1989 				case IPV6_V6ONLY:
1990 					optval = OPTBIT(IN6P_IPV6_V6ONLY);
1991 					break;
1992 
1993 				case IPV6_PORTRANGE:
1994 				    {
1995 					int flags;
1996 					flags = in6p->in6p_flags;
1997 					if (flags & IN6P_HIGHPORT)
1998 						optval = IPV6_PORTRANGE_HIGH;
1999 					else if (flags & IN6P_LOWPORT)
2000 						optval = IPV6_PORTRANGE_LOW;
2001 					else
2002 						optval = 0;
2003 					break;
2004 				    }
2005 				case IPV6_RECVTCLASS:
2006 					optval = OPTBIT(IN6P_TCLASS);
2007 					break;
2008 
2009 				case IPV6_AUTOFLOWLABEL:
2010 					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2011 					break;
2012 				}
2013 				if (error)
2014 					break;
2015 				error = sooptcopyout(sopt, &optval,
2016 					sizeof optval);
2017 				break;
2018 
2019 			case IPV6_PATHMTU:
2020 			{
2021 				u_long pmtu = 0;
2022 				struct ip6_mtuinfo mtuinfo;
2023 				struct route_in6 sro;
2024 
2025 				bzero(&sro, sizeof(sro));
2026 
2027 				if (!(so->so_state & SS_ISCONNECTED))
2028 					return (ENOTCONN);
2029 				/*
2030 				 * XXX: we dot not consider the case of source
2031 				 * routing, or optional information to specify
2032 				 * the outgoing interface.
2033 				 */
2034 				error = ip6_getpmtu(&sro, NULL, NULL,
2035 				    &in6p->in6p_faddr, &pmtu, NULL);
2036 				if (sro.ro_rt)
2037 					RTFREE(sro.ro_rt);
2038 				if (error)
2039 					break;
2040 				if (pmtu > IPV6_MAXPACKET)
2041 					pmtu = IPV6_MAXPACKET;
2042 
2043 				bzero(&mtuinfo, sizeof(mtuinfo));
2044 				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2045 				optdata = (void *)&mtuinfo;
2046 				optdatalen = sizeof(mtuinfo);
2047 				error = sooptcopyout(sopt, optdata,
2048 				    optdatalen);
2049 				break;
2050 			}
2051 
2052 			case IPV6_2292PKTINFO:
2053 			case IPV6_2292HOPLIMIT:
2054 			case IPV6_2292HOPOPTS:
2055 			case IPV6_2292RTHDR:
2056 			case IPV6_2292DSTOPTS:
2057 				switch (optname) {
2058 				case IPV6_2292PKTINFO:
2059 					optval = OPTBIT(IN6P_PKTINFO);
2060 					break;
2061 				case IPV6_2292HOPLIMIT:
2062 					optval = OPTBIT(IN6P_HOPLIMIT);
2063 					break;
2064 				case IPV6_2292HOPOPTS:
2065 					optval = OPTBIT(IN6P_HOPOPTS);
2066 					break;
2067 				case IPV6_2292RTHDR:
2068 					optval = OPTBIT(IN6P_RTHDR);
2069 					break;
2070 				case IPV6_2292DSTOPTS:
2071 					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2072 					break;
2073 				}
2074 				error = sooptcopyout(sopt, &optval,
2075 				    sizeof optval);
2076 				break;
2077 			case IPV6_PKTINFO:
2078 			case IPV6_HOPOPTS:
2079 			case IPV6_RTHDR:
2080 			case IPV6_DSTOPTS:
2081 			case IPV6_RTHDRDSTOPTS:
2082 			case IPV6_NEXTHOP:
2083 			case IPV6_TCLASS:
2084 			case IPV6_DONTFRAG:
2085 			case IPV6_USE_MIN_MTU:
2086 			case IPV6_PREFER_TEMPADDR:
2087 				error = ip6_getpcbopt(in6p->in6p_outputopts,
2088 				    optname, sopt);
2089 				break;
2090 
2091 			case IPV6_MULTICAST_IF:
2092 			case IPV6_MULTICAST_HOPS:
2093 			case IPV6_MULTICAST_LOOP:
2094 			case IPV6_JOIN_GROUP:
2095 			case IPV6_LEAVE_GROUP:
2096 			    {
2097 				struct mbuf *m;
2098 				error = ip6_getmoptions(sopt->sopt_name,
2099 				    in6p->in6p_moptions, &m);
2100 				if (error == 0)
2101 					error = sooptcopyout(sopt,
2102 					    mtod(m, char *), m->m_len);
2103 				m_freem(m);
2104 			    }
2105 				break;
2106 
2107 #if defined(IPSEC) || defined(FAST_IPSEC)
2108 			case IPV6_IPSEC_POLICY:
2109 			  {
2110 				caddr_t req = NULL;
2111 				size_t len = 0;
2112 				struct mbuf *m = NULL;
2113 				struct mbuf **mp = &m;
2114 				size_t ovalsize = sopt->sopt_valsize;
2115 				caddr_t oval = (caddr_t)sopt->sopt_val;
2116 
2117 				error = soopt_getm(sopt, &m); /* XXX */
2118 				if (error != 0)
2119 					break;
2120 				error = soopt_mcopyin(sopt, m); /* XXX */
2121 				if (error != 0)
2122 					break;
2123 				sopt->sopt_valsize = ovalsize;
2124 				sopt->sopt_val = oval;
2125 				if (m) {
2126 					req = mtod(m, caddr_t);
2127 					len = m->m_len;
2128 				}
2129 				error = ipsec6_get_policy(in6p, req, len, mp);
2130 				if (error == 0)
2131 					error = soopt_mcopyout(sopt, m); /* XXX */
2132 				if (error == 0 && m)
2133 					m_freem(m);
2134 				break;
2135 			  }
2136 #endif /* KAME IPSEC */
2137 
2138 			case IPV6_FW_GET:
2139 			  {
2140 				struct mbuf *m;
2141 				struct mbuf **mp = &m;
2142 
2143 				if (ip6_fw_ctl_ptr == NULL)
2144 			        {
2145 					return EINVAL;
2146 				}
2147 				error = (*ip6_fw_ctl_ptr)(optname, mp);
2148 				if (error == 0)
2149 					error = soopt_mcopyout(sopt, m); /* XXX */
2150 				if (error == 0 && m)
2151 					m_freem(m);
2152 			  }
2153 				break;
2154 
2155 			default:
2156 				error = ENOPROTOOPT;
2157 				break;
2158 			}
2159 			break;
2160 		}
2161 	} else {		/* level != IPPROTO_IPV6 */
2162 		error = EINVAL;
2163 	}
2164 	return (error);
2165 }
2166 
2167 int
2168 ip6_raw_ctloutput(so, sopt)
2169 	struct socket *so;
2170 	struct sockopt *sopt;
2171 {
2172 	int error = 0, optval, optlen;
2173 	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2174 	struct in6pcb *in6p = sotoin6pcb(so);
2175 	int level, op, optname;
2176 
2177 	if (sopt) {
2178 		level = sopt->sopt_level;
2179 		op = sopt->sopt_dir;
2180 		optname = sopt->sopt_name;
2181 		optlen = sopt->sopt_valsize;
2182 	} else
2183 		panic("ip6_raw_ctloutput: arg soopt is NULL");
2184 
2185 	if (level != IPPROTO_IPV6) {
2186 		return (EINVAL);
2187 	}
2188 
2189 	switch (optname) {
2190 	case IPV6_CHECKSUM:
2191 		/*
2192 		 * For ICMPv6 sockets, no modification allowed for checksum
2193 		 * offset, permit "no change" values to help existing apps.
2194 		 *
2195 		 * XXX 2292bis says: "An attempt to set IPV6_CHECKSUM
2196 		 * for an ICMPv6 socket will fail."
2197 		 * The current behavior does not meet 2292bis.
2198 		 */
2199 		switch (op) {
2200 		case SOPT_SET:
2201 			if (optlen != sizeof(int)) {
2202 				error = EINVAL;
2203 				break;
2204 			}
2205 			error = sooptcopyin(sopt, &optval, sizeof(optval),
2206 					    sizeof(optval));
2207 			if (error)
2208 				break;
2209 			if ((optval % 2) != 0) {
2210 				/* the API assumes even offset values */
2211 				error = EINVAL;
2212 			} else if (so->so_proto->pr_protocol ==
2213 			    IPPROTO_ICMPV6) {
2214 				if (optval != icmp6off)
2215 					error = EINVAL;
2216 			} else
2217 				in6p->in6p_cksum = optval;
2218 			break;
2219 
2220 		case SOPT_GET:
2221 			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2222 				optval = icmp6off;
2223 			else
2224 				optval = in6p->in6p_cksum;
2225 
2226 			error = sooptcopyout(sopt, &optval, sizeof(optval));
2227 			break;
2228 
2229 		default:
2230 			error = EINVAL;
2231 			break;
2232 		}
2233 		break;
2234 
2235 	default:
2236 		error = ENOPROTOOPT;
2237 		break;
2238 	}
2239 
2240 	return (error);
2241 }
2242 
2243 /*
2244  * Set up IP6 options in pcb for insertion in output packets or
2245  * specifying behavior of outgoing packets.
2246  */
2247 static int
2248 ip6_pcbopts(pktopt, m, so, sopt)
2249 	struct ip6_pktopts **pktopt;
2250 	struct mbuf *m;
2251 	struct socket *so;
2252 	struct sockopt *sopt;
2253 {
2254 	struct ip6_pktopts *opt = *pktopt;
2255 	int error = 0;
2256 	struct thread *td = sopt->sopt_td;
2257 	int priv = 0;
2258 
2259 	/* turn off any old options. */
2260 	if (opt) {
2261 #ifdef DIAGNOSTIC
2262 		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2263 		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2264 		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2265 			printf("ip6_pcbopts: all specified options are cleared.\n");
2266 #endif
2267 		ip6_clearpktopts(opt, -1);
2268 	} else
2269 		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2270 	*pktopt = NULL;
2271 
2272 	if (!m || m->m_len == 0) {
2273 		/*
2274 		 * Only turning off any previous options, regardless of
2275 		 * whether the opt is just created or given.
2276 		 */
2277 		free(opt, M_IP6OPT);
2278 		return (0);
2279 	}
2280 
2281 	/*  set options specified by user. */
2282 	if (td && !suser(td))
2283 		priv = 1;
2284 	if ((error = ip6_setpktoptions(m, opt, NULL, priv, 1,
2285 	    so->so_proto->pr_protocol)) != 0) {
2286 		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2287 		free(opt, M_IP6OPT);
2288 		return (error);
2289 	}
2290 	*pktopt = opt;
2291 	return (0);
2292 }
2293 
2294 /*
2295  * initialize ip6_pktopts.  beware that there are non-zero default values in
2296  * the struct.
2297  */
2298 void
2299 init_ip6pktopts(opt)
2300 	struct ip6_pktopts *opt;
2301 {
2302 
2303 	bzero(opt, sizeof(*opt));
2304 	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2305 	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2306 	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2307 	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2308 }
2309 
2310 static int
2311 ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
2312 	int optname, len, priv;
2313 	u_char *buf;
2314 	struct ip6_pktopts **pktopt;
2315 	int uproto;
2316 {
2317 	struct ip6_pktopts *opt;
2318 
2319 	if (*pktopt == NULL) {
2320 		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2321 		    M_WAITOK);
2322 		init_ip6pktopts(*pktopt);
2323 		(*pktopt)->needfree = 1;
2324 	}
2325 	opt = *pktopt;
2326 
2327 	return (ip6_setpktoption(optname, buf, len, opt, priv, 1, 0, uproto));
2328 }
2329 
2330 static int
2331 ip6_getpcbopt(pktopt, optname, sopt)
2332 	struct ip6_pktopts *pktopt;
2333 	struct sockopt *sopt;
2334 	int optname;
2335 {
2336 	void *optdata = NULL;
2337 	int optdatalen = 0;
2338 	struct ip6_ext *ip6e;
2339 	int error = 0;
2340 	struct in6_pktinfo null_pktinfo;
2341 	int deftclass = 0, on;
2342 	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2343 	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2344 
2345 	switch (optname) {
2346 	case IPV6_PKTINFO:
2347 		if (pktopt && pktopt->ip6po_pktinfo)
2348 			optdata = (void *)pktopt->ip6po_pktinfo;
2349 		else {
2350 			/* XXX: we don't have to do this every time... */
2351 			bzero(&null_pktinfo, sizeof(null_pktinfo));
2352 			optdata = (void *)&null_pktinfo;
2353 		}
2354 		optdatalen = sizeof(struct in6_pktinfo);
2355 		break;
2356 	case IPV6_TCLASS:
2357 		if (pktopt && pktopt->ip6po_tclass >= 0)
2358 			optdata = (void *)&pktopt->ip6po_tclass;
2359 		else
2360 			optdata = (void *)&deftclass;
2361 		optdatalen = sizeof(int);
2362 		break;
2363 	case IPV6_HOPOPTS:
2364 		if (pktopt && pktopt->ip6po_hbh) {
2365 			optdata = (void *)pktopt->ip6po_hbh;
2366 			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2367 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2368 		}
2369 		break;
2370 	case IPV6_RTHDR:
2371 		if (pktopt && pktopt->ip6po_rthdr) {
2372 			optdata = (void *)pktopt->ip6po_rthdr;
2373 			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2374 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2375 		}
2376 		break;
2377 	case IPV6_RTHDRDSTOPTS:
2378 		if (pktopt && pktopt->ip6po_dest1) {
2379 			optdata = (void *)pktopt->ip6po_dest1;
2380 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2381 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2382 		}
2383 		break;
2384 	case IPV6_DSTOPTS:
2385 		if (pktopt && pktopt->ip6po_dest2) {
2386 			optdata = (void *)pktopt->ip6po_dest2;
2387 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2388 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2389 		}
2390 		break;
2391 	case IPV6_NEXTHOP:
2392 		if (pktopt && pktopt->ip6po_nexthop) {
2393 			optdata = (void *)pktopt->ip6po_nexthop;
2394 			optdatalen = pktopt->ip6po_nexthop->sa_len;
2395 		}
2396 		break;
2397 	case IPV6_USE_MIN_MTU:
2398 		if (pktopt)
2399 			optdata = (void *)&pktopt->ip6po_minmtu;
2400 		else
2401 			optdata = (void *)&defminmtu;
2402 		optdatalen = sizeof(int);
2403 		break;
2404 	case IPV6_DONTFRAG:
2405 		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2406 			on = 1;
2407 		else
2408 			on = 0;
2409 		optdata = (void *)&on;
2410 		optdatalen = sizeof(on);
2411 		break;
2412 	case IPV6_PREFER_TEMPADDR:
2413 		if (pktopt)
2414 			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2415 		else
2416 			optdata = (void *)&defpreftemp;
2417 		optdatalen = sizeof(int);
2418 		break;
2419 	default:		/* should not happen */
2420 #ifdef DIAGNOSTIC
2421 		panic("ip6_getpcbopt: unexpected option\n");
2422 #endif
2423 		return (ENOPROTOOPT);
2424 	}
2425 
2426 	error = sooptcopyout(sopt, optdata, optdatalen);
2427 
2428 	return (error);
2429 }
2430 
2431 void
2432 ip6_clearpktopts(pktopt, optname)
2433 	struct ip6_pktopts *pktopt;
2434 	int optname;
2435 {
2436 	int needfree;
2437 
2438 	if (pktopt == NULL)
2439 		return;
2440 
2441 	needfree = pktopt->needfree;
2442 
2443 	if (optname == -1 || optname == IPV6_PKTINFO) {
2444 		if (needfree && pktopt->ip6po_pktinfo)
2445 			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2446 		pktopt->ip6po_pktinfo = NULL;
2447 	}
2448 	if (optname == -1 || optname == IPV6_HOPLIMIT)
2449 		pktopt->ip6po_hlim = -1;
2450 	if (optname == -1 || optname == IPV6_TCLASS)
2451 		pktopt->ip6po_tclass = -1;
2452 	if (optname == -1 || optname == IPV6_NEXTHOP) {
2453 		if (pktopt->ip6po_nextroute.ro_rt) {
2454 			RTFREE(pktopt->ip6po_nextroute.ro_rt);
2455 			pktopt->ip6po_nextroute.ro_rt = NULL;
2456 		}
2457 		if (needfree && pktopt->ip6po_nexthop)
2458 			free(pktopt->ip6po_nexthop, M_IP6OPT);
2459 		pktopt->ip6po_nexthop = NULL;
2460 	}
2461 	if (optname == -1 || optname == IPV6_HOPOPTS) {
2462 		if (needfree && pktopt->ip6po_hbh)
2463 			free(pktopt->ip6po_hbh, M_IP6OPT);
2464 		pktopt->ip6po_hbh = NULL;
2465 	}
2466 	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2467 		if (needfree && pktopt->ip6po_dest1)
2468 			free(pktopt->ip6po_dest1, M_IP6OPT);
2469 		pktopt->ip6po_dest1 = NULL;
2470 	}
2471 	if (optname == -1 || optname == IPV6_RTHDR) {
2472 		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2473 			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2474 		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2475 		if (pktopt->ip6po_route.ro_rt) {
2476 			RTFREE(pktopt->ip6po_route.ro_rt);
2477 			pktopt->ip6po_route.ro_rt = NULL;
2478 		}
2479 	}
2480 	if (optname == -1 || optname == IPV6_DSTOPTS) {
2481 		if (needfree && pktopt->ip6po_dest2)
2482 			free(pktopt->ip6po_dest2, M_IP6OPT);
2483 		pktopt->ip6po_dest2 = NULL;
2484 	}
2485 }
2486 
2487 #define PKTOPT_EXTHDRCPY(type) \
2488 do {\
2489 	if (src->type) {\
2490 		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2491 		dst->type = malloc(hlen, M_IP6OPT, canwait);\
2492 		if (dst->type == NULL && canwait == M_NOWAIT)\
2493 			goto bad;\
2494 		bcopy(src->type, dst->type, hlen);\
2495 	}\
2496 } while (/*CONSTCOND*/ 0)
2497 
2498 struct ip6_pktopts *
2499 ip6_copypktopts(src, canwait)
2500 	struct ip6_pktopts *src;
2501 	int canwait;
2502 {
2503 	struct ip6_pktopts *dst;
2504 
2505 	if (src == NULL) {
2506 		printf("ip6_clearpktopts: invalid argument\n");
2507 		return (NULL);
2508 	}
2509 
2510 	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2511 	if (dst == NULL && canwait == M_NOWAIT)
2512 		return (NULL);
2513 	bzero(dst, sizeof(*dst));
2514 	dst->needfree = 1;
2515 
2516 	dst->ip6po_hlim = src->ip6po_hlim;
2517 	dst->ip6po_tclass = src->ip6po_tclass;
2518 	dst->ip6po_flags = src->ip6po_flags;
2519 	if (src->ip6po_pktinfo) {
2520 		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2521 		    M_IP6OPT, canwait);
2522 		if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2523 			goto bad;
2524 		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2525 	}
2526 	if (src->ip6po_nexthop) {
2527 		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2528 		    M_IP6OPT, canwait);
2529 		if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2530 			goto bad;
2531 		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2532 		    src->ip6po_nexthop->sa_len);
2533 	}
2534 	PKTOPT_EXTHDRCPY(ip6po_hbh);
2535 	PKTOPT_EXTHDRCPY(ip6po_dest1);
2536 	PKTOPT_EXTHDRCPY(ip6po_dest2);
2537 	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2538 	return (dst);
2539 
2540   bad:
2541 	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2542 	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2543 	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2544 	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2545 	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2546 	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2547 	free(dst, M_IP6OPT);
2548 	return (NULL);
2549 }
2550 #undef PKTOPT_EXTHDRCPY
2551 
2552 void
2553 ip6_freepcbopts(pktopt)
2554 	struct ip6_pktopts *pktopt;
2555 {
2556 	if (pktopt == NULL)
2557 		return;
2558 
2559 	ip6_clearpktopts(pktopt, -1);
2560 
2561 	free(pktopt, M_IP6OPT);
2562 }
2563 
2564 /*
2565  * Set the IP6 multicast options in response to user setsockopt().
2566  */
2567 static int
2568 ip6_setmoptions(optname, im6op, m)
2569 	int optname;
2570 	struct ip6_moptions **im6op;
2571 	struct mbuf *m;
2572 {
2573 	int error = 0;
2574 	u_int loop, ifindex;
2575 	struct ipv6_mreq *mreq;
2576 	struct ifnet *ifp;
2577 	struct ip6_moptions *im6o = *im6op;
2578 	struct route_in6 ro;
2579 	struct sockaddr_in6 *dst;
2580 	struct in6_multi_mship *imm;
2581 	struct thread *td = curthread;
2582 
2583 	if (im6o == NULL) {
2584 		/*
2585 		 * No multicast option buffer attached to the pcb;
2586 		 * allocate one and initialize to default values.
2587 		 */
2588 		im6o = (struct ip6_moptions *)
2589 			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2590 
2591 		if (im6o == NULL)
2592 			return (ENOBUFS);
2593 		*im6op = im6o;
2594 		im6o->im6o_multicast_ifp = NULL;
2595 		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2596 		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2597 		LIST_INIT(&im6o->im6o_memberships);
2598 	}
2599 
2600 	switch (optname) {
2601 
2602 	case IPV6_MULTICAST_IF:
2603 		/*
2604 		 * Select the interface for outgoing multicast packets.
2605 		 */
2606 		if (m == NULL || m->m_len != sizeof(u_int)) {
2607 			error = EINVAL;
2608 			break;
2609 		}
2610 		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2611 		if (ifindex < 0 || if_index < ifindex) {
2612 			error = ENXIO;	/* XXX EINVAL? */
2613 			break;
2614 		}
2615 		ifp = ifnet_byindex(ifindex);
2616 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2617 			error = EADDRNOTAVAIL;
2618 			break;
2619 		}
2620 		im6o->im6o_multicast_ifp = ifp;
2621 		break;
2622 
2623 	case IPV6_MULTICAST_HOPS:
2624 	    {
2625 		/*
2626 		 * Set the IP6 hoplimit for outgoing multicast packets.
2627 		 */
2628 		int optval;
2629 		if (m == NULL || m->m_len != sizeof(int)) {
2630 			error = EINVAL;
2631 			break;
2632 		}
2633 		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2634 		if (optval < -1 || optval >= 256)
2635 			error = EINVAL;
2636 		else if (optval == -1)
2637 			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2638 		else
2639 			im6o->im6o_multicast_hlim = optval;
2640 		break;
2641 	    }
2642 
2643 	case IPV6_MULTICAST_LOOP:
2644 		/*
2645 		 * Set the loopback flag for outgoing multicast packets.
2646 		 * Must be zero or one.
2647 		 */
2648 		if (m == NULL || m->m_len != sizeof(u_int)) {
2649 			error = EINVAL;
2650 			break;
2651 		}
2652 		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2653 		if (loop > 1) {
2654 			error = EINVAL;
2655 			break;
2656 		}
2657 		im6o->im6o_multicast_loop = loop;
2658 		break;
2659 
2660 	case IPV6_JOIN_GROUP:
2661 		/*
2662 		 * Add a multicast group membership.
2663 		 * Group must be a valid IP6 multicast address.
2664 		 */
2665 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2666 			error = EINVAL;
2667 			break;
2668 		}
2669 		mreq = mtod(m, struct ipv6_mreq *);
2670 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2671 			/*
2672 			 * We use the unspecified address to specify to accept
2673 			 * all multicast addresses. Only super user is allowed
2674 			 * to do this.
2675 			 */
2676 			if (suser(td)) {
2677 				error = EACCES;
2678 				break;
2679 			}
2680 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2681 			error = EINVAL;
2682 			break;
2683 		}
2684 
2685 		/*
2686 		 * If the interface is specified, validate it.
2687 		 */
2688 		if (mreq->ipv6mr_interface < 0 ||
2689 		    if_index < mreq->ipv6mr_interface) {
2690 			error = ENXIO;	/* XXX EINVAL? */
2691 			break;
2692 		}
2693 		/*
2694 		 * If no interface was explicitly specified, choose an
2695 		 * appropriate one according to the given multicast address.
2696 		 */
2697 		if (mreq->ipv6mr_interface == 0) {
2698 			/*
2699 			 * If the multicast address is in node-local scope,
2700 			 * the interface should be a loopback interface.
2701 			 * Otherwise, look up the routing table for the
2702 			 * address, and choose the outgoing interface.
2703 			 *   XXX: is it a good approach?
2704 			 */
2705 			if (IN6_IS_ADDR_MC_INTFACELOCAL(&mreq->ipv6mr_multiaddr)) {
2706 				ifp = &loif[0];
2707 			} else {
2708 				ro.ro_rt = NULL;
2709 				dst = (struct sockaddr_in6 *)&ro.ro_dst;
2710 				bzero(dst, sizeof(*dst));
2711 				dst->sin6_len = sizeof(struct sockaddr_in6);
2712 				dst->sin6_family = AF_INET6;
2713 				dst->sin6_addr = mreq->ipv6mr_multiaddr;
2714 				rtalloc((struct route *)&ro);
2715 				if (ro.ro_rt == NULL) {
2716 					error = EADDRNOTAVAIL;
2717 					break;
2718 				}
2719 				ifp = ro.ro_rt->rt_ifp;
2720 				RTFREE(ro.ro_rt);
2721 			}
2722 		} else
2723 			ifp = ifnet_byindex(mreq->ipv6mr_interface);
2724 
2725 		/*
2726 		 * See if we found an interface, and confirm that it
2727 		 * supports multicast
2728 		 */
2729 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2730 			error = EADDRNOTAVAIL;
2731 			break;
2732 		}
2733 		/*
2734 		 * Put interface index into the multicast address,
2735 		 * if the address has link-local scope.
2736 		 */
2737 		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2738 			mreq->ipv6mr_multiaddr.s6_addr16[1] =
2739 			    htons(ifp->if_index);
2740 		}
2741 		/*
2742 		 * See if the membership already exists.
2743 		 */
2744 		for (imm = im6o->im6o_memberships.lh_first;
2745 		     imm != NULL; imm = imm->i6mm_chain.le_next)
2746 			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2747 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2748 					       &mreq->ipv6mr_multiaddr))
2749 				break;
2750 		if (imm != NULL) {
2751 			error = EADDRINUSE;
2752 			break;
2753 		}
2754 		/*
2755 		 * Everything looks good; add a new record to the multicast
2756 		 * address list for the given interface.
2757 		 */
2758 		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2759 		if (imm == NULL) {
2760 			error = ENOBUFS;
2761 			break;
2762 		}
2763 		if ((imm->i6mm_maddr =
2764 		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2765 			free(imm, M_IPMADDR);
2766 			break;
2767 		}
2768 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2769 		break;
2770 
2771 	case IPV6_LEAVE_GROUP:
2772 		/*
2773 		 * Drop a multicast group membership.
2774 		 * Group must be a valid IP6 multicast address.
2775 		 */
2776 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2777 			error = EINVAL;
2778 			break;
2779 		}
2780 		mreq = mtod(m, struct ipv6_mreq *);
2781 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2782 			if (suser(td)) {
2783 				error = EACCES;
2784 				break;
2785 			}
2786 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2787 			error = EINVAL;
2788 			break;
2789 		}
2790 		/*
2791 		 * If an interface address was specified, get a pointer
2792 		 * to its ifnet structure.
2793 		 */
2794 		if (mreq->ipv6mr_interface < 0
2795 		 || if_index < mreq->ipv6mr_interface) {
2796 			error = ENXIO;	/* XXX EINVAL? */
2797 			break;
2798 		}
2799 		ifp = ifnet_byindex(mreq->ipv6mr_interface);
2800 		/*
2801 		 * Put interface index into the multicast address,
2802 		 * if the address has link-local scope.
2803 		 */
2804 		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2805 			mreq->ipv6mr_multiaddr.s6_addr16[1]
2806 				= htons(mreq->ipv6mr_interface);
2807 		}
2808 
2809 		/*
2810 		 * Find the membership in the membership list.
2811 		 */
2812 		for (imm = im6o->im6o_memberships.lh_first;
2813 		     imm != NULL; imm = imm->i6mm_chain.le_next) {
2814 			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2815 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2816 			    &mreq->ipv6mr_multiaddr))
2817 				break;
2818 		}
2819 		if (imm == NULL) {
2820 			/* Unable to resolve interface */
2821 			error = EADDRNOTAVAIL;
2822 			break;
2823 		}
2824 		/*
2825 		 * Give up the multicast address record to which the
2826 		 * membership points.
2827 		 */
2828 		LIST_REMOVE(imm, i6mm_chain);
2829 		in6_delmulti(imm->i6mm_maddr);
2830 		free(imm, M_IPMADDR);
2831 		break;
2832 
2833 	default:
2834 		error = EOPNOTSUPP;
2835 		break;
2836 	}
2837 
2838 	/*
2839 	 * If all options have default values, no need to keep the mbuf.
2840 	 */
2841 	if (im6o->im6o_multicast_ifp == NULL &&
2842 	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2843 	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2844 	    im6o->im6o_memberships.lh_first == NULL) {
2845 		free(*im6op, M_IPMOPTS);
2846 		*im6op = NULL;
2847 	}
2848 
2849 	return (error);
2850 }
2851 
2852 /*
2853  * Return the IP6 multicast options in response to user getsockopt().
2854  */
2855 static int
2856 ip6_getmoptions(optname, im6o, mp)
2857 	int optname;
2858 	struct ip6_moptions *im6o;
2859 	struct mbuf **mp;
2860 {
2861 	u_int *hlim, *loop, *ifindex;
2862 
2863 	*mp = m_get(M_TRYWAIT, MT_HEADER);		/* XXX */
2864 
2865 	switch (optname) {
2866 
2867 	case IPV6_MULTICAST_IF:
2868 		ifindex = mtod(*mp, u_int *);
2869 		(*mp)->m_len = sizeof(u_int);
2870 		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2871 			*ifindex = 0;
2872 		else
2873 			*ifindex = im6o->im6o_multicast_ifp->if_index;
2874 		return (0);
2875 
2876 	case IPV6_MULTICAST_HOPS:
2877 		hlim = mtod(*mp, u_int *);
2878 		(*mp)->m_len = sizeof(u_int);
2879 		if (im6o == NULL)
2880 			*hlim = ip6_defmcasthlim;
2881 		else
2882 			*hlim = im6o->im6o_multicast_hlim;
2883 		return (0);
2884 
2885 	case IPV6_MULTICAST_LOOP:
2886 		loop = mtod(*mp, u_int *);
2887 		(*mp)->m_len = sizeof(u_int);
2888 		if (im6o == NULL)
2889 			*loop = ip6_defmcasthlim;
2890 		else
2891 			*loop = im6o->im6o_multicast_loop;
2892 		return (0);
2893 
2894 	default:
2895 		return (EOPNOTSUPP);
2896 	}
2897 }
2898 
2899 /*
2900  * Discard the IP6 multicast options.
2901  */
2902 void
2903 ip6_freemoptions(im6o)
2904 	struct ip6_moptions *im6o;
2905 {
2906 	struct in6_multi_mship *imm;
2907 
2908 	if (im6o == NULL)
2909 		return;
2910 
2911 	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2912 		LIST_REMOVE(imm, i6mm_chain);
2913 		if (imm->i6mm_maddr)
2914 			in6_delmulti(imm->i6mm_maddr);
2915 		free(imm, M_IPMADDR);
2916 	}
2917 	free(im6o, M_IPMOPTS);
2918 }
2919 
2920 /*
2921  * Set IPv6 outgoing packet options based on advanced API.
2922  */
2923 int
2924 ip6_setpktoptions(control, opt, stickyopt, priv, needcopy, uproto)
2925 	struct mbuf *control;
2926 	struct ip6_pktopts *opt, *stickyopt;
2927 	int priv, needcopy, uproto;
2928 {
2929 	struct cmsghdr *cm = 0;
2930 
2931 	if (control == 0 || opt == 0)
2932 		return (EINVAL);
2933 
2934 	if (stickyopt) {
2935 		/*
2936 		 * If stickyopt is provided, make a local copy of the options
2937 		 * for this particular packet, then override them by ancillary
2938 		 * objects.
2939 		 * XXX: need to gain a reference for the cached route of the
2940 		 * next hop in case of the overriding.
2941 		 */
2942 		*opt = *stickyopt;
2943 		if (opt->ip6po_nextroute.ro_rt) {
2944 			RT_LOCK(opt->ip6po_nextroute.ro_rt);
2945 			RT_ADDREF(opt->ip6po_nextroute.ro_rt);
2946 			RT_UNLOCK(opt->ip6po_nextroute.ro_rt);
2947 		}
2948 	} else
2949 		init_ip6pktopts(opt);
2950 	opt->needfree = needcopy;
2951 
2952 	/*
2953 	 * XXX: Currently, we assume all the optional information is stored
2954 	 * in a single mbuf.
2955 	 */
2956 	if (control->m_next)
2957 		return (EINVAL);
2958 
2959 	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2960 	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2961 		int error;
2962 
2963 		if (control->m_len < CMSG_LEN(0))
2964 			return (EINVAL);
2965 
2966 		cm = mtod(control, struct cmsghdr *);
2967 		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2968 			return (EINVAL);
2969 		if (cm->cmsg_level != IPPROTO_IPV6)
2970 			continue;
2971 
2972 		error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
2973 		    cm->cmsg_len - CMSG_LEN(0), opt, priv, needcopy, 1, uproto);
2974 		if (error)
2975 			return (error);
2976 	}
2977 
2978 	return (0);
2979 }
2980 
2981 /*
2982  * Set a particular packet option, as a sticky option or an ancillary data
2983  * item.  "len" can be 0 only when it's a sticky option.
2984  * We have 4 cases of combination of "sticky" and "cmsg":
2985  * "sticky=0, cmsg=0": impossible
2986  * "sticky=0, cmsg=1": RFC2292 or rfc2292bis ancillary data
2987  * "sticky=1, cmsg=0": rfc2292bis socket option
2988  * "sticky=1, cmsg=1": RFC2292 socket option
2989  */
2990 static int
2991 ip6_setpktoption(optname, buf, len, opt, priv, sticky, cmsg, uproto)
2992 	int optname, len, priv, sticky, cmsg, uproto;
2993 	u_char *buf;
2994 	struct ip6_pktopts *opt;
2995 {
2996 	int minmtupolicy, preftemp;
2997 
2998 	if (!sticky && !cmsg) {
2999 #ifdef DIAGNOSTIC
3000 		printf("ip6_setpktoption: impossible case\n");
3001 #endif
3002 		return (EINVAL);
3003 	}
3004 
3005 	/*
3006 	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3007 	 * not be specified in the context of rfc2292bis.  Conversely,
3008 	 * rfc2292bis types should not be specified in the context of RFC2292.
3009 	 */
3010 	if (!cmsg) {
3011 		switch (optname) {
3012 		case IPV6_2292PKTINFO:
3013 		case IPV6_2292HOPLIMIT:
3014 		case IPV6_2292NEXTHOP:
3015 		case IPV6_2292HOPOPTS:
3016 		case IPV6_2292DSTOPTS:
3017 		case IPV6_2292RTHDR:
3018 		case IPV6_2292PKTOPTIONS:
3019 			return (ENOPROTOOPT);
3020 		}
3021 	}
3022 	if (sticky && cmsg) {
3023 		switch (optname) {
3024 		case IPV6_PKTINFO:
3025 		case IPV6_HOPLIMIT:
3026 		case IPV6_NEXTHOP:
3027 		case IPV6_HOPOPTS:
3028 		case IPV6_DSTOPTS:
3029 		case IPV6_RTHDRDSTOPTS:
3030 		case IPV6_RTHDR:
3031 		case IPV6_USE_MIN_MTU:
3032 		case IPV6_DONTFRAG:
3033 		case IPV6_TCLASS:
3034 		case IPV6_PREFER_TEMPADDR: /* XXX: not an rfc2292bis option */
3035 			return (ENOPROTOOPT);
3036 		}
3037 	}
3038 
3039 	switch (optname) {
3040 	case IPV6_2292PKTINFO:
3041 	case IPV6_PKTINFO:
3042 	{
3043 		struct ifnet *ifp = NULL;
3044 		struct in6_pktinfo *pktinfo;
3045 
3046 		if (len != sizeof(struct in6_pktinfo))
3047 			return (EINVAL);
3048 
3049 		pktinfo = (struct in6_pktinfo *)buf;
3050 
3051 		/*
3052 		 * An application can clear any sticky IPV6_PKTINFO option by
3053 		 * doing a "regular" setsockopt with ipi6_addr being
3054 		 * in6addr_any and ipi6_ifindex being zero.
3055 		 * [RFC 3542, Section 6]
3056 		 */
3057 		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3058 		    pktinfo->ipi6_ifindex == 0 &&
3059 		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3060 			ip6_clearpktopts(opt, optname);
3061 			break;
3062 		}
3063 
3064 		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3065 		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3066 			return (EINVAL);
3067 		}
3068 
3069 		/* validate the interface index if specified. */
3070 		if (pktinfo->ipi6_ifindex > if_index ||
3071 		    pktinfo->ipi6_ifindex < 0) {
3072 			 return (ENXIO);
3073 		}
3074 		if (pktinfo->ipi6_ifindex) {
3075 			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
3076 			if (ifp == NULL)
3077 				return (ENXIO);
3078 		}
3079 
3080 		/*
3081 		 * We store the address anyway, and let in6_selectsrc()
3082 		 * validate the specified address.  This is because ipi6_addr
3083 		 * may not have enough information about its scope zone, and
3084 		 * we may need additional information (such as outgoing
3085 		 * interface or the scope zone of a destination address) to
3086 		 * disambiguate the scope.
3087 		 * XXX: the delay of the validation may confuse the
3088 		 * application when it is used as a sticky option.
3089 		 */
3090 		if (sticky) {
3091 			if (opt->ip6po_pktinfo == NULL) {
3092 				opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3093 				    M_IP6OPT, M_WAITOK);
3094 			}
3095 			bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3096 		} else
3097 			opt->ip6po_pktinfo = pktinfo;
3098 		break;
3099 	}
3100 
3101 	case IPV6_2292HOPLIMIT:
3102 	case IPV6_HOPLIMIT:
3103 	{
3104 		int *hlimp;
3105 
3106 		/*
3107 		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3108 		 * to simplify the ordering among hoplimit options.
3109 		 */
3110 		if (optname == IPV6_HOPLIMIT && sticky)
3111 			return (ENOPROTOOPT);
3112 
3113 		if (len != sizeof(int))
3114 			return (EINVAL);
3115 		hlimp = (int *)buf;
3116 		if (*hlimp < -1 || *hlimp > 255)
3117 			return (EINVAL);
3118 
3119 		opt->ip6po_hlim = *hlimp;
3120 		break;
3121 	}
3122 
3123 	case IPV6_TCLASS:
3124 	{
3125 		int tclass;
3126 
3127 		if (len != sizeof(int))
3128 			return (EINVAL);
3129 		tclass = *(int *)buf;
3130 		if (tclass < -1 || tclass > 255)
3131 			return (EINVAL);
3132 
3133 		opt->ip6po_tclass = tclass;
3134 		break;
3135 	}
3136 
3137 	case IPV6_2292NEXTHOP:
3138 	case IPV6_NEXTHOP:
3139 		if (!priv)
3140 			return (EPERM);
3141 
3142 		if (len == 0) {	/* just remove the option */
3143 			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3144 			break;
3145 		}
3146 
3147 		/* check if cmsg_len is large enough for sa_len */
3148 		if (len < sizeof(struct sockaddr) || len < *buf)
3149 			return (EINVAL);
3150 
3151 		switch (((struct sockaddr *)buf)->sa_family) {
3152 		case AF_INET6:
3153 		{
3154 			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3155 #if 0
3156 			int error;
3157 #endif
3158 
3159 			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3160 				return (EINVAL);
3161 
3162 			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3163 			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3164 				return (EINVAL);
3165 			}
3166 #if 0
3167 			if ((error = scope6_check_id(sa6, ip6_use_defzone))
3168 			    != 0) {
3169 				return (error);
3170 			}
3171 #endif
3172 			sa6->sin6_scope_id = 0; /* XXX */
3173 			break;
3174 		}
3175 		case AF_LINK:	/* should eventually be supported */
3176 		default:
3177 			return (EAFNOSUPPORT);
3178 		}
3179 
3180 		/* turn off the previous option, then set the new option. */
3181 		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3182 		if (sticky) {
3183 			opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_WAITOK);
3184 			bcopy(buf, opt->ip6po_nexthop, *buf);
3185 		} else
3186 			opt->ip6po_nexthop = (struct sockaddr *)buf;
3187 		break;
3188 
3189 	case IPV6_2292HOPOPTS:
3190 	case IPV6_HOPOPTS:
3191 	{
3192 		struct ip6_hbh *hbh;
3193 		int hbhlen;
3194 
3195 		/*
3196 		 * XXX: We don't allow a non-privileged user to set ANY HbH
3197 		 * options, since per-option restriction has too much
3198 		 * overhead.
3199 		 */
3200 		if (!priv)
3201 			return (EPERM);
3202 
3203 		if (len == 0) {
3204 			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3205 			break;	/* just remove the option */
3206 		}
3207 
3208 		/* message length validation */
3209 		if (len < sizeof(struct ip6_hbh))
3210 			return (EINVAL);
3211 		hbh = (struct ip6_hbh *)buf;
3212 		hbhlen = (hbh->ip6h_len + 1) << 3;
3213 		if (len != hbhlen)
3214 			return (EINVAL);
3215 
3216 		/* turn off the previous option, then set the new option. */
3217 		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3218 		if (sticky) {
3219 			opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK);
3220 			bcopy(hbh, opt->ip6po_hbh, hbhlen);
3221 		} else
3222 			opt->ip6po_hbh = hbh;
3223 
3224 		break;
3225 	}
3226 
3227 	case IPV6_2292DSTOPTS:
3228 	case IPV6_DSTOPTS:
3229 	case IPV6_RTHDRDSTOPTS:
3230 	{
3231 		struct ip6_dest *dest, **newdest = NULL;
3232 		int destlen;
3233 
3234 		if (!priv)	/* XXX: see the comment for IPV6_HOPOPTS */
3235 			return (EPERM);
3236 
3237 		if (len == 0) {
3238 			ip6_clearpktopts(opt, optname);
3239 			break;	/* just remove the option */
3240 		}
3241 
3242 		/* message length validation */
3243 		if (len < sizeof(struct ip6_dest))
3244 			return (EINVAL);
3245 		dest = (struct ip6_dest *)buf;
3246 		destlen = (dest->ip6d_len + 1) << 3;
3247 		if (len != destlen)
3248 			return (EINVAL);
3249 
3250 		/*
3251 		 * Determine the position that the destination options header
3252 		 * should be inserted; before or after the routing header.
3253 		 */
3254 		switch (optname) {
3255 		case IPV6_2292DSTOPTS:
3256 			/*
3257 			 * The old advacned API is ambiguous on this point.
3258 			 * Our approach is to determine the position based
3259 			 * according to the existence of a routing header.
3260 			 * Note, however, that this depends on the order of the
3261 			 * extension headers in the ancillary data; the 1st
3262 			 * part of the destination options header must appear
3263 			 * before the routing header in the ancillary data,
3264 			 * too.
3265 			 * RFC2292bis solved the ambiguity by introducing
3266 			 * separate ancillary data or option types.
3267 			 */
3268 			if (opt->ip6po_rthdr == NULL)
3269 				newdest = &opt->ip6po_dest1;
3270 			else
3271 				newdest = &opt->ip6po_dest2;
3272 			break;
3273 		case IPV6_RTHDRDSTOPTS:
3274 			newdest = &opt->ip6po_dest1;
3275 			break;
3276 		case IPV6_DSTOPTS:
3277 			newdest = &opt->ip6po_dest2;
3278 			break;
3279 		}
3280 
3281 		/* turn off the previous option, then set the new option. */
3282 		ip6_clearpktopts(opt, optname);
3283 		if (sticky) {
3284 			*newdest = malloc(destlen, M_IP6OPT, M_WAITOK);
3285 			bcopy(dest, *newdest, destlen);
3286 		} else
3287 			*newdest = dest;
3288 
3289 		break;
3290 	}
3291 
3292 	case IPV6_2292RTHDR:
3293 	case IPV6_RTHDR:
3294 	{
3295 		struct ip6_rthdr *rth;
3296 		int rthlen;
3297 
3298 		if (len == 0) {
3299 			ip6_clearpktopts(opt, IPV6_RTHDR);
3300 			break;	/* just remove the option */
3301 		}
3302 
3303 		/* message length validation */
3304 		if (len < sizeof(struct ip6_rthdr))
3305 			return (EINVAL);
3306 		rth = (struct ip6_rthdr *)buf;
3307 		rthlen = (rth->ip6r_len + 1) << 3;
3308 		if (len != rthlen)
3309 			return (EINVAL);
3310 
3311 		switch (rth->ip6r_type) {
3312 		case IPV6_RTHDR_TYPE_0:
3313 			if (rth->ip6r_len == 0)	/* must contain one addr */
3314 				return (EINVAL);
3315 			if (rth->ip6r_len % 2) /* length must be even */
3316 				return (EINVAL);
3317 			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3318 				return (EINVAL);
3319 			break;
3320 		default:
3321 			return (EINVAL);	/* not supported */
3322 		}
3323 
3324 		/* turn off the previous option */
3325 		ip6_clearpktopts(opt, IPV6_RTHDR);
3326 		if (sticky) {
3327 			opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK);
3328 			bcopy(rth, opt->ip6po_rthdr, rthlen);
3329 		} else
3330 			opt->ip6po_rthdr = rth;
3331 
3332 		break;
3333 	}
3334 
3335 	case IPV6_USE_MIN_MTU:
3336 		if (len != sizeof(int))
3337 			return (EINVAL);
3338 		minmtupolicy = *(int *)buf;
3339 		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3340 		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3341 		    minmtupolicy != IP6PO_MINMTU_ALL) {
3342 			return (EINVAL);
3343 		}
3344 		opt->ip6po_minmtu = minmtupolicy;
3345 		break;
3346 
3347 	case IPV6_DONTFRAG:
3348 		if (len != sizeof(int))
3349 			return (EINVAL);
3350 
3351 		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3352 			/*
3353 			 * we ignore this option for TCP sockets.
3354 			 * (rfc2292bis leaves this case unspecified.)
3355 			 */
3356 			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3357 		} else
3358 			opt->ip6po_flags |= IP6PO_DONTFRAG;
3359 		break;
3360 
3361 	case IPV6_PREFER_TEMPADDR:
3362 		if (len != sizeof(int))
3363 			return (EINVAL);
3364 		preftemp = *(int *)buf;
3365 		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3366 		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3367 		    preftemp != IP6PO_TEMPADDR_PREFER) {
3368 			return (EINVAL);
3369 		}
3370 		opt->ip6po_prefer_tempaddr = preftemp;
3371 		break;
3372 
3373 	default:
3374 		return (ENOPROTOOPT);
3375 	} /* end of switch */
3376 
3377 	return (0);
3378 }
3379 
3380 /*
3381  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3382  * packet to the input queue of a specified interface.  Note that this
3383  * calls the output routine of the loopback "driver", but with an interface
3384  * pointer that might NOT be &loif -- easier than replicating that code here.
3385  */
3386 void
3387 ip6_mloopback(ifp, m, dst)
3388 	struct ifnet *ifp;
3389 	struct mbuf *m;
3390 	struct sockaddr_in6 *dst;
3391 {
3392 	struct mbuf *copym;
3393 	struct ip6_hdr *ip6;
3394 
3395 	copym = m_copy(m, 0, M_COPYALL);
3396 	if (copym == NULL)
3397 		return;
3398 
3399 	/*
3400 	 * Make sure to deep-copy IPv6 header portion in case the data
3401 	 * is in an mbuf cluster, so that we can safely override the IPv6
3402 	 * header portion later.
3403 	 */
3404 	if ((copym->m_flags & M_EXT) != 0 ||
3405 	    copym->m_len < sizeof(struct ip6_hdr)) {
3406 		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3407 		if (copym == NULL)
3408 			return;
3409 	}
3410 
3411 #ifdef DIAGNOSTIC
3412 	if (copym->m_len < sizeof(*ip6)) {
3413 		m_freem(copym);
3414 		return;
3415 	}
3416 #endif
3417 
3418 	ip6 = mtod(copym, struct ip6_hdr *);
3419 	/*
3420 	 * clear embedded scope identifiers if necessary.
3421 	 * in6_clearscope will touch the addresses only when necessary.
3422 	 */
3423 	in6_clearscope(&ip6->ip6_src);
3424 	in6_clearscope(&ip6->ip6_dst);
3425 
3426 	(void)if_simloop(ifp, copym, dst->sin6_family, 0);
3427 }
3428 
3429 /*
3430  * Chop IPv6 header off from the payload.
3431  */
3432 static int
3433 ip6_splithdr(m, exthdrs)
3434 	struct mbuf *m;
3435 	struct ip6_exthdrs *exthdrs;
3436 {
3437 	struct mbuf *mh;
3438 	struct ip6_hdr *ip6;
3439 
3440 	ip6 = mtod(m, struct ip6_hdr *);
3441 	if (m->m_len > sizeof(*ip6)) {
3442 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3443 		if (mh == 0) {
3444 			m_freem(m);
3445 			return ENOBUFS;
3446 		}
3447 		M_MOVE_PKTHDR(mh, m);
3448 		MH_ALIGN(mh, sizeof(*ip6));
3449 		m->m_len -= sizeof(*ip6);
3450 		m->m_data += sizeof(*ip6);
3451 		mh->m_next = m;
3452 		m = mh;
3453 		m->m_len = sizeof(*ip6);
3454 		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3455 	}
3456 	exthdrs->ip6e_ip6 = m;
3457 	return 0;
3458 }
3459 
3460 /*
3461  * Compute IPv6 extension header length.
3462  */
3463 int
3464 ip6_optlen(in6p)
3465 	struct in6pcb *in6p;
3466 {
3467 	int len;
3468 
3469 	if (!in6p->in6p_outputopts)
3470 		return 0;
3471 
3472 	len = 0;
3473 #define elen(x) \
3474     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3475 
3476 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3477 	if (in6p->in6p_outputopts->ip6po_rthdr)
3478 		/* dest1 is valid with rthdr only */
3479 		len += elen(in6p->in6p_outputopts->ip6po_dest1);
3480 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3481 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3482 	return len;
3483 #undef elen
3484 }
3485