1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
32 */
33
34 /*-
35 * Copyright (c) 1982, 1986, 1988, 1990, 1993
36 * The Regents of the University of California. All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66 #include "opt_ipsec.h"
67 #include "opt_kern_tls.h"
68 #include "opt_ratelimit.h"
69 #include "opt_route.h"
70 #include "opt_rss.h"
71 #include "opt_sctp.h"
72
73 #include <sys/param.h>
74 #include <sys/kernel.h>
75 #include <sys/ktls.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/errno.h>
79 #include <sys/priv.h>
80 #include <sys/proc.h>
81 #include <sys/protosw.h>
82 #include <sys/socket.h>
83 #include <sys/socketvar.h>
84 #include <sys/syslog.h>
85 #include <sys/ucred.h>
86
87 #include <machine/in_cksum.h>
88
89 #include <net/if.h>
90 #include <net/if_var.h>
91 #include <net/if_private.h>
92 #include <net/if_vlan_var.h>
93 #include <net/if_llatbl.h>
94 #include <net/ethernet.h>
95 #include <net/netisr.h>
96 #include <net/route.h>
97 #include <net/route/nhop.h>
98 #include <net/pfil.h>
99 #include <net/rss_config.h>
100 #include <net/vnet.h>
101
102 #include <netinet/in.h>
103 #include <netinet/in_var.h>
104 #include <netinet/ip_var.h>
105 #include <netinet6/in6_fib.h>
106 #include <netinet6/in6_var.h>
107 #include <netinet/ip6.h>
108 #include <netinet/icmp6.h>
109 #include <netinet6/ip6_var.h>
110 #include <netinet/in_pcb.h>
111 #include <netinet/tcp_var.h>
112 #include <netinet6/nd6.h>
113 #include <netinet6/in6_rss.h>
114
115 #include <netipsec/ipsec_support.h>
116 #if defined(SCTP) || defined(SCTP_SUPPORT)
117 #include <netinet/sctp.h>
118 #include <netinet/sctp_crc32.h>
119 #endif
120
121 #include <netinet6/scope6_var.h>
122
/*
 * Defined outside this file.  ip6_output() reads it to decide whether a
 * multicast packet is looped back when the caller supplied no im6o.
 */
extern int in6_mcast_loop;

/*
 * Set of mbufs making up the header part of an outgoing packet while
 * ip6_output() assembles it.  A NULL member means that header is absent.
 */
struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* mbuf holding the IPv6 header */
	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
	struct mbuf *ip6e_dest1;	/* Destination options, 1st part */
	struct mbuf *ip6e_rthdr;	/* Routing header */
	struct mbuf *ip6e_dest2;	/* Destination options, 2nd part */
};
132
133 static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
134
135 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
136 struct ucred *, int);
137 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
138 struct socket *, struct sockopt *);
139 static int ip6_getpcbopt(struct inpcb *, int, struct sockopt *);
140 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *,
141 struct ucred *, int, int, int);
142
143 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
144 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
145 struct ip6_frag **);
146 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
147 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
148 static int ip6_getpmtu(struct route_in6 *, int,
149 struct ifnet *, const struct in6_addr *, u_long *, int *, u_int,
150 u_int);
151 static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long,
152 u_long *, int *, u_int);
153 static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *);
154 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
155
/*
 * Build one extension header mbuf from option data.
 *   hp  - source option data; it begins with a struct ip6_ext whose
 *         ip6e_len field gives the header length in 8-byte units,
 *         not counting the first 8 bytes
 *   mp  - address of the mbuf pointer that receives the copy
 *   _ol - running option length, increased by the m_len of the new mbuf
 *
 * The expanding function must have an "error" variable and a "freehdrs"
 * label; the macro jumps there when ip6_copyexthdr() fails.
 */
#define	MAKE_EXTHDR(hp, mp, _ol)					\
    do {								\
	struct ip6_ext *_xh = (struct ip6_ext *)(hp);			\
	int _xhlen = (_xh->ip6e_len + 1) << 3;				\
									\
	error = ip6_copyexthdr((mp), (caddr_t)(hp), _xhlen);		\
	if (error != 0)							\
		goto freehdrs;						\
	(_ol) += (*(mp))->m_len;					\
    } while (/*CONSTCOND*/ 0)
169
/*
 * Link one extension header into the chain, if it exists.
 *   m  - mbuf holding the extension header (nothing is done when NULL)
 *   mp - mbuf after which m is inserted; updated to m on return
 *   p  - pointer to the current "next header" byte; updated to point at
 *        the first byte of m on return
 *   i  - protocol number identifying the header held in m
 *
 * The expanding function must have "hdrsplit" and "exthdrs" in scope.
 */
#define	MAKE_CHAIN(m, mp, p, i)						\
    do {								\
	if ((m) != NULL) {						\
		if (hdrsplit == false)					\
			panic("%s:%d: assumption failed: "		\
			    "hdr not split: hdrsplit %d exthdrs %p",	\
			    __func__, __LINE__, hdrsplit, &exthdrs);	\
		/* Splice m into the mbuf chain right after mp. */	\
		(m)->m_next = (mp)->m_next;				\
		(mp)->m_next = (m);					\
		(mp) = (m);						\
		/* m takes over the old next-header value; *p names m. */\
		*mtod((m), u_char *) = *(p);				\
		*(p) = (i);						\
		p = mtod((m), u_char *);				\
	}								\
    } while (/*CONSTCOND*/ 0)
192
193 void
in6_delayed_cksum(struct mbuf * m,uint32_t plen,u_short offset)194 in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
195 {
196 u_short csum;
197
198 csum = in_cksum_skip(m, offset + plen, offset);
199 if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0)
200 csum = 0xffff;
201 offset += m->m_pkthdr.csum_data; /* checksum offset */
202
203 if (offset + sizeof(csum) > m->m_len)
204 m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
205 else
206 *(u_short *)mtodo(m, offset) = csum;
207 }
208
209 static void
ip6_output_delayed_csum(struct mbuf * m,struct ifnet * ifp,int csum_flags,int plen,int optlen)210 ip6_output_delayed_csum(struct mbuf *m, struct ifnet *ifp, int csum_flags,
211 int plen, int optlen)
212 {
213
214 KASSERT((plen >= optlen), ("%s:%d: plen %d < optlen %d, m %p, ifp %p "
215 "csum_flags %#x",
216 __func__, __LINE__, plen, optlen, m, ifp, csum_flags));
217
218 if (csum_flags & CSUM_DELAY_DATA_IPV6) {
219 in6_delayed_cksum(m, plen - optlen,
220 sizeof(struct ip6_hdr) + optlen);
221 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
222 }
223 #if defined(SCTP) || defined(SCTP_SUPPORT)
224 if (csum_flags & CSUM_SCTP_IPV6) {
225 sctp_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
226 m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
227 }
228 #endif
229 }
230
231 int
ip6_fragment(struct ifnet * ifp,struct mbuf * m0,int hlen,u_char nextproto,int fraglen,uint32_t id)232 ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
233 int fraglen , uint32_t id)
234 {
235 struct mbuf *m, **mnext, *m_frgpart;
236 struct ip6_hdr *ip6, *mhip6;
237 struct ip6_frag *ip6f;
238 int off;
239 int error;
240 int tlen = m0->m_pkthdr.len;
241
242 KASSERT((fraglen % 8 == 0), ("Fragment length must be a multiple of 8"));
243
244 m = m0;
245 ip6 = mtod(m, struct ip6_hdr *);
246 mnext = &m->m_nextpkt;
247
248 for (off = hlen; off < tlen; off += fraglen) {
249 m = m_gethdr(M_NOWAIT, MT_DATA);
250 if (!m) {
251 IP6STAT_INC(ip6s_odropped);
252 return (ENOBUFS);
253 }
254
255 /*
256 * Make sure the complete packet header gets copied
257 * from the originating mbuf to the newly created
258 * mbuf. This also ensures that existing firewall
259 * classification(s), VLAN tags and so on get copied
260 * to the resulting fragmented packet(s):
261 */
262 if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
263 m_free(m);
264 IP6STAT_INC(ip6s_odropped);
265 return (ENOBUFS);
266 }
267
268 *mnext = m;
269 mnext = &m->m_nextpkt;
270 m->m_data += max_linkhdr;
271 mhip6 = mtod(m, struct ip6_hdr *);
272 *mhip6 = *ip6;
273 m->m_len = sizeof(*mhip6);
274 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
275 if (error) {
276 IP6STAT_INC(ip6s_odropped);
277 return (error);
278 }
279 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
280 if (off + fraglen >= tlen)
281 fraglen = tlen - off;
282 else
283 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
284 mhip6->ip6_plen = htons((u_short)(fraglen + hlen +
285 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
286 if ((m_frgpart = m_copym(m0, off, fraglen, M_NOWAIT)) == NULL) {
287 IP6STAT_INC(ip6s_odropped);
288 return (ENOBUFS);
289 }
290 m_cat(m, m_frgpart);
291 m->m_pkthdr.len = fraglen + hlen + sizeof(*ip6f);
292 ip6f->ip6f_reserved = 0;
293 ip6f->ip6f_ident = id;
294 ip6f->ip6f_nxt = nextproto;
295 IP6STAT_INC(ip6s_ofragments);
296 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
297 }
298
299 return (0);
300 }
301
302 static int
ip6_output_send(struct inpcb * inp,struct ifnet * ifp,struct ifnet * origifp,struct mbuf * m,struct sockaddr_in6 * dst,struct route_in6 * ro,bool stamp_tag)303 ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp,
304 struct mbuf *m, struct sockaddr_in6 *dst, struct route_in6 *ro,
305 bool stamp_tag)
306 {
307 #ifdef KERN_TLS
308 struct ktls_session *tls = NULL;
309 #endif
310 struct m_snd_tag *mst;
311 int error;
312
313 MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
314 mst = NULL;
315
316 #ifdef KERN_TLS
317 /*
318 * If this is an unencrypted TLS record, save a reference to
319 * the record. This local reference is used to call
320 * ktls_output_eagain after the mbuf has been freed (thus
321 * dropping the mbuf's reference) in if_output.
322 */
323 if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
324 tls = ktls_hold(m->m_next->m_epg_tls);
325 mst = tls->snd_tag;
326
327 /*
328 * If a TLS session doesn't have a valid tag, it must
329 * have had an earlier ifp mismatch, so drop this
330 * packet.
331 */
332 if (mst == NULL) {
333 m_freem(m);
334 error = EAGAIN;
335 goto done;
336 }
337 /*
338 * Always stamp tags that include NIC ktls.
339 */
340 stamp_tag = true;
341 }
342 #endif
343 #ifdef RATELIMIT
344 if (inp != NULL && mst == NULL) {
345 if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
346 (inp->inp_snd_tag != NULL &&
347 inp->inp_snd_tag->ifp != ifp))
348 in_pcboutput_txrtlmt(inp, ifp, m);
349
350 if (inp->inp_snd_tag != NULL)
351 mst = inp->inp_snd_tag;
352 }
353 #endif
354 if (stamp_tag && mst != NULL) {
355 KASSERT(m->m_pkthdr.rcvif == NULL,
356 ("trying to add a send tag to a forwarded packet"));
357 if (mst->ifp != ifp) {
358 m_freem(m);
359 error = EAGAIN;
360 goto done;
361 }
362
363 /* stamp send tag on mbuf */
364 m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
365 m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
366 }
367
368 error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro);
369
370 done:
371 /* Check for route change invalidating send tags. */
372 #ifdef KERN_TLS
373 if (tls != NULL) {
374 if (error == EAGAIN)
375 error = ktls_output_eagain(inp, tls);
376 ktls_free(tls);
377 }
378 #endif
379 #ifdef RATELIMIT
380 if (error == EAGAIN)
381 in_pcboutput_eagain(inp);
382 #endif
383 return (error);
384 }
385
386 /*
387 * IP6 output.
388 * The packet in mbuf chain m contains a skeletal IP6 header (with pri, len,
389 * nxt, hlim, src, dst).
390 * This function may modify ver and hlim only.
391 * The mbuf chain containing the packet will be freed.
392 * The mbuf opt, if present, will not be freed.
393 * If route_in6 ro is present and has ro_nh initialized, route lookup would be
394 * skipped and ro->ro_nh would be used. If ro is present but ro->ro_nh is NULL,
395 * then result of route lookup is stored in ro->ro_nh.
396 *
397 * Type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and nd_ifinfo.linkmtu
398 * is uint32_t. So we use u_long to hold largest one, which is rt_mtu.
399 *
400 * ifpp - XXX: just for statistics
401 */
402 int
ip6_output(struct mbuf * m0,struct ip6_pktopts * opt,struct route_in6 * ro,int flags,struct ip6_moptions * im6o,struct ifnet ** ifpp,struct inpcb * inp)403 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
404 struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
405 struct ifnet **ifpp, struct inpcb *inp)
406 {
407 struct ip6_hdr *ip6;
408 struct ifnet *ifp, *origifp;
409 struct mbuf *m = m0;
410 struct mbuf *mprev;
411 struct route_in6 *ro_pmtu;
412 struct nhop_object *nh;
413 struct sockaddr_in6 *dst, sin6, src_sa, dst_sa;
414 struct in6_addr odst;
415 u_char *nexthdrp;
416 int tlen, len;
417 int error = 0;
418 int vlan_pcp = -1;
419 struct in6_ifaddr *ia = NULL;
420 u_long mtu;
421 int alwaysfrag, dontfrag;
422 u_int32_t optlen, plen = 0, unfragpartlen;
423 struct ip6_exthdrs exthdrs;
424 struct in6_addr src0, dst0;
425 u_int32_t zone;
426 bool hdrsplit;
427 int sw_csum, tso;
428 int needfiblookup;
429 uint32_t fibnum;
430 struct m_tag *fwd_tag = NULL;
431 uint32_t id;
432 uint32_t optvalid;
433
434 NET_EPOCH_ASSERT();
435
436 if (inp != NULL) {
437 INP_LOCK_ASSERT(inp);
438 M_SETFIB(m, inp->inp_inc.inc_fibnum);
439 if ((flags & IP_NODEFAULTFLOWID) == 0) {
440 /* Unconditionally set flowid. */
441 m->m_pkthdr.flowid = inp->inp_flowid;
442 M_HASHTYPE_SET(m, inp->inp_flowtype);
443 }
444 if ((inp->inp_flags2 & INP_2PCP_SET) != 0)
445 vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >>
446 INP_2PCP_SHIFT;
447 #ifdef NUMA
448 m->m_pkthdr.numa_domain = inp->inp_numa_domain;
449 #endif
450 }
451
452 /* Source address validation. */
453 ip6 = mtod(m, struct ip6_hdr *);
454 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
455 (flags & IPV6_UNSPECSRC) == 0) {
456 error = EOPNOTSUPP;
457 IP6STAT_INC(ip6s_badscope);
458 goto bad;
459 }
460 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
461 error = EOPNOTSUPP;
462 IP6STAT_INC(ip6s_badscope);
463 goto bad;
464 }
465
466 /*
467 * If we are given packet options to add extension headers prepare them.
468 * Calculate the total length of the extension header chain.
469 * Keep the length of the unfragmentable part for fragmentation.
470 */
471 bzero(&exthdrs, sizeof(exthdrs));
472 optlen = optvalid = 0;
473 unfragpartlen = sizeof(struct ip6_hdr);
474 if (opt) {
475 optvalid = opt->ip6po_valid;
476
477 /* Hop-by-Hop options header. */
478 if ((optvalid & IP6PO_VALID_HBH) != 0)
479 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh, optlen);
480
481 /* Destination options header (1st part). */
482 if ((optvalid & IP6PO_VALID_RHINFO) != 0) {
483 #ifndef RTHDR_SUPPORT_IMPLEMENTED
484 /*
485 * If there is a routing header, discard the packet
486 * right away here. RH0/1 are obsolete and we do not
487 * currently support RH2/3/4.
488 * People trying to use RH253/254 may want to disable
489 * this check.
490 * The moment we do support any routing header (again)
491 * this block should check the routing type more
492 * selectively.
493 */
494 error = EINVAL;
495 goto bad;
496 #endif
497
498 /*
499 * Destination options header (1st part).
500 * This only makes sense with a routing header.
501 * See Section 9.2 of RFC 3542.
502 * Disabling this part just for MIP6 convenience is
503 * a bad idea. We need to think carefully about a
504 * way to make the advanced API coexist with MIP6
505 * options, which might automatically be inserted in
506 * the kernel.
507 */
508 if ((optvalid & IP6PO_VALID_DEST1) != 0)
509 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1,
510 optlen);
511 }
512 /* Routing header. */
513 if ((optvalid & IP6PO_VALID_RHINFO) != 0)
514 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr, optlen);
515
516 unfragpartlen += optlen;
517
518 /*
519 * NOTE: we don't add AH/ESP length here (done in
520 * ip6_ipsec_output()).
521 */
522
523 /* Destination options header (2nd part). */
524 if ((optvalid & IP6PO_VALID_DEST2) != 0)
525 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2, optlen);
526 }
527
528 /*
529 * If there is at least one extension header,
530 * separate IP6 header from the payload.
531 */
532 hdrsplit = false;
533 if (optlen) {
534 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
535 m = NULL;
536 goto freehdrs;
537 }
538 m = exthdrs.ip6e_ip6;
539 ip6 = mtod(m, struct ip6_hdr *);
540 hdrsplit = true;
541 }
542
543 /* Adjust mbuf packet header length. */
544 m->m_pkthdr.len += optlen;
545 plen = m->m_pkthdr.len - sizeof(*ip6);
546
547 /* If this is a jumbo payload, insert a jumbo payload option. */
548 if (plen > IPV6_MAXPACKET) {
549 if (!hdrsplit) {
550 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
551 m = NULL;
552 goto freehdrs;
553 }
554 m = exthdrs.ip6e_ip6;
555 ip6 = mtod(m, struct ip6_hdr *);
556 hdrsplit = true;
557 }
558 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
559 goto freehdrs;
560 ip6->ip6_plen = 0;
561 } else
562 ip6->ip6_plen = htons(plen);
563 nexthdrp = &ip6->ip6_nxt;
564
565 if (optlen) {
566 /*
567 * Concatenate headers and fill in next header fields.
568 * Here we have, on "m"
569 * IPv6 payload
570 * and we insert headers accordingly.
571 * Finally, we should be getting:
572 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload].
573 *
574 * During the header composing process "m" points to IPv6
575 * header. "mprev" points to an extension header prior to esp.
576 */
577 mprev = m;
578
579 /*
580 * We treat dest2 specially. This makes IPsec processing
581 * much easier. The goal here is to make mprev point the
582 * mbuf prior to dest2.
583 *
584 * Result: IPv6 dest2 payload.
585 * m and mprev will point to IPv6 header.
586 */
587 if (exthdrs.ip6e_dest2) {
588 if (!hdrsplit)
589 panic("%s:%d: assumption failed: "
590 "hdr not split: hdrsplit %d exthdrs %p",
591 __func__, __LINE__, hdrsplit, &exthdrs);
592 exthdrs.ip6e_dest2->m_next = m->m_next;
593 m->m_next = exthdrs.ip6e_dest2;
594 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
595 ip6->ip6_nxt = IPPROTO_DSTOPTS;
596 }
597
598 /*
599 * Result: IPv6 hbh dest1 rthdr dest2 payload.
600 * m will point to IPv6 header. mprev will point to the
601 * extension header prior to dest2 (rthdr in the above case).
602 */
603 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
604 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
605 IPPROTO_DSTOPTS);
606 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
607 IPPROTO_ROUTING);
608 }
609
610 IP6STAT_INC(ip6s_localout);
611
612 /* Route packet. */
613 ro_pmtu = ro;
614 if ((optvalid & IP6PO_VALID_RHINFO) != 0)
615 ro = &opt->ip6po_route;
616 if (ro != NULL)
617 dst = (struct sockaddr_in6 *)&ro->ro_dst;
618 else
619 dst = &sin6;
620 fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
621
622 again:
623 /*
624 * If specified, try to fill in the traffic class field.
625 * Do not override if a non-zero value is already set.
626 * We check the diffserv field and the ECN field separately.
627 */
628 if ((optvalid & IP6PO_VALID_TC) != 0){
629 int mask = 0;
630
631 if (IPV6_DSCP(ip6) == 0)
632 mask |= 0xfc;
633 if (IPV6_ECN(ip6) == 0)
634 mask |= 0x03;
635 if (mask != 0)
636 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
637 }
638
639 /* Fill in or override the hop limit field, if necessary. */
640 if ((optvalid & IP6PO_VALID_HLIM) != 0)
641 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
642 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
643 if (im6o != NULL)
644 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
645 else
646 ip6->ip6_hlim = V_ip6_defmcasthlim;
647 }
648
649 if (ro == NULL || ro->ro_nh == NULL) {
650 bzero(dst, sizeof(*dst));
651 dst->sin6_family = AF_INET6;
652 dst->sin6_len = sizeof(*dst);
653 dst->sin6_addr = ip6->ip6_dst;
654 }
655 /*
656 * Validate route against routing table changes.
657 * Make sure that the address family is set in route.
658 */
659 nh = NULL;
660 ifp = NULL;
661 mtu = 0;
662 if (ro != NULL) {
663 if (ro->ro_nh != NULL && inp != NULL) {
664 ro->ro_dst.sin6_family = AF_INET6; /* XXX KASSERT? */
665 NH_VALIDATE((struct route *)ro, &inp->inp_rt_cookie,
666 fibnum);
667 }
668 if (ro->ro_nh != NULL && fwd_tag == NULL &&
669 (!NH_IS_VALID(ro->ro_nh) ||
670 ro->ro_dst.sin6_family != AF_INET6 ||
671 !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)))
672 RO_INVALIDATE_CACHE(ro);
673
674 if (ro->ro_nh != NULL && fwd_tag == NULL &&
675 ro->ro_dst.sin6_family == AF_INET6 &&
676 IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
677 /* Nexthop is valid and contains valid ifp */
678 nh = ro->ro_nh;
679 } else {
680 if (ro->ro_lle)
681 LLE_FREE(ro->ro_lle); /* zeros ro_lle */
682 ro->ro_lle = NULL;
683 if (fwd_tag == NULL) {
684 bzero(&dst_sa, sizeof(dst_sa));
685 dst_sa.sin6_family = AF_INET6;
686 dst_sa.sin6_len = sizeof(dst_sa);
687 dst_sa.sin6_addr = ip6->ip6_dst;
688 }
689 error = in6_selectroute(&dst_sa, opt, im6o, ro, &ifp,
690 &nh, fibnum, m->m_pkthdr.flowid);
691 if (error != 0) {
692 IP6STAT_INC(ip6s_noroute);
693 if (ifp != NULL)
694 in6_ifstat_inc(ifp, ifs6_out_discard);
695 goto bad;
696 }
697 /*
698 * At this point at least @ifp is not NULL
699 * Can be the case when dst is multicast, link-local or
			 * interface is explicitly specified by the caller.
701 */
702 }
703 if (nh == NULL) {
704 /*
705 * If in6_selectroute() does not return a nexthop
706 * dst may not have been updated.
707 */
708 *dst = dst_sa; /* XXX */
709 origifp = ifp;
710 mtu = ifp->if_mtu;
711 } else {
712 ifp = nh->nh_ifp;
713 origifp = nh->nh_aifp;
714 ia = (struct in6_ifaddr *)(nh->nh_ifa);
715 counter_u64_add(nh->nh_pksent, 1);
716 }
717 } else {
718 struct nhop_object *nh;
719 struct in6_addr kdst;
720 uint32_t scopeid;
721
722 if (fwd_tag == NULL) {
723 bzero(&dst_sa, sizeof(dst_sa));
724 dst_sa.sin6_family = AF_INET6;
725 dst_sa.sin6_len = sizeof(dst_sa);
726 dst_sa.sin6_addr = ip6->ip6_dst;
727 }
728
729 if (IN6_IS_ADDR_MULTICAST(&dst_sa.sin6_addr) &&
730 im6o != NULL &&
731 (ifp = im6o->im6o_multicast_ifp) != NULL) {
732 /* We do not need a route lookup. */
733 *dst = dst_sa; /* XXX */
734 origifp = ifp;
735 goto nonh6lookup;
736 }
737
738 in6_splitscope(&dst_sa.sin6_addr, &kdst, &scopeid);
739
740 if (IN6_IS_ADDR_MC_LINKLOCAL(&dst_sa.sin6_addr) ||
741 IN6_IS_ADDR_MC_NODELOCAL(&dst_sa.sin6_addr)) {
742 if (scopeid > 0) {
743 ifp = in6_getlinkifnet(scopeid);
744 if (ifp == NULL) {
745 error = EHOSTUNREACH;
746 goto bad;
747 }
748 *dst = dst_sa; /* XXX */
749 origifp = ifp;
750 goto nonh6lookup;
751 }
752 }
753
754 nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE,
755 m->m_pkthdr.flowid);
756 if (nh == NULL) {
757 IP6STAT_INC(ip6s_noroute);
758 /* No ifp in6_ifstat_inc(ifp, ifs6_out_discard); */
759 error = EHOSTUNREACH;
760 goto bad;
761 }
762
763 ifp = nh->nh_ifp;
764 origifp = nh->nh_aifp;
765 ia = ifatoia6(nh->nh_ifa);
766 if (nh->nh_flags & NHF_GATEWAY)
767 dst->sin6_addr = nh->gw6_sa.sin6_addr;
768 else if (fwd_tag != NULL)
769 dst->sin6_addr = dst_sa.sin6_addr;
770 nonh6lookup:
771 ;
772 }
773 /*
774 * At this point ifp MUST be pointing to the valid transmit ifp.
775 * origifp MUST be valid and pointing to either the same ifp or,
776 * in case of loopback output, to the interface which ip6_src
777 * belongs to.
778 * Examples:
779 * fe80::1%em0 -> fe80::2%em0 -> ifp=em0, origifp=em0
780 * fe80::1%em0 -> fe80::1%em0 -> ifp=lo0, origifp=em0
781 * ::1 -> ::1 -> ifp=lo0, origifp=lo0
782 *
783 * mtu can be 0 and will be refined later.
784 */
785 KASSERT((ifp != NULL), ("output interface must not be NULL"));
786 KASSERT((origifp != NULL), ("output address interface must not be NULL"));
787
788 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
789 /*
790 * IPSec checking which handles several cases.
791 * FAST IPSEC: We re-injected the packet.
792 * XXX: need scope argument.
793 */
794 if (IPSEC_ENABLED(ipv6)) {
795 if ((error = IPSEC_OUTPUT(ipv6, ifp, m, inp, mtu == 0 ?
796 ifp->if_mtu : mtu)) != 0) {
797 if (error == EINPROGRESS)
798 error = 0;
799 goto done;
800 }
801 }
802 #endif /* IPSEC */
803
804 if ((flags & IPV6_FORWARDING) == 0) {
805 /* XXX: the FORWARDING flag can be set for mrouting. */
806 in6_ifstat_inc(ifp, ifs6_out_request);
807 }
808
809 /* Setup data structures for scope ID checks. */
810 src0 = ip6->ip6_src;
811 bzero(&src_sa, sizeof(src_sa));
812 src_sa.sin6_family = AF_INET6;
813 src_sa.sin6_len = sizeof(src_sa);
814 src_sa.sin6_addr = ip6->ip6_src;
815
816 dst0 = ip6->ip6_dst;
817 /* Re-initialize to be sure. */
818 bzero(&dst_sa, sizeof(dst_sa));
819 dst_sa.sin6_family = AF_INET6;
820 dst_sa.sin6_len = sizeof(dst_sa);
821 dst_sa.sin6_addr = ip6->ip6_dst;
822
823 /* Check for valid scope ID. */
824 if (in6_setscope(&src0, origifp, &zone) == 0 &&
825 sa6_recoverscope(&src_sa) == 0 && zone == src_sa.sin6_scope_id &&
826 in6_setscope(&dst0, origifp, &zone) == 0 &&
827 sa6_recoverscope(&dst_sa) == 0 && zone == dst_sa.sin6_scope_id) {
828 /*
829 * The outgoing interface is in the zone of the source
830 * and destination addresses.
831 *
832 */
833 } else if ((origifp->if_flags & IFF_LOOPBACK) == 0 ||
834 sa6_recoverscope(&src_sa) != 0 ||
835 sa6_recoverscope(&dst_sa) != 0 ||
836 dst_sa.sin6_scope_id == 0 ||
837 (src_sa.sin6_scope_id != 0 &&
838 src_sa.sin6_scope_id != dst_sa.sin6_scope_id) ||
839 ifnet_byindex(dst_sa.sin6_scope_id) == NULL) {
840 /*
841 * If the destination network interface is not a
842 * loopback interface, or the destination network
843 * address has no scope ID, or the source address has
844 * a scope ID set which is different from the
845 * destination address one, or there is no network
846 * interface representing this scope ID, the address
847 * pair is considered invalid.
848 */
849 IP6STAT_INC(ip6s_badscope);
850 in6_ifstat_inc(origifp, ifs6_out_discard);
851 if (error == 0)
852 error = EHOSTUNREACH; /* XXX */
853 goto bad;
854 }
855 /* All scope ID checks are successful. */
856
857 if (nh && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
858 if ((optvalid & IP6PO_VALID_NHINFO) != 0) {
859 /*
860 * The nexthop is explicitly specified by the
861 * application. We assume the next hop is an IPv6
862 * address.
863 */
864 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
865 }
866 else if ((nh->nh_flags & NHF_GATEWAY))
867 dst = &nh->gw6_sa;
868 }
869
870 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
871 m->m_flags &= ~(M_BCAST | M_MCAST); /* Just in case. */
872 } else {
873 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
874 in6_ifstat_inc(ifp, ifs6_out_mcast);
875
876 /* Confirm that the outgoing interface supports multicast. */
877 if (!(ifp->if_flags & IFF_MULTICAST)) {
878 IP6STAT_INC(ip6s_noroute);
879 in6_ifstat_inc(ifp, ifs6_out_discard);
880 error = ENETUNREACH;
881 goto bad;
882 }
883 if ((im6o == NULL && in6_mcast_loop) ||
884 (im6o && im6o->im6o_multicast_loop)) {
885 /*
886 * Loop back multicast datagram if not expressly
887 * forbidden to do so, even if we have not joined
888 * the address; protocols will filter it later,
889 * thus deferring a hash lookup and lock acquisition
890 * at the expense of an m_copym().
891 */
892 ip6_mloopback(ifp, m);
893 } else {
894 /*
895 * If we are acting as a multicast router, perform
896 * multicast forwarding as if the packet had just
897 * arrived on the interface to which we are about
898 * to send. The multicast forwarding function
899 * recursively calls this function, using the
900 * IPV6_FORWARDING flag to prevent infinite recursion.
901 *
902 * Multicasts that are looped back by ip6_mloopback(),
903 * above, will be forwarded by the ip6_input() routine,
904 * if necessary.
905 */
906 if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
907 /*
908 * XXX: ip6_mforward expects that rcvif is NULL
909 * when it is called from the originating path.
910 * However, it may not always be the case.
911 */
912 m->m_pkthdr.rcvif = NULL;
913 if (ip6_mforward(ip6, ifp, m) != 0) {
914 m_freem(m);
915 goto done;
916 }
917 }
918 }
919 /*
920 * Multicasts with a hoplimit of zero may be looped back,
921 * above, but must not be transmitted on a network.
922 * Also, multicasts addressed to the loopback interface
923 * are not sent -- the above call to ip6_mloopback() will
924 * loop back a copy if this host actually belongs to the
925 * destination group on the loopback interface.
926 */
927 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
928 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
929 m_freem(m);
930 goto done;
931 }
932 }
933
934 /*
	 * Fill the outgoing interface to tell the upper layer
936 * to increment per-interface statistics.
937 */
938 if (ifpp)
939 *ifpp = ifp;
940
941 /* Determine path MTU. */
942 if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst,
943 &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0)
944 goto bad;
945 KASSERT(mtu > 0, ("%s:%d: mtu %ld, ro_pmtu %p ro %p ifp %p "
946 "alwaysfrag %d fibnum %u\n", __func__, __LINE__, mtu, ro_pmtu, ro,
947 ifp, alwaysfrag, fibnum));
948
949 /*
950 * The caller of this function may specify to use the minimum MTU
951 * in some cases.
952 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
953 * setting. The logic is a bit complicated; by default, unicast
954 * packets will follow path MTU while multicast packets will be sent at
955 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
956 * including unicast ones will be sent at the minimum MTU. Multicast
957 * packets will always be sent at the minimum MTU unless
958 * IP6PO_MINMTU_DISABLE is explicitly specified.
959 * See RFC 3542 for more details.
960 */
961 if (mtu > IPV6_MMTU) {
962 if ((flags & IPV6_MINMTU))
963 mtu = IPV6_MMTU;
964 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
965 mtu = IPV6_MMTU;
966 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
967 (opt == NULL ||
968 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
969 mtu = IPV6_MMTU;
970 }
971 }
972
973 /*
974 * Clear embedded scope identifiers if necessary.
975 * in6_clearscope() will touch the addresses only when necessary.
976 */
977 in6_clearscope(&ip6->ip6_src);
978 in6_clearscope(&ip6->ip6_dst);
979
980 /*
981 * If the outgoing packet contains a hop-by-hop options header,
982 * it must be examined and processed even by the source node.
983 * (RFC 2460, section 4.)
984 */
985 if (exthdrs.ip6e_hbh) {
986 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
987 u_int32_t dummy; /* XXX unused */
988 u_int32_t plen = 0; /* XXX: ip6_process will check the value */
989
990 #ifdef DIAGNOSTIC
991 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
992 panic("ip6e_hbh is not contiguous");
993 #endif
994 /*
995 * XXX: if we have to send an ICMPv6 error to the sender,
996 * we need the M_LOOP flag since icmp6_error() expects
997 * the IPv6 and the hop-by-hop options header are
998 * contiguous unless the flag is set.
999 */
1000 m->m_flags |= M_LOOP;
1001 m->m_pkthdr.rcvif = ifp;
1002 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1003 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1004 &dummy, &plen) < 0) {
1005 /* m was already freed at this point. */
1006 error = EINVAL;/* better error? */
1007 goto done;
1008 }
1009 m->m_flags &= ~M_LOOP; /* XXX */
1010 m->m_pkthdr.rcvif = NULL;
1011 }
1012
1013 /* Jump over all PFIL processing if hooks are not active. */
1014 if (!PFIL_HOOKED_OUT(V_inet6_pfil_head))
1015 goto passout;
1016
1017 odst = ip6->ip6_dst;
1018 /* Run through list of hooks for output packets. */
1019 switch (pfil_mbuf_out(V_inet6_pfil_head, &m, ifp, inp)) {
1020 case PFIL_PASS:
1021 ip6 = mtod(m, struct ip6_hdr *);
1022 break;
1023 case PFIL_DROPPED:
1024 error = EACCES;
1025 /* FALLTHROUGH */
1026 case PFIL_CONSUMED:
1027 goto done;
1028 }
1029
1030 needfiblookup = 0;
1031 /* See if destination IP address was changed by packet filter. */
1032 if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
1033 m->m_flags |= M_SKIP_FIREWALL;
1034 /* If destination is now ourself drop to ip6_input(). */
1035 if (in6_localip(&ip6->ip6_dst)) {
1036 m->m_flags |= M_FASTFWD_OURS;
1037 if (m->m_pkthdr.rcvif == NULL)
1038 m->m_pkthdr.rcvif = V_loif;
1039 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1040 m->m_pkthdr.csum_flags |=
1041 CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
1042 m->m_pkthdr.csum_data = 0xffff;
1043 }
1044 #if defined(SCTP) || defined(SCTP_SUPPORT)
1045 if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
1046 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1047 #endif
1048 error = netisr_queue(NETISR_IPV6, m);
1049 goto done;
1050 } else {
1051 if (ro != NULL)
1052 RO_INVALIDATE_CACHE(ro);
1053 needfiblookup = 1; /* Redo the routing table lookup. */
1054 }
1055 }
1056 /* See if fib was changed by packet filter. */
1057 if (fibnum != M_GETFIB(m)) {
1058 m->m_flags |= M_SKIP_FIREWALL;
1059 fibnum = M_GETFIB(m);
1060 if (ro != NULL)
1061 RO_INVALIDATE_CACHE(ro);
1062 needfiblookup = 1;
1063 }
1064 if (needfiblookup)
1065 goto again;
1066
1067 /* See if local, if yes, send it to netisr. */
1068 if (m->m_flags & M_FASTFWD_OURS) {
1069 if (m->m_pkthdr.rcvif == NULL)
1070 m->m_pkthdr.rcvif = V_loif;
1071 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1072 m->m_pkthdr.csum_flags |=
1073 CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
1074 m->m_pkthdr.csum_data = 0xffff;
1075 }
1076 #if defined(SCTP) || defined(SCTP_SUPPORT)
1077 if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
1078 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
1079 #endif
1080 error = netisr_queue(NETISR_IPV6, m);
1081 goto done;
1082 }
1083 /* Or forward to some other address? */
1084 if ((m->m_flags & M_IP6_NEXTHOP) &&
1085 (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
1086 if (ro != NULL)
1087 dst = (struct sockaddr_in6 *)&ro->ro_dst;
1088 else
1089 dst = &sin6;
1090 bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6));
1091 m->m_flags |= M_SKIP_FIREWALL;
1092 m->m_flags &= ~M_IP6_NEXTHOP;
1093 m_tag_delete(m, fwd_tag);
1094 goto again;
1095 }
1096
1097 passout:
1098 if (vlan_pcp > -1)
1099 EVL_APPLY_PRI(m, vlan_pcp);
1100
1101 /* Ensure the packet data is mapped if the interface requires it. */
1102 if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) {
1103 struct mbuf *m1;
1104
1105 error = mb_unmapped_to_ext(m, &m1);
1106 if (error != 0) {
1107 if (error == EINVAL) {
1108 if_printf(ifp, "TLS packet\n");
1109 /* XXXKIB */
1110 } else if (error == ENOMEM) {
1111 error = ENOBUFS;
1112 }
1113 IP6STAT_INC(ip6s_odropped);
1114 return (error);
1115 } else {
1116 m = m1;
1117 }
1118 }
1119
1120 /*
1121 * Send the packet to the outgoing interface.
1122 * If necessary, do IPv6 fragmentation before sending.
1123 *
1124 * The logic here is rather complex:
1125 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1126 * 1-a: send as is if tlen <= path mtu
1127 * 1-b: fragment if tlen > path mtu
1128 *
1129 * 2: if user asks us not to fragment (dontfrag == 1)
1130 * 2-a: send as is if tlen <= interface mtu
1131 * 2-b: error if tlen > interface mtu
1132 *
1133 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1134 * always fragment
1135 *
1136 * 4: if dontfrag == 1 && alwaysfrag == 1
1137 * error, as we cannot handle this conflicting request.
1138 */
1139 sw_csum = m->m_pkthdr.csum_flags;
1140 if (!hdrsplit) {
1141 tso = ((sw_csum & ifp->if_hwassist &
1142 (CSUM_TSO | CSUM_INNER_TSO)) != 0) ? 1 : 0;
1143 sw_csum &= ~ifp->if_hwassist;
1144 } else
1145 tso = 0;
1146 /*
1147 * If we added extension headers, we will not do TSO and calculate the
1148 * checksums ourselves for now.
1149 * XXX-BZ Need a framework to know when the NIC can handle it, even
1150 * with ext. hdrs.
1151 */
1152 ip6_output_delayed_csum(m, ifp, sw_csum, plen, optlen);
1153 /* XXX-BZ m->m_pkthdr.csum_flags &= ~ifp->if_hwassist; */
1154 tlen = m->m_pkthdr.len;
1155
1156 if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso)
1157 dontfrag = 1;
1158 else
1159 dontfrag = 0;
1160 if (dontfrag && alwaysfrag) { /* Case 4. */
1161 /* Conflicting request - can't transmit. */
1162 error = EMSGSIZE;
1163 goto bad;
1164 }
1165 if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* Case 2-b. */
1166 /*
1167 * Even if the DONTFRAG option is specified, we cannot send the
1168 * packet when the data length is larger than the MTU of the
1169 * outgoing interface.
1170 * Notify the error by sending IPV6_PATHMTU ancillary data if
1171 * application wanted to know the MTU value. Also return an
1172 * error code (this is not described in the API spec).
1173 */
1174 if (inp != NULL)
1175 ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu);
1176 error = EMSGSIZE;
1177 goto bad;
1178 }
1179
1180 /* Transmit packet without fragmentation. */
1181 if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* Cases 1-a and 2-a. */
1182 struct in6_ifaddr *ia6;
1183
1184 ip6 = mtod(m, struct ip6_hdr *);
1185 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1186 if (ia6) {
1187 /* Record statistics for this interface address. */
1188 counter_u64_add(ia6->ia_ifa.ifa_opackets, 1);
1189 counter_u64_add(ia6->ia_ifa.ifa_obytes,
1190 m->m_pkthdr.len);
1191 }
1192 error = ip6_output_send(inp, ifp, origifp, m, dst, ro,
1193 (flags & IP_NO_SND_TAG_RL) ? false : true);
1194 goto done;
1195 }
1196
1197 /* Try to fragment the packet. Cases 1-b and 3. */
1198 if (mtu < IPV6_MMTU) {
1199 /* Path MTU cannot be less than IPV6_MMTU. */
1200 error = EMSGSIZE;
1201 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1202 goto bad;
1203 } else if (ip6->ip6_plen == 0) {
1204 /* Jumbo payload cannot be fragmented. */
1205 error = EMSGSIZE;
1206 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1207 goto bad;
1208 } else {
1209 u_char nextproto;
1210
1211 /*
1212 * Too large for the destination or interface;
1213 * fragment if possible.
1214 * Must be able to put at least 8 bytes per fragment.
1215 */
1216 if (mtu > IPV6_MAXPACKET)
1217 mtu = IPV6_MAXPACKET;
1218
1219 len = (mtu - unfragpartlen - sizeof(struct ip6_frag)) & ~7;
1220 if (len < 8) {
1221 error = EMSGSIZE;
1222 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1223 goto bad;
1224 }
1225
1226 /*
1227 * If the interface will not calculate checksums on
1228 * fragmented packets, then do it here.
1229 * XXX-BZ handle the hw offloading case. Need flags.
1230 */
1231 ip6_output_delayed_csum(m, ifp, m->m_pkthdr.csum_flags, plen,
1232 optlen);
1233
1234 /*
1235 * Change the next header field of the last header in the
1236 * unfragmentable part.
1237 */
1238 if (exthdrs.ip6e_rthdr) {
1239 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1240 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1241 } else if (exthdrs.ip6e_dest1) {
1242 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1243 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1244 } else if (exthdrs.ip6e_hbh) {
1245 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1246 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1247 } else {
1248 ip6 = mtod(m, struct ip6_hdr *);
1249 nextproto = ip6->ip6_nxt;
1250 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1251 }
1252
1253 /*
1254 * Loop through length of segment after first fragment,
1255 * make new header and copy data of each part and link onto
1256 * chain.
1257 */
1258 m0 = m;
1259 id = htonl(ip6_randomid());
1260 error = ip6_fragment(ifp, m, unfragpartlen, nextproto,len, id);
1261 if (error != 0)
1262 goto sendorfree;
1263
1264 in6_ifstat_inc(ifp, ifs6_out_fragok);
1265 }
1266
1267 /* Remove leading garbage. */
1268 sendorfree:
1269 m = m0->m_nextpkt;
1270 m0->m_nextpkt = 0;
1271 m_freem(m0);
1272 for (; m; m = m0) {
1273 m0 = m->m_nextpkt;
1274 m->m_nextpkt = 0;
1275 if (error == 0) {
1276 /* Record statistics for this interface address. */
1277 if (ia) {
1278 counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
1279 counter_u64_add(ia->ia_ifa.ifa_obytes,
1280 m->m_pkthdr.len);
1281 }
1282 if (vlan_pcp > -1)
1283 EVL_APPLY_PRI(m, vlan_pcp);
1284 error = ip6_output_send(inp, ifp, origifp, m, dst, ro,
1285 true);
1286 } else
1287 m_freem(m);
1288 }
1289
1290 if (error == 0)
1291 IP6STAT_INC(ip6s_fragmented);
1292
1293 done:
1294 return (error);
1295
1296 freehdrs:
1297 m_freem(exthdrs.ip6e_hbh); /* m_freem() checks if mbuf is NULL. */
1298 m_freem(exthdrs.ip6e_dest1);
1299 m_freem(exthdrs.ip6e_rthdr);
1300 m_freem(exthdrs.ip6e_dest2);
1301 /* FALLTHROUGH */
1302 bad:
1303 if (m)
1304 m_freem(m);
1305 goto done;
1306 }
1307
1308 static int
ip6_copyexthdr(struct mbuf ** mp,caddr_t hdr,int hlen)1309 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1310 {
1311 struct mbuf *m;
1312
1313 if (hlen > MCLBYTES)
1314 return (ENOBUFS); /* XXX */
1315
1316 if (hlen > MLEN)
1317 m = m_getcl(M_NOWAIT, MT_DATA, 0);
1318 else
1319 m = m_get(M_NOWAIT, MT_DATA);
1320 if (m == NULL)
1321 return (ENOBUFS);
1322 m->m_len = hlen;
1323 if (hdr)
1324 bcopy(hdr, mtod(m, caddr_t), hlen);
1325
1326 *mp = m;
1327 return (0);
1328 }
1329
1330 /*
1331 * Insert jumbo payload option.
1332 */
1333 static int
ip6_insert_jumboopt(struct ip6_exthdrs * exthdrs,u_int32_t plen)1334 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1335 {
1336 struct mbuf *mopt;
1337 u_char *optbuf;
1338 u_int32_t v;
1339
1340 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1341
1342 /*
1343 * If there is no hop-by-hop options header, allocate new one.
1344 * If there is one but it doesn't have enough space to store the
1345 * jumbo payload option, allocate a cluster to store the whole options.
1346 * Otherwise, use it to store the options.
1347 */
1348 if (exthdrs->ip6e_hbh == NULL) {
1349 mopt = m_get(M_NOWAIT, MT_DATA);
1350 if (mopt == NULL)
1351 return (ENOBUFS);
1352 mopt->m_len = JUMBOOPTLEN;
1353 optbuf = mtod(mopt, u_char *);
1354 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1355 exthdrs->ip6e_hbh = mopt;
1356 } else {
1357 struct ip6_hbh *hbh;
1358
1359 mopt = exthdrs->ip6e_hbh;
1360 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1361 /*
1362 * XXX assumption:
1363 * - exthdrs->ip6e_hbh is not referenced from places
1364 * other than exthdrs.
1365 * - exthdrs->ip6e_hbh is not an mbuf chain.
1366 */
1367 int oldoptlen = mopt->m_len;
1368 struct mbuf *n;
1369
1370 /*
1371 * XXX: give up if the whole (new) hbh header does
1372 * not fit even in an mbuf cluster.
1373 */
1374 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1375 return (ENOBUFS);
1376
1377 /*
1378 * As a consequence, we must always prepare a cluster
1379 * at this point.
1380 */
1381 n = m_getcl(M_NOWAIT, MT_DATA, 0);
1382 if (n == NULL)
1383 return (ENOBUFS);
1384 n->m_len = oldoptlen + JUMBOOPTLEN;
1385 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1386 oldoptlen);
1387 optbuf = mtod(n, caddr_t) + oldoptlen;
1388 m_freem(mopt);
1389 mopt = exthdrs->ip6e_hbh = n;
1390 } else {
1391 optbuf = mtod(mopt, u_char *) + mopt->m_len;
1392 mopt->m_len += JUMBOOPTLEN;
1393 }
1394 optbuf[0] = IP6OPT_PADN;
1395 optbuf[1] = 1;
1396
1397 /*
1398 * Adjust the header length according to the pad and
1399 * the jumbo payload option.
1400 */
1401 hbh = mtod(mopt, struct ip6_hbh *);
1402 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1403 }
1404
1405 /* fill in the option. */
1406 optbuf[2] = IP6OPT_JUMBO;
1407 optbuf[3] = 4;
1408 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1409 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1410
1411 /* finally, adjust the packet header length */
1412 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1413
1414 return (0);
1415 #undef JUMBOOPTLEN
1416 }
1417
1418 /*
1419 * Insert fragment header and copy unfragmentable header portions.
1420 */
1421 static int
ip6_insertfraghdr(struct mbuf * m0,struct mbuf * m,int hlen,struct ip6_frag ** frghdrp)1422 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1423 struct ip6_frag **frghdrp)
1424 {
1425 struct mbuf *n, *mlast;
1426
1427 if (hlen > sizeof(struct ip6_hdr)) {
1428 n = m_copym(m0, sizeof(struct ip6_hdr),
1429 hlen - sizeof(struct ip6_hdr), M_NOWAIT);
1430 if (n == NULL)
1431 return (ENOBUFS);
1432 m->m_next = n;
1433 } else
1434 n = m;
1435
1436 /* Search for the last mbuf of unfragmentable part. */
1437 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1438 ;
1439
1440 if (M_WRITABLE(mlast) &&
1441 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1442 /* use the trailing space of the last mbuf for the fragment hdr */
1443 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1444 mlast->m_len);
1445 mlast->m_len += sizeof(struct ip6_frag);
1446 m->m_pkthdr.len += sizeof(struct ip6_frag);
1447 } else {
1448 /* allocate a new mbuf for the fragment header */
1449 struct mbuf *mfrg;
1450
1451 mfrg = m_get(M_NOWAIT, MT_DATA);
1452 if (mfrg == NULL)
1453 return (ENOBUFS);
1454 mfrg->m_len = sizeof(struct ip6_frag);
1455 *frghdrp = mtod(mfrg, struct ip6_frag *);
1456 mlast->m_next = mfrg;
1457 }
1458
1459 return (0);
1460 }
1461
1462 /*
1463 * Calculates IPv6 path mtu for destination @dst.
1464 * Resulting MTU is stored in @mtup.
1465 *
1466 * Returns 0 on success.
1467 */
1468 static int
ip6_getpmtu_ctl(u_int fibnum,const struct in6_addr * dst,u_long * mtup)1469 ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup)
1470 {
1471 struct epoch_tracker et;
1472 struct nhop_object *nh;
1473 struct in6_addr kdst;
1474 uint32_t scopeid;
1475 int error;
1476
1477 in6_splitscope(dst, &kdst, &scopeid);
1478
1479 NET_EPOCH_ENTER(et);
1480 nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, 0);
1481 if (nh != NULL)
1482 error = ip6_calcmtu(nh->nh_ifp, dst, nh->nh_mtu, mtup, NULL, 0);
1483 else
1484 error = EHOSTUNREACH;
1485 NET_EPOCH_EXIT(et);
1486
1487 return (error);
1488 }
1489
1490 /*
1491 * Calculates IPv6 path MTU for @dst based on transmit @ifp,
1492 * and cached data in @ro_pmtu.
1493 * MTU from (successful) route lookup is saved (along with dst)
1494 * inside @ro_pmtu to avoid subsequent route lookups after packet
1495 * filter processing.
1496 *
1497 * Stores mtu and always-frag value into @mtup and @alwaysfragp.
1498 * Returns 0 on success.
1499 */
1500 static int
ip6_getpmtu(struct route_in6 * ro_pmtu,int do_lookup,struct ifnet * ifp,const struct in6_addr * dst,u_long * mtup,int * alwaysfragp,u_int fibnum,u_int proto)1501 ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
1502 struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup,
1503 int *alwaysfragp, u_int fibnum, u_int proto)
1504 {
1505 struct nhop_object *nh;
1506 struct in6_addr kdst;
1507 uint32_t scopeid;
1508 struct sockaddr_in6 *sa6_dst, sin6;
1509 u_long mtu;
1510
1511 NET_EPOCH_ASSERT();
1512
1513 mtu = 0;
1514 if (ro_pmtu == NULL || do_lookup) {
1515 /*
1516 * Here ro_pmtu has final destination address, while
1517 * ro might represent immediate destination.
1518 * Use ro_pmtu destination since mtu might differ.
1519 */
1520 if (ro_pmtu != NULL) {
1521 sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1522 if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
1523 ro_pmtu->ro_mtu = 0;
1524 } else
1525 sa6_dst = &sin6;
1526
1527 if (ro_pmtu == NULL || ro_pmtu->ro_mtu == 0) {
1528 bzero(sa6_dst, sizeof(*sa6_dst));
1529 sa6_dst->sin6_family = AF_INET6;
1530 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1531 sa6_dst->sin6_addr = *dst;
1532
1533 in6_splitscope(dst, &kdst, &scopeid);
1534 nh = fib6_lookup(fibnum, &kdst, scopeid, NHR_NONE, 0);
1535 if (nh != NULL) {
1536 mtu = nh->nh_mtu;
1537 if (ro_pmtu != NULL)
1538 ro_pmtu->ro_mtu = mtu;
1539 }
1540 } else
1541 mtu = ro_pmtu->ro_mtu;
1542 }
1543
1544 if (ro_pmtu != NULL && ro_pmtu->ro_nh != NULL)
1545 mtu = ro_pmtu->ro_nh->nh_mtu;
1546
1547 return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto));
1548 }
1549
1550 /*
1551 * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and
1552 * hostcache data for @dst.
1553 * Stores mtu and always-frag value into @mtup and @alwaysfragp.
1554 *
1555 * Returns 0 on success.
1556 */
1557 static int
ip6_calcmtu(struct ifnet * ifp,const struct in6_addr * dst,u_long rt_mtu,u_long * mtup,int * alwaysfragp,u_int proto)1558 ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu,
1559 u_long *mtup, int *alwaysfragp, u_int proto)
1560 {
1561 u_long mtu = 0;
1562 int alwaysfrag = 0;
1563 int error = 0;
1564
1565 if (rt_mtu > 0) {
1566 u_int32_t ifmtu;
1567 struct in_conninfo inc;
1568
1569 bzero(&inc, sizeof(inc));
1570 inc.inc_flags |= INC_ISIPV6;
1571 inc.inc6_faddr = *dst;
1572
1573 ifmtu = IN6_LINKMTU(ifp);
1574
1575 /* TCP is known to react to pmtu changes so skip hc */
1576 if (proto != IPPROTO_TCP)
1577 mtu = tcp_hc_getmtu(&inc);
1578
1579 if (mtu)
1580 mtu = min(mtu, rt_mtu);
1581 else
1582 mtu = rt_mtu;
1583 if (mtu == 0)
1584 mtu = ifmtu;
1585 else if (mtu < IPV6_MMTU) {
1586 /*
1587 * RFC2460 section 5, last paragraph:
1588 * if we record ICMPv6 too big message with
1589 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1590 * or smaller, with framgent header attached.
1591 * (fragment header is needed regardless from the
1592 * packet size, for translators to identify packets)
1593 */
1594 alwaysfrag = 1;
1595 mtu = IPV6_MMTU;
1596 }
1597 } else if (ifp) {
1598 mtu = IN6_LINKMTU(ifp);
1599 } else
1600 error = EHOSTUNREACH; /* XXX */
1601
1602 *mtup = mtu;
1603 if (alwaysfragp)
1604 *alwaysfragp = alwaysfrag;
1605 return (error);
1606 }
1607
1608 /*
1609 * IP6 socket option processing.
1610 */
1611 int
ip6_ctloutput(struct socket * so,struct sockopt * sopt)1612 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1613 {
1614 int optdatalen, uproto;
1615 void *optdata;
1616 struct inpcb *inp = sotoinpcb(so);
1617 int error, optval;
1618 int level, op, optname;
1619 int optlen;
1620 struct thread *td;
1621 #ifdef RSS
1622 uint32_t rss_bucket;
1623 int retval;
1624 #endif
1625
1626 /*
1627 * Don't use more than a quarter of mbuf clusters. N.B.:
1628 * nmbclusters is an int, but nmbclusters * MCLBYTES may overflow
1629 * on LP64 architectures, so cast to u_long to avoid undefined
1630 * behavior. ILP32 architectures cannot have nmbclusters
1631 * large enough to overflow for other reasons.
1632 */
1633 #define IPV6_PKTOPTIONS_MBUF_LIMIT ((u_long)nmbclusters * MCLBYTES / 4)
1634
1635 level = sopt->sopt_level;
1636 op = sopt->sopt_dir;
1637 optname = sopt->sopt_name;
1638 optlen = sopt->sopt_valsize;
1639 td = sopt->sopt_td;
1640 error = 0;
1641 optval = 0;
1642 uproto = (int)so->so_proto->pr_protocol;
1643
1644 if (level != IPPROTO_IPV6) {
1645 error = EINVAL;
1646
1647 if (sopt->sopt_level == SOL_SOCKET &&
1648 sopt->sopt_dir == SOPT_SET) {
1649 switch (sopt->sopt_name) {
1650 case SO_SETFIB:
1651 INP_WLOCK(inp);
1652 inp->inp_inc.inc_fibnum = so->so_fibnum;
1653 INP_WUNLOCK(inp);
1654 error = 0;
1655 break;
1656 case SO_MAX_PACING_RATE:
1657 #ifdef RATELIMIT
1658 INP_WLOCK(inp);
1659 inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
1660 INP_WUNLOCK(inp);
1661 error = 0;
1662 #else
1663 error = EOPNOTSUPP;
1664 #endif
1665 break;
1666 default:
1667 break;
1668 }
1669 }
1670 } else { /* level == IPPROTO_IPV6 */
1671 switch (op) {
1672 case SOPT_SET:
1673 switch (optname) {
1674 case IPV6_2292PKTOPTIONS:
1675 #ifdef IPV6_PKTOPTIONS
1676 case IPV6_PKTOPTIONS:
1677 #endif
1678 {
1679 struct mbuf *m;
1680
1681 if (optlen > IPV6_PKTOPTIONS_MBUF_LIMIT) {
1682 printf("ip6_ctloutput: mbuf limit hit\n");
1683 error = ENOBUFS;
1684 break;
1685 }
1686
1687 error = soopt_getm(sopt, &m); /* XXX */
1688 if (error != 0)
1689 break;
1690 error = soopt_mcopyin(sopt, m); /* XXX */
1691 if (error != 0)
1692 break;
1693 INP_WLOCK(inp);
1694 error = ip6_pcbopts(&inp->in6p_outputopts, m,
1695 so, sopt);
1696 INP_WUNLOCK(inp);
1697 m_freem(m); /* XXX */
1698 break;
1699 }
1700
1701 /*
1702 * Use of some Hop-by-Hop options or some
1703 * Destination options, might require special
1704 * privilege. That is, normal applications
1705 * (without special privilege) might be forbidden
1706 * from setting certain options in outgoing packets,
1707 * and might never see certain options in received
1708 * packets. [RFC 2292 Section 6]
1709 * KAME specific note:
1710 * KAME prevents non-privileged users from sending or
1711 * receiving ANY hbh/dst options in order to avoid
1712 * overhead of parsing options in the kernel.
1713 */
1714 case IPV6_RECVHOPOPTS:
1715 case IPV6_RECVDSTOPTS:
1716 case IPV6_RECVRTHDRDSTOPTS:
1717 if (td != NULL) {
1718 error = priv_check(td,
1719 PRIV_NETINET_SETHDROPTS);
1720 if (error)
1721 break;
1722 }
1723 /* FALLTHROUGH */
1724 case IPV6_UNICAST_HOPS:
1725 case IPV6_HOPLIMIT:
1726
1727 case IPV6_RECVPKTINFO:
1728 case IPV6_RECVHOPLIMIT:
1729 case IPV6_RECVRTHDR:
1730 case IPV6_RECVPATHMTU:
1731 case IPV6_RECVTCLASS:
1732 case IPV6_RECVFLOWID:
1733 #ifdef RSS
1734 case IPV6_RECVRSSBUCKETID:
1735 #endif
1736 case IPV6_V6ONLY:
1737 case IPV6_AUTOFLOWLABEL:
1738 case IPV6_ORIGDSTADDR:
1739 case IPV6_BINDANY:
1740 case IPV6_VLAN_PCP:
1741 if (optname == IPV6_BINDANY && td != NULL) {
1742 error = priv_check(td,
1743 PRIV_NETINET_BINDANY);
1744 if (error)
1745 break;
1746 }
1747
1748 if (optlen != sizeof(int)) {
1749 error = EINVAL;
1750 break;
1751 }
1752 error = sooptcopyin(sopt, &optval,
1753 sizeof optval, sizeof optval);
1754 if (error)
1755 break;
1756 switch (optname) {
1757 case IPV6_UNICAST_HOPS:
1758 if (optval < -1 || optval >= 256)
1759 error = EINVAL;
1760 else {
1761 /* -1 = kernel default */
1762 inp->in6p_hops = optval;
1763 if ((inp->inp_vflag &
1764 INP_IPV4) != 0)
1765 inp->inp_ip_ttl = optval;
1766 }
1767 break;
1768 #define OPTSET(bit) \
1769 do { \
1770 INP_WLOCK(inp); \
1771 if (optval) \
1772 inp->inp_flags |= (bit); \
1773 else \
1774 inp->inp_flags &= ~(bit); \
1775 INP_WUNLOCK(inp); \
1776 } while (/*CONSTCOND*/ 0)
1777 #define OPTSET2292(bit) \
1778 do { \
1779 INP_WLOCK(inp); \
1780 inp->inp_flags |= IN6P_RFC2292; \
1781 if (optval) \
1782 inp->inp_flags |= (bit); \
1783 else \
1784 inp->inp_flags &= ~(bit); \
1785 INP_WUNLOCK(inp); \
1786 } while (/*CONSTCOND*/ 0)
1787 #define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0)
1788
1789 #define OPTSET2_N(bit, val) do { \
1790 if (val) \
1791 inp->inp_flags2 |= bit; \
1792 else \
1793 inp->inp_flags2 &= ~bit; \
1794 } while (0)
1795 #define OPTSET2(bit, val) do { \
1796 INP_WLOCK(inp); \
1797 OPTSET2_N(bit, val); \
1798 INP_WUNLOCK(inp); \
1799 } while (0)
1800 #define OPTBIT2(bit) (inp->inp_flags2 & (bit) ? 1 : 0)
1801 #define OPTSET2292_EXCLUSIVE(bit) \
1802 do { \
1803 INP_WLOCK(inp); \
1804 if (OPTBIT(IN6P_RFC2292)) { \
1805 error = EINVAL; \
1806 } else { \
1807 if (optval) \
1808 inp->inp_flags |= (bit); \
1809 else \
1810 inp->inp_flags &= ~(bit); \
1811 } \
1812 INP_WUNLOCK(inp); \
1813 } while (/*CONSTCOND*/ 0)
1814
1815 case IPV6_RECVPKTINFO:
1816 OPTSET2292_EXCLUSIVE(IN6P_PKTINFO);
1817 break;
1818
1819 case IPV6_HOPLIMIT:
1820 {
1821 struct ip6_pktopts **optp;
1822
1823 /* cannot mix with RFC2292 */
1824 if (OPTBIT(IN6P_RFC2292)) {
1825 error = EINVAL;
1826 break;
1827 }
1828 INP_WLOCK(inp);
1829 if (inp->inp_flags & INP_DROPPED) {
1830 INP_WUNLOCK(inp);
1831 return (ECONNRESET);
1832 }
1833 optp = &inp->in6p_outputopts;
1834 error = ip6_pcbopt(IPV6_HOPLIMIT,
1835 (u_char *)&optval, sizeof(optval),
1836 optp, (td != NULL) ? td->td_ucred :
1837 NULL, uproto);
1838 INP_WUNLOCK(inp);
1839 break;
1840 }
1841
1842 case IPV6_RECVHOPLIMIT:
1843 OPTSET2292_EXCLUSIVE(IN6P_HOPLIMIT);
1844 break;
1845
1846 case IPV6_RECVHOPOPTS:
1847 OPTSET2292_EXCLUSIVE(IN6P_HOPOPTS);
1848 break;
1849
1850 case IPV6_RECVDSTOPTS:
1851 OPTSET2292_EXCLUSIVE(IN6P_DSTOPTS);
1852 break;
1853
1854 case IPV6_RECVRTHDRDSTOPTS:
1855 OPTSET2292_EXCLUSIVE(IN6P_RTHDRDSTOPTS);
1856 break;
1857
1858 case IPV6_RECVRTHDR:
1859 OPTSET2292_EXCLUSIVE(IN6P_RTHDR);
1860 break;
1861
1862 case IPV6_RECVPATHMTU:
1863 /*
1864 * We ignore this option for TCP
1865 * sockets.
1866 * (RFC3542 leaves this case
1867 * unspecified.)
1868 */
1869 if (uproto != IPPROTO_TCP)
1870 OPTSET(IN6P_MTU);
1871 break;
1872
1873 case IPV6_RECVFLOWID:
1874 OPTSET2(INP_RECVFLOWID, optval);
1875 break;
1876
1877 #ifdef RSS
1878 case IPV6_RECVRSSBUCKETID:
1879 OPTSET2(INP_RECVRSSBUCKETID, optval);
1880 break;
1881 #endif
1882
1883 case IPV6_V6ONLY:
1884 INP_WLOCK(inp);
1885 if (inp->inp_lport ||
1886 !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1887 /*
1888 * The socket is already bound.
1889 */
1890 INP_WUNLOCK(inp);
1891 error = EINVAL;
1892 break;
1893 }
1894 if (optval) {
1895 inp->inp_flags |= IN6P_IPV6_V6ONLY;
1896 inp->inp_vflag &= ~INP_IPV4;
1897 } else {
1898 inp->inp_flags &= ~IN6P_IPV6_V6ONLY;
1899 inp->inp_vflag |= INP_IPV4;
1900 }
1901 INP_WUNLOCK(inp);
1902 break;
1903 case IPV6_RECVTCLASS:
1904 /* cannot mix with RFC2292 XXX */
1905 OPTSET2292_EXCLUSIVE(IN6P_TCLASS);
1906 break;
1907 case IPV6_AUTOFLOWLABEL:
1908 OPTSET(IN6P_AUTOFLOWLABEL);
1909 break;
1910
1911 case IPV6_ORIGDSTADDR:
1912 OPTSET2(INP_ORIGDSTADDR, optval);
1913 break;
1914 case IPV6_BINDANY:
1915 OPTSET(INP_BINDANY);
1916 break;
1917 case IPV6_VLAN_PCP:
1918 if ((optval >= -1) && (optval <=
1919 (INP_2PCP_MASK >> INP_2PCP_SHIFT))) {
1920 if (optval == -1) {
1921 INP_WLOCK(inp);
1922 inp->inp_flags2 &=
1923 ~(INP_2PCP_SET |
1924 INP_2PCP_MASK);
1925 INP_WUNLOCK(inp);
1926 } else {
1927 INP_WLOCK(inp);
1928 inp->inp_flags2 |=
1929 INP_2PCP_SET;
1930 inp->inp_flags2 &=
1931 ~INP_2PCP_MASK;
1932 inp->inp_flags2 |=
1933 optval <<
1934 INP_2PCP_SHIFT;
1935 INP_WUNLOCK(inp);
1936 }
1937 } else
1938 error = EINVAL;
1939 break;
1940 }
1941 break;
1942
1943 case IPV6_TCLASS:
1944 case IPV6_DONTFRAG:
1945 case IPV6_USE_MIN_MTU:
1946 case IPV6_PREFER_TEMPADDR:
1947 if (optlen != sizeof(optval)) {
1948 error = EINVAL;
1949 break;
1950 }
1951 error = sooptcopyin(sopt, &optval,
1952 sizeof optval, sizeof optval);
1953 if (error)
1954 break;
1955 {
1956 struct ip6_pktopts **optp;
1957 INP_WLOCK(inp);
1958 if (inp->inp_flags & INP_DROPPED) {
1959 INP_WUNLOCK(inp);
1960 return (ECONNRESET);
1961 }
1962 optp = &inp->in6p_outputopts;
1963 error = ip6_pcbopt(optname,
1964 (u_char *)&optval, sizeof(optval),
1965 optp, (td != NULL) ? td->td_ucred :
1966 NULL, uproto);
1967 INP_WUNLOCK(inp);
1968 break;
1969 }
1970
1971 case IPV6_2292PKTINFO:
1972 case IPV6_2292HOPLIMIT:
1973 case IPV6_2292HOPOPTS:
1974 case IPV6_2292DSTOPTS:
1975 case IPV6_2292RTHDR:
1976 /* RFC 2292 */
1977 if (optlen != sizeof(int)) {
1978 error = EINVAL;
1979 break;
1980 }
1981 error = sooptcopyin(sopt, &optval,
1982 sizeof optval, sizeof optval);
1983 if (error)
1984 break;
1985 switch (optname) {
1986 case IPV6_2292PKTINFO:
1987 OPTSET2292(IN6P_PKTINFO);
1988 break;
1989 case IPV6_2292HOPLIMIT:
1990 OPTSET2292(IN6P_HOPLIMIT);
1991 break;
1992 case IPV6_2292HOPOPTS:
1993 /*
1994 * Check super-user privilege.
1995 * See comments for IPV6_RECVHOPOPTS.
1996 */
1997 if (td != NULL) {
1998 error = priv_check(td,
1999 PRIV_NETINET_SETHDROPTS);
2000 if (error)
2001 return (error);
2002 }
2003 OPTSET2292(IN6P_HOPOPTS);
2004 break;
2005 case IPV6_2292DSTOPTS:
2006 if (td != NULL) {
2007 error = priv_check(td,
2008 PRIV_NETINET_SETHDROPTS);
2009 if (error)
2010 return (error);
2011 }
2012 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
2013 break;
2014 case IPV6_2292RTHDR:
2015 OPTSET2292(IN6P_RTHDR);
2016 break;
2017 }
2018 break;
2019 case IPV6_PKTINFO:
2020 case IPV6_HOPOPTS:
2021 case IPV6_RTHDR:
2022 case IPV6_DSTOPTS:
2023 case IPV6_RTHDRDSTOPTS:
2024 case IPV6_NEXTHOP:
2025 {
2026 /* new advanced API (RFC3542) */
2027 u_char *optbuf;
2028 u_char optbuf_storage[MCLBYTES];
2029 int optlen;
2030 struct ip6_pktopts **optp;
2031
2032 /* cannot mix with RFC2292 */
2033 if (OPTBIT(IN6P_RFC2292)) {
2034 error = EINVAL;
2035 break;
2036 }
2037
2038 /*
2039 * We only ensure valsize is not too large
2040 * here. Further validation will be done
2041 * later.
2042 */
2043 error = sooptcopyin(sopt, optbuf_storage,
2044 sizeof(optbuf_storage), 0);
2045 if (error)
2046 break;
2047 optlen = sopt->sopt_valsize;
2048 optbuf = optbuf_storage;
2049 INP_WLOCK(inp);
2050 if (inp->inp_flags & INP_DROPPED) {
2051 INP_WUNLOCK(inp);
2052 return (ECONNRESET);
2053 }
2054 optp = &inp->in6p_outputopts;
2055 error = ip6_pcbopt(optname, optbuf, optlen,
2056 optp, (td != NULL) ? td->td_ucred : NULL,
2057 uproto);
2058 INP_WUNLOCK(inp);
2059 break;
2060 }
2061 #undef OPTSET
2062
2063 case IPV6_MULTICAST_IF:
2064 case IPV6_MULTICAST_HOPS:
2065 case IPV6_MULTICAST_LOOP:
2066 case IPV6_JOIN_GROUP:
2067 case IPV6_LEAVE_GROUP:
2068 case IPV6_MSFILTER:
2069 case MCAST_BLOCK_SOURCE:
2070 case MCAST_UNBLOCK_SOURCE:
2071 case MCAST_JOIN_GROUP:
2072 case MCAST_LEAVE_GROUP:
2073 case MCAST_JOIN_SOURCE_GROUP:
2074 case MCAST_LEAVE_SOURCE_GROUP:
2075 error = ip6_setmoptions(inp, sopt);
2076 break;
2077
2078 case IPV6_PORTRANGE:
2079 error = sooptcopyin(sopt, &optval,
2080 sizeof optval, sizeof optval);
2081 if (error)
2082 break;
2083
2084 INP_WLOCK(inp);
2085 switch (optval) {
2086 case IPV6_PORTRANGE_DEFAULT:
2087 inp->inp_flags &= ~(INP_LOWPORT);
2088 inp->inp_flags &= ~(INP_HIGHPORT);
2089 break;
2090
2091 case IPV6_PORTRANGE_HIGH:
2092 inp->inp_flags &= ~(INP_LOWPORT);
2093 inp->inp_flags |= INP_HIGHPORT;
2094 break;
2095
2096 case IPV6_PORTRANGE_LOW:
2097 inp->inp_flags &= ~(INP_HIGHPORT);
2098 inp->inp_flags |= INP_LOWPORT;
2099 break;
2100
2101 default:
2102 error = EINVAL;
2103 break;
2104 }
2105 INP_WUNLOCK(inp);
2106 break;
2107
2108 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
2109 case IPV6_IPSEC_POLICY:
2110 if (IPSEC_ENABLED(ipv6)) {
2111 error = IPSEC_PCBCTL(ipv6, inp, sopt);
2112 break;
2113 }
2114 /* FALLTHROUGH */
2115 #endif /* IPSEC */
2116
2117 default:
2118 error = ENOPROTOOPT;
2119 break;
2120 }
2121 break;
2122
2123 case SOPT_GET:
2124 switch (optname) {
2125 case IPV6_2292PKTOPTIONS:
2126 #ifdef IPV6_PKTOPTIONS
2127 case IPV6_PKTOPTIONS:
2128 #endif
2129 /*
2130 * RFC3542 (effectively) deprecated the
2131 * semantics of the 2292-style pktoptions.
2132 * Since it was not reliable in nature (i.e.,
2133 * applications had to expect the lack of some
2134 * information after all), it would make sense
2135 * to simplify this part by always returning
2136 * empty data.
2137 */
2138 sopt->sopt_valsize = 0;
2139 break;
2140
2141 case IPV6_RECVHOPOPTS:
2142 case IPV6_RECVDSTOPTS:
2143 case IPV6_RECVRTHDRDSTOPTS:
2144 case IPV6_UNICAST_HOPS:
2145 case IPV6_RECVPKTINFO:
2146 case IPV6_RECVHOPLIMIT:
2147 case IPV6_RECVRTHDR:
2148 case IPV6_RECVPATHMTU:
2149
2150 case IPV6_V6ONLY:
2151 case IPV6_PORTRANGE:
2152 case IPV6_RECVTCLASS:
2153 case IPV6_AUTOFLOWLABEL:
2154 case IPV6_BINDANY:
2155 case IPV6_FLOWID:
2156 case IPV6_FLOWTYPE:
2157 case IPV6_RECVFLOWID:
2158 #ifdef RSS
2159 case IPV6_RSSBUCKETID:
2160 case IPV6_RECVRSSBUCKETID:
2161 #endif
2162 case IPV6_VLAN_PCP:
2163 switch (optname) {
2164 case IPV6_RECVHOPOPTS:
2165 optval = OPTBIT(IN6P_HOPOPTS);
2166 break;
2167
2168 case IPV6_RECVDSTOPTS:
2169 optval = OPTBIT(IN6P_DSTOPTS);
2170 break;
2171
2172 case IPV6_RECVRTHDRDSTOPTS:
2173 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2174 break;
2175
2176 case IPV6_UNICAST_HOPS:
2177 optval = inp->in6p_hops;
2178 break;
2179
2180 case IPV6_RECVPKTINFO:
2181 optval = OPTBIT(IN6P_PKTINFO);
2182 break;
2183
2184 case IPV6_RECVHOPLIMIT:
2185 optval = OPTBIT(IN6P_HOPLIMIT);
2186 break;
2187
2188 case IPV6_RECVRTHDR:
2189 optval = OPTBIT(IN6P_RTHDR);
2190 break;
2191
2192 case IPV6_RECVPATHMTU:
2193 optval = OPTBIT(IN6P_MTU);
2194 break;
2195
2196 case IPV6_V6ONLY:
2197 optval = OPTBIT(IN6P_IPV6_V6ONLY);
2198 break;
2199
2200 case IPV6_PORTRANGE:
2201 {
2202 int flags;
2203 flags = inp->inp_flags;
2204 if (flags & INP_HIGHPORT)
2205 optval = IPV6_PORTRANGE_HIGH;
2206 else if (flags & INP_LOWPORT)
2207 optval = IPV6_PORTRANGE_LOW;
2208 else
2209 optval = 0;
2210 break;
2211 }
2212 case IPV6_RECVTCLASS:
2213 optval = OPTBIT(IN6P_TCLASS);
2214 break;
2215
2216 case IPV6_AUTOFLOWLABEL:
2217 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2218 break;
2219
2220 case IPV6_ORIGDSTADDR:
2221 optval = OPTBIT2(INP_ORIGDSTADDR);
2222 break;
2223
2224 case IPV6_BINDANY:
2225 optval = OPTBIT(INP_BINDANY);
2226 break;
2227
2228 case IPV6_FLOWID:
2229 optval = inp->inp_flowid;
2230 break;
2231
2232 case IPV6_FLOWTYPE:
2233 optval = inp->inp_flowtype;
2234 break;
2235
2236 case IPV6_RECVFLOWID:
2237 optval = OPTBIT2(INP_RECVFLOWID);
2238 break;
2239 #ifdef RSS
2240 case IPV6_RSSBUCKETID:
2241 retval =
2242 rss_hash2bucket(inp->inp_flowid,
2243 inp->inp_flowtype,
2244 &rss_bucket);
2245 if (retval == 0)
2246 optval = rss_bucket;
2247 else
2248 error = EINVAL;
2249 break;
2250
2251 case IPV6_RECVRSSBUCKETID:
2252 optval = OPTBIT2(INP_RECVRSSBUCKETID);
2253 break;
2254 #endif
2255
2256
2257 case IPV6_VLAN_PCP:
2258 if (OPTBIT2(INP_2PCP_SET)) {
2259 optval = (inp->inp_flags2 &
2260 INP_2PCP_MASK) >>
2261 INP_2PCP_SHIFT;
2262 } else {
2263 optval = -1;
2264 }
2265 break;
2266 }
2267
2268 if (error)
2269 break;
2270 error = sooptcopyout(sopt, &optval,
2271 sizeof optval);
2272 break;
2273
2274 case IPV6_PATHMTU:
2275 {
2276 u_long pmtu = 0;
2277 struct ip6_mtuinfo mtuinfo;
2278 struct in6_addr addr;
2279
2280 if (!(so->so_state & SS_ISCONNECTED))
2281 return (ENOTCONN);
2282 /*
2283 * XXX: we dot not consider the case of source
2284 * routing, or optional information to specify
2285 * the outgoing interface.
2286 * Copy faddr out of inp to avoid holding lock
2287 * on inp during route lookup.
2288 */
2289 INP_RLOCK(inp);
2290 bcopy(&inp->in6p_faddr, &addr, sizeof(addr));
2291 INP_RUNLOCK(inp);
2292 error = ip6_getpmtu_ctl(so->so_fibnum,
2293 &addr, &pmtu);
2294 if (error)
2295 break;
2296 if (pmtu > IPV6_MAXPACKET)
2297 pmtu = IPV6_MAXPACKET;
2298
2299 bzero(&mtuinfo, sizeof(mtuinfo));
2300 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2301 optdata = (void *)&mtuinfo;
2302 optdatalen = sizeof(mtuinfo);
2303 error = sooptcopyout(sopt, optdata,
2304 optdatalen);
2305 break;
2306 }
2307
2308 case IPV6_2292PKTINFO:
2309 case IPV6_2292HOPLIMIT:
2310 case IPV6_2292HOPOPTS:
2311 case IPV6_2292RTHDR:
2312 case IPV6_2292DSTOPTS:
2313 switch (optname) {
2314 case IPV6_2292PKTINFO:
2315 optval = OPTBIT(IN6P_PKTINFO);
2316 break;
2317 case IPV6_2292HOPLIMIT:
2318 optval = OPTBIT(IN6P_HOPLIMIT);
2319 break;
2320 case IPV6_2292HOPOPTS:
2321 optval = OPTBIT(IN6P_HOPOPTS);
2322 break;
2323 case IPV6_2292RTHDR:
2324 optval = OPTBIT(IN6P_RTHDR);
2325 break;
2326 case IPV6_2292DSTOPTS:
2327 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2328 break;
2329 }
2330 error = sooptcopyout(sopt, &optval,
2331 sizeof optval);
2332 break;
2333 case IPV6_PKTINFO:
2334 case IPV6_HOPOPTS:
2335 case IPV6_RTHDR:
2336 case IPV6_DSTOPTS:
2337 case IPV6_RTHDRDSTOPTS:
2338 case IPV6_NEXTHOP:
2339 case IPV6_TCLASS:
2340 case IPV6_DONTFRAG:
2341 case IPV6_USE_MIN_MTU:
2342 case IPV6_PREFER_TEMPADDR:
2343 error = ip6_getpcbopt(inp, optname, sopt);
2344 break;
2345
2346 case IPV6_MULTICAST_IF:
2347 case IPV6_MULTICAST_HOPS:
2348 case IPV6_MULTICAST_LOOP:
2349 case IPV6_MSFILTER:
2350 error = ip6_getmoptions(inp, sopt);
2351 break;
2352
2353 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
2354 case IPV6_IPSEC_POLICY:
2355 if (IPSEC_ENABLED(ipv6)) {
2356 error = IPSEC_PCBCTL(ipv6, inp, sopt);
2357 break;
2358 }
2359 /* FALLTHROUGH */
2360 #endif /* IPSEC */
2361 default:
2362 error = ENOPROTOOPT;
2363 break;
2364 }
2365 break;
2366 }
2367 }
2368 return (error);
2369 }
2370
2371 int
ip6_raw_ctloutput(struct socket * so,struct sockopt * sopt)2372 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2373 {
2374 int error = 0, optval, optlen;
2375 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2376 struct inpcb *inp = sotoinpcb(so);
2377 int level, op, optname;
2378
2379 level = sopt->sopt_level;
2380 op = sopt->sopt_dir;
2381 optname = sopt->sopt_name;
2382 optlen = sopt->sopt_valsize;
2383
2384 if (level != IPPROTO_IPV6) {
2385 return (EINVAL);
2386 }
2387
2388 switch (optname) {
2389 case IPV6_CHECKSUM:
2390 /*
2391 * For ICMPv6 sockets, no modification allowed for checksum
2392 * offset, permit "no change" values to help existing apps.
2393 *
2394 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2395 * for an ICMPv6 socket will fail."
2396 * The current behavior does not meet RFC3542.
2397 */
2398 switch (op) {
2399 case SOPT_SET:
2400 if (optlen != sizeof(int)) {
2401 error = EINVAL;
2402 break;
2403 }
2404 error = sooptcopyin(sopt, &optval, sizeof(optval),
2405 sizeof(optval));
2406 if (error)
2407 break;
2408 if (optval < -1 || (optval % 2) != 0) {
2409 /*
2410 * The API assumes non-negative even offset
2411 * values or -1 as a special value.
2412 */
2413 error = EINVAL;
2414 } else if (inp->inp_ip_p == IPPROTO_ICMPV6) {
2415 if (optval != icmp6off)
2416 error = EINVAL;
2417 } else
2418 inp->in6p_cksum = optval;
2419 break;
2420
2421 case SOPT_GET:
2422 if (inp->inp_ip_p == IPPROTO_ICMPV6)
2423 optval = icmp6off;
2424 else
2425 optval = inp->in6p_cksum;
2426
2427 error = sooptcopyout(sopt, &optval, sizeof(optval));
2428 break;
2429
2430 default:
2431 error = EINVAL;
2432 break;
2433 }
2434 break;
2435
2436 default:
2437 error = ENOPROTOOPT;
2438 break;
2439 }
2440
2441 return (error);
2442 }
2443
2444 /*
2445 * Set up IP6 options in pcb for insertion in output packets or
2446 * specifying behavior of outgoing packets.
2447 */
2448 static int
ip6_pcbopts(struct ip6_pktopts ** pktopt,struct mbuf * m,struct socket * so,struct sockopt * sopt)2449 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2450 struct socket *so, struct sockopt *sopt)
2451 {
2452 struct ip6_pktopts *opt = *pktopt;
2453 int error = 0;
2454 struct thread *td = sopt->sopt_td;
2455 struct epoch_tracker et;
2456
2457 /* turn off any old options. */
2458 if (opt) {
2459 #ifdef DIAGNOSTIC
2460 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2461 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2462 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2463 printf("ip6_pcbopts: all specified options are cleared.\n");
2464 #endif
2465 ip6_clearpktopts(opt, -1);
2466 } else {
2467 opt = malloc(sizeof(*opt), M_IP6OPT, M_NOWAIT);
2468 if (opt == NULL)
2469 return (ENOMEM);
2470 }
2471 *pktopt = NULL;
2472
2473 if (!m || m->m_len == 0) {
2474 /*
2475 * Only turning off any previous options, regardless of
2476 * whether the opt is just created or given.
2477 */
2478 free(opt, M_IP6OPT);
2479 return (0);
2480 }
2481
2482 /* set options specified by user. */
2483 NET_EPOCH_ENTER(et);
2484 if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
2485 td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
2486 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2487 free(opt, M_IP6OPT);
2488 NET_EPOCH_EXIT(et);
2489 return (error);
2490 }
2491 NET_EPOCH_EXIT(et);
2492 *pktopt = opt;
2493 return (0);
2494 }
2495
2496 /*
2497 * initialize ip6_pktopts. beware that there are non-zero default values in
2498 * the struct.
2499 */
2500 void
ip6_initpktopts(struct ip6_pktopts * opt)2501 ip6_initpktopts(struct ip6_pktopts *opt)
2502 {
2503
2504 bzero(opt, sizeof(*opt));
2505 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2506 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2507 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2508 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2509 }
2510
2511 static int
ip6_pcbopt(int optname,u_char * buf,int len,struct ip6_pktopts ** pktopt,struct ucred * cred,int uproto)2512 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2513 struct ucred *cred, int uproto)
2514 {
2515 struct epoch_tracker et;
2516 struct ip6_pktopts *opt;
2517 int ret;
2518
2519 if (*pktopt == NULL) {
2520 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2521 M_NOWAIT);
2522 if (*pktopt == NULL)
2523 return (ENOBUFS);
2524 ip6_initpktopts(*pktopt);
2525 }
2526 opt = *pktopt;
2527
2528 NET_EPOCH_ENTER(et);
2529 ret = ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto);
2530 NET_EPOCH_EXIT(et);
2531
2532 return (ret);
2533 }
2534
/*
 * Helpers for ip6_getpcbopt(): copy a variable-length option out of the
 * pcb's option set into a temporary buffer.
 *
 * They expand in a scope that provides "inp" (read-locked), "sopt",
 * "pktopt", "optdata", "optdatalen" and "malloc_optdata".  The buffer is
 * allocated with M_WAITOK, so the pcb lock is released around malloc() and
 * everything read from the pcb is re-checked once the lock is held again.
 * If the pcb was dropped meanwhile the enclosing function returns
 * ECONNRESET with the lock released.  When the option is (still) present,
 * at most sopt->sopt_valsize bytes are copied and malloc_optdata is set so
 * the caller frees the buffer; otherwise optdata is left NULL.
 */
#define	GET_PKTOPT_VAR(field, lenexpr) do {				\
	if (pktopt != NULL && pktopt->field != NULL) {			\
		INP_RUNLOCK(inp);					\
		optdata = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK);	\
		INP_RLOCK(inp);						\
		if ((inp->inp_flags & INP_DROPPED) != 0) {		\
			INP_RUNLOCK(inp);				\
			free(optdata, M_TEMP);				\
			return (ECONNRESET);				\
		}							\
		pktopt = inp->in6p_outputopts;				\
		if (pktopt == NULL || pktopt->field == NULL) {		\
			free(optdata, M_TEMP);				\
			optdata = NULL;					\
			malloc_optdata = false;				\
		} else {						\
			malloc_optdata = true;				\
			optdatalen = min(lenexpr, sopt->sopt_valsize);	\
			bcopy(pktopt->field, optdata, optdatalen);	\
		}							\
	}								\
} while (0)

/* Extension header: length is (ip6e_len + 1) units of 8 octets. */
#define	GET_PKTOPT_EXT_HDR(field)					\
	GET_PKTOPT_VAR(field,						\
	    (((struct ip6_ext *)pktopt->field)->ip6e_len + 1) << 3)

/* Socket address: length is taken from its sa_len member. */
#define	GET_PKTOPT_SOCKADDR(field)					\
	GET_PKTOPT_VAR(field, pktopt->field->sa_len)
2563
2564 static int
ip6_getpcbopt(struct inpcb * inp,int optname,struct sockopt * sopt)2565 ip6_getpcbopt(struct inpcb *inp, int optname, struct sockopt *sopt)
2566 {
2567 void *optdata = NULL;
2568 bool malloc_optdata = false;
2569 int optdatalen = 0;
2570 int error = 0;
2571 struct in6_pktinfo null_pktinfo;
2572 int deftclass = 0, on;
2573 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2574 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2575 struct ip6_pktopts *pktopt;
2576
2577 INP_RLOCK(inp);
2578 pktopt = inp->in6p_outputopts;
2579
2580 switch (optname) {
2581 case IPV6_PKTINFO:
2582 optdata = (void *)&null_pktinfo;
2583 if (pktopt && pktopt->ip6po_pktinfo) {
2584 bcopy(pktopt->ip6po_pktinfo, &null_pktinfo,
2585 sizeof(null_pktinfo));
2586 in6_clearscope(&null_pktinfo.ipi6_addr);
2587 } else {
2588 /* XXX: we don't have to do this every time... */
2589 bzero(&null_pktinfo, sizeof(null_pktinfo));
2590 }
2591 optdatalen = sizeof(struct in6_pktinfo);
2592 break;
2593 case IPV6_TCLASS:
2594 if (pktopt && pktopt->ip6po_tclass >= 0)
2595 deftclass = pktopt->ip6po_tclass;
2596 optdata = (void *)&deftclass;
2597 optdatalen = sizeof(int);
2598 break;
2599 case IPV6_HOPOPTS:
2600 GET_PKTOPT_EXT_HDR(ip6po_hbh);
2601 break;
2602 case IPV6_RTHDR:
2603 GET_PKTOPT_EXT_HDR(ip6po_rthdr);
2604 break;
2605 case IPV6_RTHDRDSTOPTS:
2606 GET_PKTOPT_EXT_HDR(ip6po_dest1);
2607 break;
2608 case IPV6_DSTOPTS:
2609 GET_PKTOPT_EXT_HDR(ip6po_dest2);
2610 break;
2611 case IPV6_NEXTHOP:
2612 GET_PKTOPT_SOCKADDR(ip6po_nexthop);
2613 break;
2614 case IPV6_USE_MIN_MTU:
2615 if (pktopt)
2616 defminmtu = pktopt->ip6po_minmtu;
2617 optdata = (void *)&defminmtu;
2618 optdatalen = sizeof(int);
2619 break;
2620 case IPV6_DONTFRAG:
2621 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2622 on = 1;
2623 else
2624 on = 0;
2625 optdata = (void *)&on;
2626 optdatalen = sizeof(on);
2627 break;
2628 case IPV6_PREFER_TEMPADDR:
2629 if (pktopt)
2630 defpreftemp = pktopt->ip6po_prefer_tempaddr;
2631 optdata = (void *)&defpreftemp;
2632 optdatalen = sizeof(int);
2633 break;
2634 default: /* should not happen */
2635 #ifdef DIAGNOSTIC
2636 panic("ip6_getpcbopt: unexpected option\n");
2637 #endif
2638 INP_RUNLOCK(inp);
2639 return (ENOPROTOOPT);
2640 }
2641 INP_RUNLOCK(inp);
2642
2643 error = sooptcopyout(sopt, optdata, optdatalen);
2644 if (malloc_optdata)
2645 free(optdata, M_TEMP);
2646
2647 return (error);
2648 }
2649
2650 void
ip6_clearpktopts(struct ip6_pktopts * pktopt,int optname)2651 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2652 {
2653 if (pktopt == NULL)
2654 return;
2655
2656 if (optname == -1 || optname == IPV6_PKTINFO) {
2657 if (pktopt->ip6po_pktinfo)
2658 free(pktopt->ip6po_pktinfo, M_IP6OPT);
2659 pktopt->ip6po_pktinfo = NULL;
2660 }
2661 if (optname == -1 || optname == IPV6_HOPLIMIT) {
2662 pktopt->ip6po_hlim = -1;
2663 pktopt->ip6po_valid &= ~IP6PO_VALID_HLIM;
2664 }
2665 if (optname == -1 || optname == IPV6_TCLASS) {
2666 pktopt->ip6po_tclass = -1;
2667 pktopt->ip6po_valid &= ~IP6PO_VALID_TC;
2668 }
2669 if (optname == -1 || optname == IPV6_NEXTHOP) {
2670 if (pktopt->ip6po_nextroute.ro_nh) {
2671 NH_FREE(pktopt->ip6po_nextroute.ro_nh);
2672 pktopt->ip6po_nextroute.ro_nh = NULL;
2673 }
2674 if (pktopt->ip6po_nexthop)
2675 free(pktopt->ip6po_nexthop, M_IP6OPT);
2676 pktopt->ip6po_nexthop = NULL;
2677 pktopt->ip6po_valid &= ~IP6PO_VALID_NHINFO;
2678 }
2679 if (optname == -1 || optname == IPV6_HOPOPTS) {
2680 if (pktopt->ip6po_hbh)
2681 free(pktopt->ip6po_hbh, M_IP6OPT);
2682 pktopt->ip6po_hbh = NULL;
2683 pktopt->ip6po_valid &= ~IP6PO_VALID_HBH;
2684 }
2685 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2686 if (pktopt->ip6po_dest1)
2687 free(pktopt->ip6po_dest1, M_IP6OPT);
2688 pktopt->ip6po_dest1 = NULL;
2689 pktopt->ip6po_valid &= ~IP6PO_VALID_DEST1;
2690 }
2691 if (optname == -1 || optname == IPV6_RTHDR) {
2692 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2693 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2694 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2695 if (pktopt->ip6po_route.ro_nh) {
2696 NH_FREE(pktopt->ip6po_route.ro_nh);
2697 pktopt->ip6po_route.ro_nh = NULL;
2698 }
2699 pktopt->ip6po_valid &= ~IP6PO_VALID_RHINFO;
2700 }
2701 if (optname == -1 || optname == IPV6_DSTOPTS) {
2702 if (pktopt->ip6po_dest2)
2703 free(pktopt->ip6po_dest2, M_IP6OPT);
2704 pktopt->ip6po_dest2 = NULL;
2705 pktopt->ip6po_valid &= ~IP6PO_VALID_DEST2;
2706 }
2707 }
2708
2709 #define PKTOPT_EXTHDRCPY(type) \
2710 do {\
2711 if (src->type) {\
2712 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2713 dst->type = malloc(hlen, M_IP6OPT, canwait);\
2714 if (dst->type == NULL)\
2715 goto bad;\
2716 bcopy(src->type, dst->type, hlen);\
2717 }\
2718 } while (/*CONSTCOND*/ 0)
2719
2720 static int
copypktopts(struct ip6_pktopts * dst,struct ip6_pktopts * src,int canwait)2721 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2722 {
2723 if (dst == NULL || src == NULL) {
2724 printf("ip6_clearpktopts: invalid argument\n");
2725 return (EINVAL);
2726 }
2727
2728 dst->ip6po_hlim = src->ip6po_hlim;
2729 dst->ip6po_tclass = src->ip6po_tclass;
2730 dst->ip6po_flags = src->ip6po_flags;
2731 dst->ip6po_minmtu = src->ip6po_minmtu;
2732 dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
2733 if (src->ip6po_pktinfo) {
2734 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2735 M_IP6OPT, canwait);
2736 if (dst->ip6po_pktinfo == NULL)
2737 goto bad;
2738 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2739 }
2740 if (src->ip6po_nexthop) {
2741 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2742 M_IP6OPT, canwait);
2743 if (dst->ip6po_nexthop == NULL)
2744 goto bad;
2745 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2746 src->ip6po_nexthop->sa_len);
2747 }
2748 PKTOPT_EXTHDRCPY(ip6po_hbh);
2749 PKTOPT_EXTHDRCPY(ip6po_dest1);
2750 PKTOPT_EXTHDRCPY(ip6po_dest2);
2751 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2752 dst->ip6po_valid = src->ip6po_valid;
2753 return (0);
2754
2755 bad:
2756 ip6_clearpktopts(dst, -1);
2757 return (ENOBUFS);
2758 }
2759 #undef PKTOPT_EXTHDRCPY
2760
2761 struct ip6_pktopts *
ip6_copypktopts(struct ip6_pktopts * src,int canwait)2762 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2763 {
2764 int error;
2765 struct ip6_pktopts *dst;
2766
2767 dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2768 if (dst == NULL)
2769 return (NULL);
2770 ip6_initpktopts(dst);
2771
2772 if ((error = copypktopts(dst, src, canwait)) != 0) {
2773 free(dst, M_IP6OPT);
2774 return (NULL);
2775 }
2776
2777 return (dst);
2778 }
2779
2780 void
ip6_freepcbopts(struct ip6_pktopts * pktopt)2781 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2782 {
2783 if (pktopt == NULL)
2784 return;
2785
2786 ip6_clearpktopts(pktopt, -1);
2787
2788 free(pktopt, M_IP6OPT);
2789 }
2790
2791 /*
2792 * Set IPv6 outgoing packet options based on advanced API.
2793 */
2794 int
ip6_setpktopts(struct mbuf * control,struct ip6_pktopts * opt,struct ip6_pktopts * stickyopt,struct ucred * cred,int uproto)2795 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2796 struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
2797 {
2798 struct cmsghdr *cm = NULL;
2799
2800 if (control == NULL || opt == NULL)
2801 return (EINVAL);
2802
2803 /*
2804 * ip6_setpktopt can call ifnet_byindex(), so it's imperative that we
2805 * are in the network epoch here.
2806 */
2807 NET_EPOCH_ASSERT();
2808
2809 ip6_initpktopts(opt);
2810 if (stickyopt) {
2811 int error;
2812
2813 /*
2814 * If stickyopt is provided, make a local copy of the options
2815 * for this particular packet, then override them by ancillary
2816 * objects.
2817 * XXX: copypktopts() does not copy the cached route to a next
2818 * hop (if any). This is not very good in terms of efficiency,
2819 * but we can allow this since this option should be rarely
2820 * used.
2821 */
2822 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2823 return (error);
2824 }
2825
2826 /*
2827 * XXX: Currently, we assume all the optional information is stored
2828 * in a single mbuf.
2829 */
2830 if (control->m_next)
2831 return (EINVAL);
2832
2833 for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2834 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2835 int error;
2836
2837 if (control->m_len < CMSG_LEN(0))
2838 return (EINVAL);
2839
2840 cm = mtod(control, struct cmsghdr *);
2841 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2842 return (EINVAL);
2843 if (cm->cmsg_level != IPPROTO_IPV6)
2844 continue;
2845
2846 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2847 cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2848 if (error)
2849 return (error);
2850 }
2851
2852 return (0);
2853 }
2854
2855 /*
2856 * Set a particular packet option, as a sticky option or an ancillary data
2857 * item. "len" can be 0 only when it's a sticky option.
2858 * We have 4 cases of combination of "sticky" and "cmsg":
2859 * "sticky=0, cmsg=0": impossible
2860 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2861 * "sticky=1, cmsg=0": RFC3542 socket option
2862 * "sticky=1, cmsg=1": RFC2292 socket option
2863 */
2864 static int
ip6_setpktopt(int optname,u_char * buf,int len,struct ip6_pktopts * opt,struct ucred * cred,int sticky,int cmsg,int uproto)2865 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2866 struct ucred *cred, int sticky, int cmsg, int uproto)
2867 {
2868 int minmtupolicy, preftemp;
2869 int error;
2870
2871 NET_EPOCH_ASSERT();
2872
2873 if (!sticky && !cmsg) {
2874 #ifdef DIAGNOSTIC
2875 printf("ip6_setpktopt: impossible case\n");
2876 #endif
2877 return (EINVAL);
2878 }
2879
2880 /*
2881 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2882 * not be specified in the context of RFC3542. Conversely,
2883 * RFC3542 types should not be specified in the context of RFC2292.
2884 */
2885 if (!cmsg) {
2886 switch (optname) {
2887 case IPV6_2292PKTINFO:
2888 case IPV6_2292HOPLIMIT:
2889 case IPV6_2292NEXTHOP:
2890 case IPV6_2292HOPOPTS:
2891 case IPV6_2292DSTOPTS:
2892 case IPV6_2292RTHDR:
2893 case IPV6_2292PKTOPTIONS:
2894 return (ENOPROTOOPT);
2895 }
2896 }
2897 if (sticky && cmsg) {
2898 switch (optname) {
2899 case IPV6_PKTINFO:
2900 case IPV6_HOPLIMIT:
2901 case IPV6_NEXTHOP:
2902 case IPV6_HOPOPTS:
2903 case IPV6_DSTOPTS:
2904 case IPV6_RTHDRDSTOPTS:
2905 case IPV6_RTHDR:
2906 case IPV6_USE_MIN_MTU:
2907 case IPV6_DONTFRAG:
2908 case IPV6_TCLASS:
2909 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2910 return (ENOPROTOOPT);
2911 }
2912 }
2913
2914 switch (optname) {
2915 case IPV6_2292PKTINFO:
2916 case IPV6_PKTINFO:
2917 {
2918 struct ifnet *ifp = NULL;
2919 struct in6_pktinfo *pktinfo;
2920
2921 if (len != sizeof(struct in6_pktinfo))
2922 return (EINVAL);
2923
2924 pktinfo = (struct in6_pktinfo *)buf;
2925
2926 /*
2927 * An application can clear any sticky IPV6_PKTINFO option by
2928 * doing a "regular" setsockopt with ipi6_addr being
2929 * in6addr_any and ipi6_ifindex being zero.
2930 * [RFC 3542, Section 6]
2931 */
2932 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2933 pktinfo->ipi6_ifindex == 0 &&
2934 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2935 ip6_clearpktopts(opt, optname);
2936 break;
2937 }
2938
2939 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2940 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2941 return (EINVAL);
2942 }
2943 if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr))
2944 return (EINVAL);
2945 /* validate the interface index if specified. */
2946 if (pktinfo->ipi6_ifindex) {
2947 ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2948 if (ifp == NULL)
2949 return (ENXIO);
2950 }
2951 if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL ||
2952 (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0))
2953 return (ENETDOWN);
2954
2955 if (ifp != NULL &&
2956 !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2957 struct in6_ifaddr *ia;
2958
2959 in6_setscope(&pktinfo->ipi6_addr, ifp, NULL);
2960 ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr);
2961 if (ia == NULL)
2962 return (EADDRNOTAVAIL);
2963 ifa_free(&ia->ia_ifa);
2964 }
2965 /*
2966 * We store the address anyway, and let in6_selectsrc()
2967 * validate the specified address. This is because ipi6_addr
2968 * may not have enough information about its scope zone, and
2969 * we may need additional information (such as outgoing
2970 * interface or the scope zone of a destination address) to
2971 * disambiguate the scope.
2972 * XXX: the delay of the validation may confuse the
2973 * application when it is used as a sticky option.
2974 */
2975 if (opt->ip6po_pktinfo == NULL) {
2976 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2977 M_IP6OPT, M_NOWAIT);
2978 if (opt->ip6po_pktinfo == NULL)
2979 return (ENOBUFS);
2980 }
2981 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2982 opt->ip6po_valid |= IP6PO_VALID_PKTINFO;
2983 break;
2984 }
2985
2986 case IPV6_2292HOPLIMIT:
2987 case IPV6_HOPLIMIT:
2988 {
2989 int *hlimp;
2990
2991 /*
2992 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2993 * to simplify the ordering among hoplimit options.
2994 */
2995 if (optname == IPV6_HOPLIMIT && sticky)
2996 return (ENOPROTOOPT);
2997
2998 if (len != sizeof(int))
2999 return (EINVAL);
3000 hlimp = (int *)buf;
3001 if (*hlimp < -1 || *hlimp > 255)
3002 return (EINVAL);
3003
3004 opt->ip6po_hlim = *hlimp;
3005 opt->ip6po_valid |= IP6PO_VALID_HLIM;
3006 break;
3007 }
3008
3009 case IPV6_TCLASS:
3010 {
3011 int tclass;
3012
3013 if (len != sizeof(int))
3014 return (EINVAL);
3015 tclass = *(int *)buf;
3016 if (tclass < -1 || tclass > 255)
3017 return (EINVAL);
3018
3019 opt->ip6po_tclass = tclass;
3020 opt->ip6po_valid |= IP6PO_VALID_TC;
3021 break;
3022 }
3023
3024 case IPV6_2292NEXTHOP:
3025 case IPV6_NEXTHOP:
3026 if (cred != NULL) {
3027 error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS);
3028 if (error)
3029 return (error);
3030 }
3031
3032 if (len == 0) { /* just remove the option */
3033 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3034 break;
3035 }
3036
3037 /* check if cmsg_len is large enough for sa_len */
3038 if (len < sizeof(struct sockaddr) || len < *buf)
3039 return (EINVAL);
3040
3041 switch (((struct sockaddr *)buf)->sa_family) {
3042 case AF_INET6:
3043 {
3044 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3045 int error;
3046
3047 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3048 return (EINVAL);
3049
3050 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3051 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3052 return (EINVAL);
3053 }
3054 if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
3055 != 0) {
3056 return (error);
3057 }
3058 break;
3059 }
3060 case AF_LINK: /* should eventually be supported */
3061 default:
3062 return (EAFNOSUPPORT);
3063 }
3064
3065 /* turn off the previous option, then set the new option. */
3066 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3067 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3068 if (opt->ip6po_nexthop == NULL)
3069 return (ENOBUFS);
3070 bcopy(buf, opt->ip6po_nexthop, *buf);
3071 opt->ip6po_valid |= IP6PO_VALID_NHINFO;
3072 break;
3073
3074 case IPV6_2292HOPOPTS:
3075 case IPV6_HOPOPTS:
3076 {
3077 struct ip6_hbh *hbh;
3078 int hbhlen;
3079
3080 /*
3081 * XXX: We don't allow a non-privileged user to set ANY HbH
3082 * options, since per-option restriction has too much
3083 * overhead.
3084 */
3085 if (cred != NULL) {
3086 error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS);
3087 if (error)
3088 return (error);
3089 }
3090
3091 if (len == 0) {
3092 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3093 break; /* just remove the option */
3094 }
3095
3096 /* message length validation */
3097 if (len < sizeof(struct ip6_hbh))
3098 return (EINVAL);
3099 hbh = (struct ip6_hbh *)buf;
3100 hbhlen = (hbh->ip6h_len + 1) << 3;
3101 if (len != hbhlen)
3102 return (EINVAL);
3103
3104 /* turn off the previous option, then set the new option. */
3105 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3106 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3107 if (opt->ip6po_hbh == NULL)
3108 return (ENOBUFS);
3109 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3110 opt->ip6po_valid |= IP6PO_VALID_HBH;
3111
3112 break;
3113 }
3114
3115 case IPV6_2292DSTOPTS:
3116 case IPV6_DSTOPTS:
3117 case IPV6_RTHDRDSTOPTS:
3118 {
3119 struct ip6_dest *dest, **newdest = NULL;
3120 int destlen;
3121
3122 if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
3123 error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS);
3124 if (error)
3125 return (error);
3126 }
3127
3128 if (len == 0) {
3129 ip6_clearpktopts(opt, optname);
3130 break; /* just remove the option */
3131 }
3132
3133 /* message length validation */
3134 if (len < sizeof(struct ip6_dest))
3135 return (EINVAL);
3136 dest = (struct ip6_dest *)buf;
3137 destlen = (dest->ip6d_len + 1) << 3;
3138 if (len != destlen)
3139 return (EINVAL);
3140
3141 /*
3142 * Determine the position that the destination options header
3143 * should be inserted; before or after the routing header.
3144 */
3145 switch (optname) {
3146 case IPV6_2292DSTOPTS:
3147 /*
3148 * The old advacned API is ambiguous on this point.
3149 * Our approach is to determine the position based
3150 * according to the existence of a routing header.
3151 * Note, however, that this depends on the order of the
3152 * extension headers in the ancillary data; the 1st
3153 * part of the destination options header must appear
3154 * before the routing header in the ancillary data,
3155 * too.
3156 * RFC3542 solved the ambiguity by introducing
3157 * separate ancillary data or option types.
3158 */
3159 if (opt->ip6po_rthdr == NULL)
3160 newdest = &opt->ip6po_dest1;
3161 else
3162 newdest = &opt->ip6po_dest2;
3163 break;
3164 case IPV6_RTHDRDSTOPTS:
3165 newdest = &opt->ip6po_dest1;
3166 break;
3167 case IPV6_DSTOPTS:
3168 newdest = &opt->ip6po_dest2;
3169 break;
3170 }
3171
3172 /* turn off the previous option, then set the new option. */
3173 ip6_clearpktopts(opt, optname);
3174 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3175 if (*newdest == NULL)
3176 return (ENOBUFS);
3177 bcopy(dest, *newdest, destlen);
3178 if (newdest == &opt->ip6po_dest1)
3179 opt->ip6po_valid |= IP6PO_VALID_DEST1;
3180 else
3181 opt->ip6po_valid |= IP6PO_VALID_DEST2;
3182
3183 break;
3184 }
3185
3186 case IPV6_2292RTHDR:
3187 case IPV6_RTHDR:
3188 {
3189 struct ip6_rthdr *rth;
3190 int rthlen;
3191
3192 if (len == 0) {
3193 ip6_clearpktopts(opt, IPV6_RTHDR);
3194 break; /* just remove the option */
3195 }
3196
3197 /* message length validation */
3198 if (len < sizeof(struct ip6_rthdr))
3199 return (EINVAL);
3200 rth = (struct ip6_rthdr *)buf;
3201 rthlen = (rth->ip6r_len + 1) << 3;
3202 if (len != rthlen)
3203 return (EINVAL);
3204
3205 switch (rth->ip6r_type) {
3206 case IPV6_RTHDR_TYPE_0:
3207 if (rth->ip6r_len == 0) /* must contain one addr */
3208 return (EINVAL);
3209 if (rth->ip6r_len % 2) /* length must be even */
3210 return (EINVAL);
3211 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3212 return (EINVAL);
3213 break;
3214 default:
3215 return (EINVAL); /* not supported */
3216 }
3217
3218 /* turn off the previous option */
3219 ip6_clearpktopts(opt, IPV6_RTHDR);
3220 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3221 if (opt->ip6po_rthdr == NULL)
3222 return (ENOBUFS);
3223 bcopy(rth, opt->ip6po_rthdr, rthlen);
3224 opt->ip6po_valid |= IP6PO_VALID_RHINFO;
3225
3226 break;
3227 }
3228
3229 case IPV6_USE_MIN_MTU:
3230 if (len != sizeof(int))
3231 return (EINVAL);
3232 minmtupolicy = *(int *)buf;
3233 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3234 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3235 minmtupolicy != IP6PO_MINMTU_ALL) {
3236 return (EINVAL);
3237 }
3238 opt->ip6po_minmtu = minmtupolicy;
3239 break;
3240
3241 case IPV6_DONTFRAG:
3242 if (len != sizeof(int))
3243 return (EINVAL);
3244
3245 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3246 /*
3247 * we ignore this option for TCP sockets.
3248 * (RFC3542 leaves this case unspecified.)
3249 */
3250 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3251 } else
3252 opt->ip6po_flags |= IP6PO_DONTFRAG;
3253 break;
3254
3255 case IPV6_PREFER_TEMPADDR:
3256 if (len != sizeof(int))
3257 return (EINVAL);
3258 preftemp = *(int *)buf;
3259 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3260 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3261 preftemp != IP6PO_TEMPADDR_PREFER) {
3262 return (EINVAL);
3263 }
3264 opt->ip6po_prefer_tempaddr = preftemp;
3265 break;
3266
3267 default:
3268 return (ENOPROTOOPT);
3269 } /* end of switch */
3270
3271 return (0);
3272 }
3273
3274 /*
3275 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3276 * packet to the input queue of a specified interface. Note that this
3277 * calls the output routine of the loopback "driver", but with an interface
3278 * pointer that might NOT be &loif -- easier than replicating that code here.
3279 */
3280 void
ip6_mloopback(struct ifnet * ifp,struct mbuf * m)3281 ip6_mloopback(struct ifnet *ifp, struct mbuf *m)
3282 {
3283 struct mbuf *copym;
3284 struct ip6_hdr *ip6;
3285
3286 copym = m_copym(m, 0, M_COPYALL, M_NOWAIT);
3287 if (copym == NULL)
3288 return;
3289
3290 /*
3291 * Make sure to deep-copy IPv6 header portion in case the data
3292 * is in an mbuf cluster, so that we can safely override the IPv6
3293 * header portion later.
3294 */
3295 if (!M_WRITABLE(copym) ||
3296 copym->m_len < sizeof(struct ip6_hdr)) {
3297 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3298 if (copym == NULL)
3299 return;
3300 }
3301 ip6 = mtod(copym, struct ip6_hdr *);
3302 /*
3303 * clear embedded scope identifiers if necessary.
3304 * in6_clearscope will touch the addresses only when necessary.
3305 */
3306 in6_clearscope(&ip6->ip6_src);
3307 in6_clearscope(&ip6->ip6_dst);
3308 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
3309 copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 |
3310 CSUM_PSEUDO_HDR;
3311 copym->m_pkthdr.csum_data = 0xffff;
3312 }
3313 if_simloop(ifp, copym, AF_INET6, 0);
3314 }
3315
3316 /*
3317 * Chop IPv6 header off from the payload.
3318 */
3319 static int
ip6_splithdr(struct mbuf * m,struct ip6_exthdrs * exthdrs)3320 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3321 {
3322 struct mbuf *mh;
3323 struct ip6_hdr *ip6;
3324
3325 ip6 = mtod(m, struct ip6_hdr *);
3326 if (m->m_len > sizeof(*ip6)) {
3327 mh = m_gethdr(M_NOWAIT, MT_DATA);
3328 if (mh == NULL) {
3329 m_freem(m);
3330 return ENOBUFS;
3331 }
3332 m_move_pkthdr(mh, m);
3333 M_ALIGN(mh, sizeof(*ip6));
3334 m->m_len -= sizeof(*ip6);
3335 m->m_data += sizeof(*ip6);
3336 mh->m_next = m;
3337 m = mh;
3338 m->m_len = sizeof(*ip6);
3339 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3340 }
3341 exthdrs->ip6e_ip6 = m;
3342 return 0;
3343 }
3344
3345 /*
3346 * Compute IPv6 extension header length.
3347 */
3348 int
ip6_optlen(struct inpcb * inp)3349 ip6_optlen(struct inpcb *inp)
3350 {
3351 int len;
3352
3353 if (!inp->in6p_outputopts)
3354 return 0;
3355
3356 len = 0;
3357 #define elen(x) \
3358 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3359
3360 len += elen(inp->in6p_outputopts->ip6po_hbh);
3361 if (inp->in6p_outputopts->ip6po_rthdr)
3362 /* dest1 is valid with rthdr only */
3363 len += elen(inp->in6p_outputopts->ip6po_dest1);
3364 len += elen(inp->in6p_outputopts->ip6po_rthdr);
3365 len += elen(inp->in6p_outputopts->ip6po_dest2);
3366 return len;
3367 #undef elen
3368 }
3369