1 /* $FreeBSD$ */ 2 /* $KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-3-Clause 6 * 7 * Copyright (C) 2000 WIDE Project. 8 * Copyright (c) 2010 Hiroki Sato <hrs@FreeBSD.org> 9 * Copyright (c) 2013 Ermal Luci <eri@FreeBSD.org> 10 * Copyright (c) 2017-2021 Rubicon Communications, LLC (Netgate) 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the project nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * 6to4 interface, based on RFC3056. 40 * 41 * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. 42 * There is no address mapping defined from IPv6 multicast address to IPv4 43 * address. Therefore, we do not have IFF_MULTICAST on the interface. 44 * 45 * Due to the lack of address mapping for link-local addresses, we cannot 46 * throw packets toward link-local addresses (fe80::x). Also, we cannot throw 47 * packets to link-local multicast addresses (ff02::x). 48 * 49 * Here are interesting symptoms due to the lack of link-local address: 50 * 51 * Unicast routing exchange: 52 * - RIPng: Impossible. Uses link-local multicast packet toward ff02::9, 53 * and link-local addresses as nexthop. 54 * - OSPFv6: Impossible. OSPFv6 assumes that there's link-local address 55 * assigned to the link, and makes use of them. Also, HELLO packets use 56 * link-local multicast addresses (ff02::5 and ff02::6). 57 * - BGP4+: Maybe. You can only use global address as nexthop, and global 58 * address as TCP endpoint address. 59 * 60 * Multicast routing protocols: 61 * - PIM: Hello packet cannot be used to discover adjacent PIM routers. 62 * Adjacent PIM routers must be configured manually (is it really spec-wise 63 * correct thing to do?). 64 * 65 * ICMPv6: 66 * - Redirects cannot be used due to the lack of link-local address. 67 * 68 * stf interface does not have, and will not need, a link-local address. 69 * It seems to have no real benefit and does not help the above symptoms much. 70 * Even if we assign link-locals to interface, we cannot really 71 * use link-local unicast/multicast on top of 6to4 cloud (since there's no 72 * encapsulation defined for link-local address), and the above analysis does 73 * not change. RFC3056 does not mandate the assignment of link-local address 74 * either. 75 * 76 * 6to4 interface has security issues. Refer to 77 * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt 78 * for details. The code tries to filter out some of malicious packets. 79 * Note that there is no way to be 100% secure. 80 */ 81 82 #include <sys/param.h> 83 #include <sys/systm.h> 84 #include <sys/socket.h> 85 #include <sys/sockio.h> 86 #include <sys/mbuf.h> 87 #include <sys/endian.h> 88 #include <sys/errno.h> 89 #include <sys/kernel.h> 90 #include <sys/lock.h> 91 #include <sys/module.h> 92 #include <sys/priv.h> 93 #include <sys/proc.h> 94 #include <sys/queue.h> 95 #include <sys/sdt.h> 96 #include <sys/sysctl.h> 97 #include <machine/cpu.h> 98 99 #include <sys/malloc.h> 100 101 #include <net/if.h> 102 #include <net/if_var.h> 103 #include <net/if_private.h> 104 #include <net/if_clone.h> 105 #include <net/route.h> 106 #include <net/route/nhop.h> 107 #include <net/netisr.h> 108 #include <net/if_stf.h> 109 #include <net/if_types.h> 110 #include <net/vnet.h> 111 112 #include <netinet/in.h> 113 #include <netinet/in_fib.h> 114 #include <netinet/in_systm.h> 115 #include <netinet/ip.h> 116 #include <netinet/ip_var.h> 117 #include <netinet/in_var.h> 118 119 #include <netinet/ip6.h> 120 #include <netinet6/in6_fib.h> 121 #include <netinet6/ip6_var.h> 122 #include <netinet6/in6_var.h> 123 #include <netinet/ip_ecn.h> 124 125 #include <netinet/ip_encap.h> 126 127 #include <machine/stdarg.h> 128 129 #include <net/bpf.h> 130 131 #include <security/mac/mac_framework.h> 132 133 SDT_PROVIDER_DEFINE(if_stf); 134 SDT_PROBE_DEFINE3(if_stf, , encapcheck, in, "struct mbuf *", "int", "int"); 135 SDT_PROBE_DEFINE0(if_stf, , encapcheck, accept); 136 SDT_PROBE_DEFINE3(if_stf, , getsrcifa6, in, "struct ifnet *", 137 "struct in6_addr *", "struct in6_addr *"); 138 SDT_PROBE_DEFINE2(if_stf, , getsrcifa6, found, "struct in6_addr *", 139 "struct in6_addr *"); 140 SDT_PROBE_DEFINE0(if_stf, , getsrcifa6, notfound); 141 142 SDT_PROBE_DEFINE4(if_stf, , stf_output, in, "struct ifnet *", "struct mbuf *", 143 "struct sockaddr *", "struct route *"); 144 SDT_PROBE_DEFINE2(if_stf, , stf_output, error, "int", "int"); 145 SDT_PROBE_DEFINE1(if_stf, , stf_output, out, "int"); 146 147 SDT_PROBE_DEFINE3(if_stf, , checkaddr6, in, "struct stf_softc *", 148 "struct in6_addr *", "struct ifnet *"); 149 SDT_PROBE_DEFINE2(if_stf, , checkaddr6, out, "int", "int"); 150 151 SDT_PROBE_DEFINE3(if_stf, , stf_input, in, "struct mbuf *", "int", "int"); 152 SDT_PROBE_DEFINE2(if_stf, , stf_input, out, "int", "int"); 153 154 SDT_PROBE_DEFINE3(if_stf, , ioctl, sv4net, "struct in_addr *", 155 "struct in_addr *", "int"); 156 SDT_PROBE_DEFINE1(if_stf, , ioctl, sdstv4, "struct in_addr *"); 157 SDT_PROBE_DEFINE1(if_stf, , ioctl, ifaddr, "struct ifaddr *"); 158 159 SDT_PROBE_DEFINE4(if_stf, , getin4addr_in6, out, "struct in6_addr *", 160 "struct in6_addr *", "struct in6_addr *", "struct sockaddr_in *"); 161 162 SDT_PROBE_DEFINE2(if_stf, , getin4addr, in, "struct in6_addr *", "struct in6_addr *"); 163 SDT_PROBE_DEFINE1(if_stf, , getin4addr, out, "struct sockaddr_in *"); 164 165 SYSCTL_DECL(_net_link); 166 static SYSCTL_NODE(_net_link, IFT_STF, stf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 167 "6to4 Interface"); 168 169 static int stf_permit_rfc1918 = 0; 170 SYSCTL_INT(_net_link_stf, OID_AUTO, permit_rfc1918, CTLFLAG_RWTUN, 171 &stf_permit_rfc1918, 0, "Permit the use of private IPv4 addresses"); 172 173 #define STFUNIT 0 174 175 #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002) 176 177 /* 178 * XXX: Return a pointer with 16-bit aligned. Don't cast it to 179 * struct in_addr *; use bcopy() instead. 180 */ 181 #define GET_V4(x) (&(x)->s6_addr16[1]) 182 183 struct stf_softc { 184 struct ifnet *sc_ifp; 185 in_addr_t braddr; /* Border relay IPv4 address */ 186 in_addr_t srcv4_addr; /* Our IPv4 WAN address */ 187 u_int v4prefixlen; /* How much of the v4 address to include in our address. */ 188 u_int sc_fibnum; 189 const struct encaptab *encap_cookie; 190 }; 191 #define STF2IFP(sc) ((sc)->sc_ifp) 192 193 static const char stfname[] = "stf"; 194 195 static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface"); 196 static const int ip_stf_ttl = 40; 197 198 static int in_stf_input(struct mbuf *, int, int, void *); 199 static char *stfnames[] = {"stf0", "stf", "6to4", NULL}; 200 201 static int stfmodevent(module_t, int, void *); 202 static int stf_encapcheck(const struct mbuf *, int, int, void *); 203 static int stf_getsrcifa6(struct ifnet *, struct in6_addr *, struct in6_addr *); 204 static int stf_output(struct ifnet *, struct mbuf *, const struct sockaddr *, 205 struct route *); 206 static int isrfc1918addr(struct in_addr *); 207 static int stf_checkaddr4(struct stf_softc *, struct in_addr *, 208 struct ifnet *); 209 static int stf_checkaddr6(struct stf_softc *, struct in6_addr *, 210 struct ifnet *); 211 static struct sockaddr_in *stf_getin4addr_in6(struct stf_softc *, 212 struct sockaddr_in *, struct in6_addr, struct in6_addr, 213 struct in6_addr); 214 static struct sockaddr_in *stf_getin4addr(struct stf_softc *, 215 struct sockaddr_in *, struct in6_addr, struct in6_addr); 216 static int stf_ioctl(struct ifnet *, u_long, caddr_t); 217 218 VNET_DEFINE_STATIC(struct if_clone *, stf_cloner); 219 #define V_stf_cloner VNET(stf_cloner) 220 221 static const struct encap_config ipv4_encap_cfg = { 222 .proto = IPPROTO_IPV6, 223 .min_length = sizeof(struct ip), 224 .exact_match = (sizeof(in_addr_t) << 3) + 8, 225 .check = stf_encapcheck, 226 .input = in_stf_input 227 }; 228 229 static int 230 stf_clone_match(struct if_clone *ifc, const char *name) 231 { 232 int i; 233 234 for(i = 0; stfnames[i] != NULL; i++) { 235 if (strcmp(stfnames[i], name) == 0) 236 return (1); 237 } 238 239 return (0); 240 } 241 242 static int 243 stf_clone_create(struct if_clone *ifc, char *name, size_t len, 244 struct ifc_data *ifd, struct ifnet **ifpp) 245 { 246 char *dp; 247 int err, unit, wildcard; 248 struct stf_softc *sc; 249 struct ifnet *ifp; 250 251 err = ifc_name2unit(name, &unit); 252 if (err != 0) 253 return (err); 254 wildcard = (unit < 0); 255 256 /* 257 * We can only have one unit, but since unit allocation is 258 * already locked, we use it to keep from allocating extra 259 * interfaces. 260 */ 261 unit = STFUNIT; 262 err = ifc_alloc_unit(ifc, &unit); 263 if (err != 0) 264 return (err); 265 266 sc = malloc(sizeof(struct stf_softc), M_STF, M_WAITOK | M_ZERO); 267 ifp = STF2IFP(sc) = if_alloc(IFT_STF); 268 if (ifp == NULL) { 269 free(sc, M_STF); 270 ifc_free_unit(ifc, unit); 271 return (ENOSPC); 272 } 273 ifp->if_softc = sc; 274 sc->sc_fibnum = curthread->td_proc->p_fibnum; 275 276 /* 277 * Set the name manually rather then using if_initname because 278 * we don't conform to the default naming convention for interfaces. 279 * In the wildcard case, we need to update the name. 280 */ 281 if (wildcard) { 282 for (dp = name; *dp != '\0'; dp++); 283 if (snprintf(dp, len - (dp-name), "%d", unit) > 284 len - (dp-name) - 1) { 285 /* 286 * This can only be a programmer error and 287 * there's no straightforward way to recover if 288 * it happens. 289 */ 290 panic("if_clone_create(): interface name too long"); 291 } 292 } 293 strlcpy(ifp->if_xname, name, IFNAMSIZ); 294 ifp->if_dname = stfname; 295 ifp->if_dunit = IF_DUNIT_NONE; 296 297 sc->encap_cookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK); 298 299 ifp->if_mtu = IPV6_MMTU; 300 ifp->if_ioctl = stf_ioctl; 301 ifp->if_output = stf_output; 302 ifp->if_snd.ifq_maxlen = ifqmaxlen; 303 if_attach(ifp); 304 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); 305 *ifpp = ifp; 306 307 return (0); 308 } 309 310 static int 311 stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) 312 { 313 struct stf_softc *sc = ifp->if_softc; 314 int err __unused; 315 316 err = ip_encap_detach(sc->encap_cookie); 317 KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); 318 bpfdetach(ifp); 319 if_detach(ifp); 320 if_free(ifp); 321 322 free(sc, M_STF); 323 ifc_free_unit(ifc, STFUNIT); 324 325 return (0); 326 } 327 328 static void 329 vnet_stf_init(const void *unused __unused) 330 { 331 struct if_clone_addreq req = { 332 .match_f = stf_clone_match, 333 .create_f = stf_clone_create, 334 .destroy_f = stf_clone_destroy, 335 }; 336 V_stf_cloner = ifc_attach_cloner(stfname, &req); 337 } 338 VNET_SYSINIT(vnet_stf_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_stf_init, NULL); 339 340 static void 341 vnet_stf_uninit(const void *unused __unused) 342 { 343 if_clone_detach(V_stf_cloner); 344 V_stf_cloner = NULL; 345 } 346 VNET_SYSUNINIT(vnet_stf_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_stf_uninit, 347 NULL); 348 349 static int 350 stfmodevent(module_t mod, int type, void *data) 351 { 352 353 switch (type) { 354 case MOD_LOAD: 355 /* Done in vnet_stf_init() */ 356 break; 357 case MOD_UNLOAD: 358 /* Done in vnet_stf_uninit() */ 359 break; 360 default: 361 return (EOPNOTSUPP); 362 } 363 364 return (0); 365 } 366 367 static moduledata_t stf_mod = { 368 "if_stf", 369 stfmodevent, 370 0 371 }; 372 373 DECLARE_MODULE(if_stf, stf_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 374 MODULE_VERSION(if_stf, 2); 375 376 static int 377 stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) 378 { 379 struct ip ip; 380 struct stf_softc *sc; 381 struct in6_addr addr6, mask6; 382 struct sockaddr_in sin4addr, sin4mask; 383 384 SDT_PROBE3(if_stf, , encapcheck, in, m, off, proto); 385 386 sc = (struct stf_softc *)arg; 387 if (sc == NULL) 388 return (0); 389 390 if ((STF2IFP(sc)->if_flags & IFF_UP) == 0) 391 return (0); 392 393 /* IFF_LINK0 means "no decapsulation" */ 394 if ((STF2IFP(sc)->if_flags & IFF_LINK0) != 0) 395 return (0); 396 397 if (proto != IPPROTO_IPV6) 398 return (0); 399 400 m_copydata(m, 0, sizeof(ip), (caddr_t)&ip); 401 402 if (ip.ip_v != 4) 403 return (0); 404 405 if (stf_getsrcifa6(STF2IFP(sc), &addr6, &mask6) != 0) 406 return (0); 407 408 if (sc->srcv4_addr != INADDR_ANY) { 409 sin4addr.sin_addr.s_addr = sc->srcv4_addr; 410 sin4addr.sin_family = AF_INET; 411 } else 412 if (stf_getin4addr(sc, &sin4addr, addr6, mask6) == NULL) 413 return (0); 414 415 if (sin4addr.sin_addr.s_addr != ip.ip_dst.s_addr) 416 return (0); 417 418 if (IN6_IS_ADDR_6TO4(&addr6)) { 419 /* 420 * 6to4 (RFC 3056). 421 * Check if IPv4 src matches the IPv4 address derived 422 * from the local 6to4 address masked by prefixmask. 423 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24 424 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24 425 */ 426 memcpy(&sin4mask.sin_addr, GET_V4(&mask6), 427 sizeof(sin4mask.sin_addr)); 428 if ((sin4addr.sin_addr.s_addr & sin4mask.sin_addr.s_addr) != 429 (ip.ip_src.s_addr & sin4mask.sin_addr.s_addr)) 430 return (0); 431 } else { 432 /* 6rd (RFC 5569) */ 433 /* 434 * No restriction on the src address in the case of 435 * 6rd because the stf(4) interface always has a 436 * prefix which covers whole of IPv4 src address 437 * range. So, stf_output() will catch all of 438 * 6rd-capsuled IPv4 traffic with suspicious inner dst 439 * IPv4 address (i.e. the IPv6 destination address is 440 * one the admin does not like to route to outside), 441 * and then it discard them silently. 442 */ 443 } 444 445 SDT_PROBE0(if_stf, , encapcheck, accept); 446 447 /* stf interface makes single side match only */ 448 return (32); 449 } 450 451 static int 452 stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask) 453 { 454 struct ifaddr *ia; 455 struct in_ifaddr *ia4; 456 struct in6_addr addr6, mask6; 457 struct sockaddr_in sin4; 458 struct stf_softc *sc; 459 struct in_addr in; 460 461 NET_EPOCH_ASSERT(); 462 463 sc = ifp->if_softc; 464 465 SDT_PROBE3(if_stf, , getsrcifa6, in, ifp, addr, mask); 466 467 CK_STAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { 468 if (ia->ifa_addr->sa_family != AF_INET6) 469 continue; 470 471 addr6 = *IFA_IN6(ia); 472 mask6 = *IFA_MASKIN6(ia); 473 if (sc->srcv4_addr != INADDR_ANY) 474 bcopy(&sc->srcv4_addr, &in, sizeof(in)); 475 else { 476 if (stf_getin4addr(sc, &sin4, addr6, mask6) == NULL) 477 continue; 478 bcopy(&sin4.sin_addr, &in, sizeof(in)); 479 } 480 481 CK_LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash) 482 if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) 483 break; 484 if (ia4 == NULL) 485 continue; 486 487 *addr = addr6; 488 *mask = mask6; 489 490 SDT_PROBE2(if_stf, , getsrcifa6, found, addr, mask); 491 492 return (0); 493 } 494 495 SDT_PROBE0(if_stf, , getsrcifa6, notfound); 496 497 return (ENOENT); 498 } 499 500 static int 501 stf_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 502 struct route *ro) 503 { 504 struct stf_softc *sc; 505 const struct sockaddr_in6 *dst6; 506 struct sockaddr_in dst4, src4; 507 u_int8_t tos; 508 struct ip *ip; 509 struct ip6_hdr *ip6; 510 struct in6_addr addr6, mask6; 511 int error; 512 513 SDT_PROBE4(if_stf, , stf_output, in, ifp, m, dst, ro); 514 515 #ifdef MAC 516 error = mac_ifnet_check_transmit(ifp, m); 517 if (error) { 518 m_freem(m); 519 SDT_PROBE2(if_stf, , stf_output, error, error, __LINE__); 520 return (error); 521 } 522 #endif 523 524 sc = ifp->if_softc; 525 dst6 = (const struct sockaddr_in6 *)dst; 526 527 /* just in case */ 528 if ((ifp->if_flags & IFF_UP) == 0) { 529 m_freem(m); 530 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 531 SDT_PROBE2(if_stf, , stf_output, error, ENETDOWN, __LINE__); 532 return (ENETDOWN); 533 } 534 535 /* 536 * If we don't have an ip4 address that match my inner ip6 address, 537 * we shouldn't generate output. Without this check, we'll end up 538 * using wrong IPv4 source. 539 */ 540 if (stf_getsrcifa6(ifp, &addr6, &mask6) != 0) { 541 m_freem(m); 542 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 543 SDT_PROBE2(if_stf, , stf_output, error, ENETDOWN, __LINE__); 544 return (ENETDOWN); 545 } 546 547 if (m->m_len < sizeof(*ip6)) { 548 m = m_pullup(m, sizeof(*ip6)); 549 if (!m) { 550 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 551 SDT_PROBE2(if_stf, , stf_output, error, ENOBUFS, 552 __LINE__); 553 return (ENOBUFS); 554 } 555 } 556 ip6 = mtod(m, struct ip6_hdr *); 557 tos = IPV6_TRAFFIC_CLASS(ip6); 558 559 /* 560 * Pickup the right outer dst addr from the list of candidates. 561 * ip6_dst has priority as it may be able to give us shorter IPv4 hops. 562 */ 563 if (stf_getin4addr_in6(sc, &dst4, addr6, mask6, 564 ip6->ip6_dst) == NULL) { 565 if (sc->braddr != INADDR_ANY) 566 dst4.sin_addr.s_addr = sc->braddr; 567 else if (stf_getin4addr_in6(sc, &dst4, addr6, mask6, 568 dst6->sin6_addr) == NULL) { 569 m_freem(m); 570 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 571 SDT_PROBE2(if_stf, , stf_output, error, ENETUNREACH, 572 __LINE__); 573 return (ENETUNREACH); 574 } 575 } 576 577 if (bpf_peers_present(ifp->if_bpf)) { 578 /* 579 * We need to prepend the address family as 580 * a four byte field. Cons up a dummy header 581 * to pacify bpf. This is safe because bpf 582 * will only read from the mbuf (i.e., it won't 583 * try to free it or keep a pointer a to it). 584 */ 585 u_int af = AF_INET6; 586 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); 587 } 588 589 M_PREPEND(m, sizeof(struct ip), M_NOWAIT); 590 if (m == NULL) { 591 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 592 SDT_PROBE2(if_stf, , stf_output, error, ENOBUFS, __LINE__); 593 return (ENOBUFS); 594 } 595 ip = mtod(m, struct ip *); 596 597 bzero(ip, sizeof(*ip)); 598 599 if (sc->srcv4_addr != INADDR_ANY) 600 src4.sin_addr.s_addr = sc->srcv4_addr; 601 else if (stf_getin4addr(sc, &src4, addr6, mask6) == NULL) { 602 m_freem(m); 603 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 604 SDT_PROBE2(if_stf, , stf_output, error, ENETUNREACH, __LINE__); 605 return (ENETUNREACH); 606 } 607 bcopy(&src4.sin_addr, &ip->ip_src, sizeof(ip->ip_src)); 608 bcopy(&dst4.sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)); 609 610 ip->ip_p = IPPROTO_IPV6; 611 ip->ip_ttl = ip_stf_ttl; 612 ip->ip_len = htons(m->m_pkthdr.len); 613 if (ifp->if_flags & IFF_LINK1) 614 ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos); 615 else 616 ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); 617 618 M_SETFIB(m, sc->sc_fibnum); 619 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 620 error = ip_output(m, NULL, NULL, 0, NULL, NULL); 621 622 SDT_PROBE1(if_stf, , stf_output, out, error); 623 return (error); 624 } 625 626 static int 627 isrfc1918addr(struct in_addr *in) 628 { 629 /* 630 * returns 1 if private address range: 631 * 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 632 */ 633 if (stf_permit_rfc1918 == 0 && ( 634 (ntohl(in->s_addr) & 0xff000000) >> 24 == 10 || 635 (ntohl(in->s_addr) & 0xfff00000) >> 16 == 172 * 256 + 16 || 636 (ntohl(in->s_addr) & 0xffff0000) >> 16 == 192 * 256 + 168)) 637 return (1); 638 639 return (0); 640 } 641 642 static int 643 stf_checkaddr4(struct stf_softc *sc, struct in_addr *in, struct ifnet *inifp) 644 { 645 struct in_ifaddr *ia4; 646 647 /* 648 * reject packets with the following address: 649 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8 650 */ 651 if (IN_MULTICAST(ntohl(in->s_addr))) 652 return (-1); 653 switch ((ntohl(in->s_addr) & 0xff000000) >> 24) { 654 case 0: case 127: case 255: 655 return (-1); 656 } 657 658 /* 659 * reject packets with broadcast 660 */ 661 CK_STAILQ_FOREACH(ia4, &V_in_ifaddrhead, ia_link) { 662 if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) 663 continue; 664 if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { 665 return (-1); 666 } 667 } 668 669 /* 670 * perform ingress filter 671 */ 672 if (sc && (STF2IFP(sc)->if_flags & IFF_LINK2) == 0 && inifp) { 673 struct nhop_object *nh; 674 675 NET_EPOCH_ASSERT(); 676 nh = fib4_lookup(sc->sc_fibnum, *in, 0, 0, 0); 677 if (nh == NULL) 678 return (-1); 679 680 if (nh->nh_ifp != inifp) 681 return (-1); 682 } 683 684 return (0); 685 } 686 687 static int 688 stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) 689 { 690 SDT_PROBE3(if_stf, , checkaddr6, in, sc, in6, inifp); 691 692 /* 693 * check 6to4 addresses 694 */ 695 if (IN6_IS_ADDR_6TO4(in6)) { 696 struct in_addr in4; 697 int ret; 698 699 bcopy(GET_V4(in6), &in4, sizeof(in4)); 700 ret = stf_checkaddr4(sc, &in4, inifp); 701 SDT_PROBE2(if_stf, , checkaddr6, out, ret, __LINE__); 702 return (ret); 703 } 704 705 /* 706 * reject anything that look suspicious. the test is implemented 707 * in ip6_input too, but we check here as well to 708 * (1) reject bad packets earlier, and 709 * (2) to be safe against future ip6_input change. 710 */ 711 if (IN6_IS_ADDR_V4COMPAT(in6)) { 712 SDT_PROBE2(if_stf, , checkaddr6, out, -1, __LINE__); 713 return (-1); 714 } 715 716 if (IN6_IS_ADDR_V4MAPPED(in6)) { 717 SDT_PROBE2(if_stf, , checkaddr6, out, -1, __LINE__); 718 return (-1); 719 } 720 721 SDT_PROBE2(if_stf, , checkaddr6, out, 0, __LINE__); 722 return (0); 723 } 724 725 static int 726 in_stf_input(struct mbuf *m, int off, int proto, void *arg) 727 { 728 struct stf_softc *sc = arg; 729 struct ip ip; 730 struct ip6_hdr *ip6; 731 u_int8_t otos, itos; 732 struct ifnet *ifp; 733 struct nhop_object *nh; 734 735 NET_EPOCH_ASSERT(); 736 737 SDT_PROBE3(if_stf, , stf_input, in, m, off, proto); 738 739 if (proto != IPPROTO_IPV6) { 740 m_freem(m); 741 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, __LINE__); 742 return (IPPROTO_DONE); 743 } 744 745 m_copydata(m, 0, sizeof(struct ip), (caddr_t)&ip); 746 if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) { 747 m_freem(m); 748 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, __LINE__); 749 return (IPPROTO_DONE); 750 } 751 752 ifp = STF2IFP(sc); 753 754 #ifdef MAC 755 mac_ifnet_create_mbuf(ifp, m); 756 #endif 757 758 /* 759 * perform sanity check against outer src/dst. 760 * for source, perform ingress filter as well. 761 */ 762 if (stf_checkaddr4(sc, &ip.ip_dst, NULL) < 0 || 763 stf_checkaddr4(sc, &ip.ip_src, m->m_pkthdr.rcvif) < 0) { 764 m_freem(m); 765 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, __LINE__); 766 return (IPPROTO_DONE); 767 } 768 769 otos = ip.ip_tos; 770 m_adj(m, off); 771 772 if (m->m_len < sizeof(*ip6)) { 773 m = m_pullup(m, sizeof(*ip6)); 774 if (!m) { 775 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, 776 __LINE__); 777 return (IPPROTO_DONE); 778 } 779 } 780 ip6 = mtod(m, struct ip6_hdr *); 781 782 /* 783 * perform sanity check against inner src/dst. 784 * for source, perform ingress filter as well. 785 */ 786 if (stf_checkaddr6(sc, &ip6->ip6_dst, NULL) < 0 || 787 stf_checkaddr6(sc, &ip6->ip6_src, m->m_pkthdr.rcvif) < 0) { 788 m_freem(m); 789 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, __LINE__); 790 return (IPPROTO_DONE); 791 } 792 793 /* 794 * reject packets with private address range. 795 * (requirement from RFC3056 section 2 1st paragraph) 796 */ 797 if ((IN6_IS_ADDR_6TO4(&ip6->ip6_src) && isrfc1918addr(&ip.ip_src)) || 798 (IN6_IS_ADDR_6TO4(&ip6->ip6_dst) && isrfc1918addr(&ip.ip_dst))) { 799 m_freem(m); 800 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, __LINE__); 801 return (IPPROTO_DONE); 802 } 803 804 /* 805 * Ignore if the destination is the same stf interface because 806 * all of valid IPv6 outgoing traffic should go interfaces 807 * except for it. 808 */ 809 nh = fib6_lookup(sc->sc_fibnum, &ip6->ip6_dst, 0, 0, 0); 810 if (nh == NULL) { 811 m_free(m); 812 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, __LINE__); 813 return (IPPROTO_DONE); 814 } 815 if ((nh->nh_ifp == ifp) && 816 (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &nh->gw6_sa.sin6_addr))) { 817 m_free(m); 818 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, __LINE__); 819 return (IPPROTO_DONE); 820 } 821 822 itos = IPV6_TRAFFIC_CLASS(ip6); 823 if ((ifp->if_flags & IFF_LINK1) != 0) 824 ip_ecn_egress(ECN_ALLOWED, &otos, &itos); 825 else 826 ip_ecn_egress(ECN_NOCARE, &otos, &itos); 827 ip6->ip6_flow &= ~htonl(0xff << 20); 828 ip6->ip6_flow |= htonl((u_int32_t)itos << 20); 829 830 m->m_pkthdr.rcvif = ifp; 831 832 if (bpf_peers_present(ifp->if_bpf)) { 833 /* 834 * We need to prepend the address family as 835 * a four byte field. Cons up a dummy header 836 * to pacify bpf. This is safe because bpf 837 * will only read from the mbuf (i.e., it won't 838 * try to free it or keep a pointer a to it). 839 */ 840 u_int32_t af = AF_INET6; 841 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); 842 } 843 844 /* 845 * Put the packet to the network layer input queue according to the 846 * specified address family. 847 * See net/if_gif.c for possible issues with packet processing 848 * reorder due to extra queueing. 849 */ 850 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 851 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 852 M_SETFIB(m, ifp->if_fib); 853 netisr_dispatch(NETISR_IPV6, m); 854 SDT_PROBE2(if_stf, , stf_input, out, IPPROTO_DONE, __LINE__); 855 return (IPPROTO_DONE); 856 } 857 858 static struct sockaddr_in * 859 stf_getin4addr_in6(struct stf_softc *sc, struct sockaddr_in *sin, 860 struct in6_addr addr6, struct in6_addr mask6, struct in6_addr in6) 861 { 862 int i; 863 struct sockaddr_in *out; 864 865 /* 866 * When (src addr & src mask) != (in6 & src mask), 867 * the dst is not in the 6rd domain. The IPv4 address must 868 * not be used. 869 */ 870 for (i = 0; i < sizeof(addr6); i++) { 871 if ((((u_char *)&addr6)[i] & ((u_char *)&mask6)[i]) != 872 (((u_char *)&in6)[i] & ((u_char *)&mask6)[i])) { 873 SDT_PROBE4(if_stf, , getin4addr_in6, out, &addr6, 874 &mask6, &in6, NULL); 875 return (NULL); 876 } 877 } 878 879 /* After the mask check, use in6 instead of addr6. */ 880 out = stf_getin4addr(sc, sin, in6, mask6); 881 SDT_PROBE4(if_stf, , getin4addr_in6, out, &addr6, &mask6, &in6, out); 882 return (out); 883 } 884 885 static struct sockaddr_in * 886 stf_getin4addr(struct stf_softc *sc, struct sockaddr_in *sin, 887 struct in6_addr addr6, struct in6_addr mask6) 888 { 889 struct in_addr *in; 890 891 SDT_PROBE2(if_stf, , getin4addr, in, &addr6, &mask6); 892 893 memset(sin, 0, sizeof(*sin)); 894 in = &sin->sin_addr; 895 if (IN6_IS_ADDR_6TO4(&addr6)) { 896 /* 6to4 (RFC 3056) */ 897 bcopy(GET_V4(&addr6), in, sizeof(*in)); 898 if (isrfc1918addr(in)) 899 return (NULL); 900 } else { 901 /* 6rd (RFC 5569) */ 902 in_addr_t v4prefix; 903 uint8_t *v6 = (uint8_t*)&addr6; 904 uint64_t v6prefix; 905 u_int plen; 906 u_int v4suffixlen; 907 908 v4prefix = 0; 909 if (sc->v4prefixlen < 32) { 910 v4suffixlen = 32 - sc->v4prefixlen; 911 v4prefix = ntohl(sc->srcv4_addr) & 912 (0xffffffffU << v4suffixlen); 913 } else { 914 MPASS(sc->v4prefixlen == 32); 915 v4suffixlen = 32; 916 } 917 918 plen = in6_mask2len(&mask6, NULL); 919 if (plen > 64) 920 return (NULL); 921 922 /* To make this simple we do not support prefixes longer than 923 * 64 bits. RFC5969 says "a 6rd delegated prefix SHOULD be /64 924 * or shorter." so this is a moderately safe assumption. */ 925 v6prefix = be64toh(*(uint64_t *)v6); 926 927 /* Shift away the v6 prefix itself. */ 928 v6prefix <<= plen; 929 v6prefix >>= plen; 930 931 /* Now shift away everything after the v4 address. */ 932 v6prefix >>= 64 - plen - v4suffixlen; 933 934 sin->sin_addr.s_addr = htonl(v4prefix | (uint32_t)v6prefix); 935 } 936 937 SDT_PROBE1(if_stf, , getin4addr, out, sin); 938 939 return (sin); 940 } 941 942 static int 943 stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 944 { 945 struct ifaddr *ifa; 946 struct ifdrv *ifd; 947 struct ifreq *ifr; 948 struct sockaddr_in sin4; 949 struct stf_softc *sc_cur; 950 struct stfv4args args; 951 int error, mtu; 952 953 error = 0; 954 sc_cur = ifp->if_softc; 955 956 switch (cmd) { 957 case SIOCSDRVSPEC: 958 ifd = (struct ifdrv *)data; 959 error = priv_check(curthread, PRIV_NET_ADDIFADDR); 960 if (error) 961 break; 962 if (ifd->ifd_cmd == STF6RD_SV4NET) { 963 if (ifd->ifd_len != sizeof(args)) { 964 error = EINVAL; 965 break; 966 } 967 bzero(&args, sizeof(args)); 968 error = copyin(ifd->ifd_data, &args, ifd->ifd_len); 969 if (error) 970 break; 971 972 if (args.v4_prefixlen < 1 || args.v4_prefixlen > 32) { 973 error = EINVAL; 974 break; 975 } 976 977 bcopy(&args.srcv4_addr, &sc_cur->srcv4_addr, 978 sizeof(sc_cur->srcv4_addr)); 979 sc_cur->v4prefixlen = args.v4_prefixlen; 980 SDT_PROBE3(if_stf, , ioctl, sv4net, sc_cur->srcv4_addr, 981 sc_cur->srcv4_addr, sc_cur->v4prefixlen); 982 } else if (ifd->ifd_cmd == STF6RD_SBR) { 983 if (ifd->ifd_len != sizeof(args)) { 984 error = EINVAL; 985 break; 986 } 987 bzero(&args, sizeof(args)); 988 error = copyin(ifd->ifd_data, &args, ifd->ifd_len); 989 if (error) 990 break; 991 sc_cur->braddr = args.braddr.s_addr; 992 SDT_PROBE1(if_stf, , ioctl, sdstv4, 993 sc_cur->braddr); 994 } else 995 error = EINVAL; 996 break; 997 case SIOCGDRVSPEC: 998 ifd = (struct ifdrv *)data; 999 if (ifd->ifd_cmd != STF6RD_GV4NET) { 1000 error = EINVAL; 1001 break; 1002 } 1003 if (ifd->ifd_len != sizeof(args)) { 1004 error = EINVAL; 1005 break; 1006 } 1007 bzero(&args, sizeof(args)); 1008 args.srcv4_addr.s_addr = sc_cur->srcv4_addr; 1009 args.braddr.s_addr = sc_cur->braddr; 1010 args.v4_prefixlen = sc_cur->v4prefixlen; 1011 error = copyout(&args, ifd->ifd_data, ifd->ifd_len); 1012 break; 1013 case SIOCSIFADDR: 1014 ifa = (struct ifaddr *)data; 1015 SDT_PROBE1(if_stf, , ioctl, ifaddr, ifa); 1016 if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) { 1017 error = EAFNOSUPPORT; 1018 break; 1019 } 1020 if (stf_getin4addr(sc_cur, &sin4, 1021 satosin6(ifa->ifa_addr)->sin6_addr, 1022 satosin6(ifa->ifa_netmask)->sin6_addr) == NULL) { 1023 error = EINVAL; 1024 break; 1025 } 1026 ifp->if_flags |= IFF_UP; 1027 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1028 break; 1029 1030 case SIOCADDMULTI: 1031 case SIOCDELMULTI: 1032 ifr = (struct ifreq *)data; 1033 if (ifr && ifr->ifr_addr.sa_family == AF_INET6) 1034 ; 1035 else 1036 error = EAFNOSUPPORT; 1037 break; 1038 1039 case SIOCGIFMTU: 1040 break; 1041 1042 case SIOCSIFMTU: 1043 ifr = (struct ifreq *)data; 1044 mtu = ifr->ifr_mtu; 1045 /* RFC 4213 3.2 ideal world MTU */ 1046 if (mtu < IPV6_MINMTU || mtu > IF_MAXMTU - 20) 1047 return (EINVAL); 1048 ifp->if_mtu = mtu; 1049 break; 1050 1051 default: 1052 error = EINVAL; 1053 break; 1054 } 1055 1056 return (error); 1057 } 1058