1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1998 The NetBSD Foundation, Inc. 5 * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org> 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Heiko W.Rupp <hwr@pilhuhn.de> 10 * 11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 * 34 * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_inet.h" 41 #include "opt_inet6.h" 42 43 #include <sys/param.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/module.h> 48 #include <sys/mbuf.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/socket.h> 52 #include <sys/sockio.h> 53 #include <sys/sx.h> 54 #include <sys/sysctl.h> 55 #include <sys/syslog.h> 56 #include <sys/systm.h> 57 58 #include <net/ethernet.h> 59 #include <net/if.h> 60 #include <net/if_var.h> 61 #include <net/if_clone.h> 62 #include <net/if_types.h> 63 #include <net/netisr.h> 64 #include <net/vnet.h> 65 #include <net/route.h> 66 67 #include <netinet/in.h> 68 #ifdef INET 69 #include <netinet/in_var.h> 70 #include <netinet/ip.h> 71 #include <netinet/ip_var.h> 72 #endif 73 74 #ifdef INET6 75 #include <netinet/ip6.h> 76 #include <netinet6/in6_var.h> 77 #include <netinet6/ip6_var.h> 78 #endif 79 80 #include <netinet/ip_encap.h> 81 #include <net/bpf.h> 82 #include <net/if_gre.h> 83 84 #include <machine/in_cksum.h> 85 #include <security/mac/mac_framework.h> 86 87 #define GREMTU 1476 88 89 static const char grename[] = "gre"; 90 MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); 91 92 static struct sx gre_ioctl_sx; 93 SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); 94 95 static int gre_clone_create(struct if_clone *, int, caddr_t); 96 static void gre_clone_destroy(struct ifnet *); 97 VNET_DEFINE_STATIC(struct if_clone *, gre_cloner); 98 #define V_gre_cloner VNET(gre_cloner) 99 100 static void gre_qflush(struct ifnet *); 101 static int gre_transmit(struct ifnet *, struct mbuf *); 102 static int gre_ioctl(struct ifnet *, u_long, caddr_t); 103 static int gre_output(struct ifnet *, struct mbuf *, 104 const struct sockaddr *, struct route *); 105 static void gre_delete_tunnel(struct gre_softc *); 106 107 SYSCTL_DECL(_net_link); 108 static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, 109 "Generic Routing Encapsulation"); 110 #ifndef MAX_GRE_NEST 111 /* 112 * This macro controls the default upper limitation on nesting of gre tunnels. 113 * Since, setting a large value to this macro with a careless configuration 114 * may introduce system crash, we don't allow any nestings by default. 115 * If you need to configure nested gre tunnels, you can define this macro 116 * in your kernel configuration file. However, if you do so, please be 117 * careful to configure the tunnels so that it won't make a loop. 118 */ 119 #define MAX_GRE_NEST 1 120 #endif 121 122 VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST; 123 #define V_max_gre_nesting VNET(max_gre_nesting) 124 SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, 125 &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels"); 126 127 static void 128 vnet_gre_init(const void *unused __unused) 129 { 130 131 V_gre_cloner = if_clone_simple(grename, gre_clone_create, 132 gre_clone_destroy, 0); 133 #ifdef INET 134 in_gre_init(); 135 #endif 136 #ifdef INET6 137 in6_gre_init(); 138 #endif 139 } 140 VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 141 vnet_gre_init, NULL); 142 143 static void 144 vnet_gre_uninit(const void *unused __unused) 145 { 146 147 if_clone_detach(V_gre_cloner); 148 #ifdef INET 149 in_gre_uninit(); 150 #endif 151 #ifdef INET6 152 in6_gre_uninit(); 153 #endif 154 } 155 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 156 vnet_gre_uninit, NULL); 157 158 static int 159 gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) 160 { 161 struct gre_softc *sc; 162 163 sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); 164 sc->gre_fibnum = curthread->td_proc->p_fibnum; 165 GRE2IFP(sc) = if_alloc(IFT_TUNNEL); 166 GRE2IFP(sc)->if_softc = sc; 167 if_initname(GRE2IFP(sc), grename, unit); 168 169 GRE2IFP(sc)->if_mtu = GREMTU; 170 GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 171 GRE2IFP(sc)->if_output = gre_output; 172 GRE2IFP(sc)->if_ioctl = gre_ioctl; 173 GRE2IFP(sc)->if_transmit = gre_transmit; 174 GRE2IFP(sc)->if_qflush = gre_qflush; 175 GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; 176 GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; 177 if_attach(GRE2IFP(sc)); 178 bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 179 return (0); 180 } 181 182 static void 183 gre_clone_destroy(struct ifnet *ifp) 184 { 185 struct gre_softc *sc; 186 187 sx_xlock(&gre_ioctl_sx); 188 sc = ifp->if_softc; 189 gre_delete_tunnel(sc); 190 bpfdetach(ifp); 191 if_detach(ifp); 192 ifp->if_softc = NULL; 193 sx_xunlock(&gre_ioctl_sx); 194 195 GRE_WAIT(); 196 if_free(ifp); 197 free(sc, M_GRE); 198 } 199 200 static int 201 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 202 { 203 struct ifreq *ifr = (struct ifreq *)data; 204 struct gre_softc *sc; 205 uint32_t opt; 206 int error; 207 208 switch (cmd) { 209 case SIOCSIFMTU: 210 /* XXX: */ 211 if (ifr->ifr_mtu < 576) 212 return (EINVAL); 213 ifp->if_mtu = ifr->ifr_mtu; 214 return (0); 215 case SIOCSIFADDR: 216 ifp->if_flags |= IFF_UP; 217 case SIOCSIFFLAGS: 218 case SIOCADDMULTI: 219 case SIOCDELMULTI: 220 return (0); 221 case GRESADDRS: 222 case GRESADDRD: 223 case GREGADDRS: 224 case GREGADDRD: 225 case GRESPROTO: 226 case GREGPROTO: 227 return (EOPNOTSUPP); 228 } 229 sx_xlock(&gre_ioctl_sx); 230 sc = ifp->if_softc; 231 if (sc == NULL) { 232 error = ENXIO; 233 goto end; 234 } 235 error = 0; 236 switch (cmd) { 237 case SIOCDIFPHYADDR: 238 if (sc->gre_family == 0) 239 break; 240 gre_delete_tunnel(sc); 241 break; 242 #ifdef INET 243 case SIOCSIFPHYADDR: 244 case SIOCGIFPSRCADDR: 245 case SIOCGIFPDSTADDR: 246 error = in_gre_ioctl(sc, cmd, data); 247 break; 248 #endif 249 #ifdef INET6 250 case SIOCSIFPHYADDR_IN6: 251 case SIOCGIFPSRCADDR_IN6: 252 case SIOCGIFPDSTADDR_IN6: 253 error = in6_gre_ioctl(sc, cmd, data); 254 break; 255 #endif 256 case SIOCGTUNFIB: 257 ifr->ifr_fib = sc->gre_fibnum; 258 break; 259 case SIOCSTUNFIB: 260 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) 261 break; 262 if (ifr->ifr_fib >= rt_numfibs) 263 error = EINVAL; 264 else 265 sc->gre_fibnum = ifr->ifr_fib; 266 break; 267 case GRESKEY: 268 case GRESOPTS: 269 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) 270 break; 271 if ((error = copyin(ifr_data_get_ptr(ifr), &opt, 272 sizeof(opt))) != 0) 273 break; 274 if (cmd == GRESKEY) { 275 if (sc->gre_key == opt) 276 break; 277 } else if (cmd == GRESOPTS) { 278 if (opt & ~GRE_OPTMASK) { 279 error = EINVAL; 280 break; 281 } 282 if (sc->gre_options == opt) 283 break; 284 } 285 switch (sc->gre_family) { 286 #ifdef INET 287 case AF_INET: 288 in_gre_setopts(sc, cmd, opt); 289 break; 290 #endif 291 #ifdef INET6 292 case AF_INET6: 293 in6_gre_setopts(sc, cmd, opt); 294 break; 295 #endif 296 default: 297 if (cmd == GRESKEY) 298 sc->gre_key = opt; 299 else 300 sc->gre_options = opt; 301 break; 302 } 303 /* 304 * XXX: Do we need to initiate change of interface 305 * state here? 306 */ 307 break; 308 case GREGKEY: 309 error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr), 310 sizeof(sc->gre_key)); 311 break; 312 case GREGOPTS: 313 error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr), 314 sizeof(sc->gre_options)); 315 break; 316 default: 317 error = EINVAL; 318 break; 319 } 320 if (error == 0 && sc->gre_family != 0) { 321 if ( 322 #ifdef INET 323 cmd == SIOCSIFPHYADDR || 324 #endif 325 #ifdef INET6 326 cmd == SIOCSIFPHYADDR_IN6 || 327 #endif 328 0) { 329 if_link_state_change(ifp, LINK_STATE_UP); 330 } 331 } 332 end: 333 sx_xunlock(&gre_ioctl_sx); 334 return (error); 335 } 336 337 static void 338 gre_delete_tunnel(struct gre_softc *sc) 339 { 340 341 sx_assert(&gre_ioctl_sx, SA_XLOCKED); 342 if (sc->gre_family != 0) { 343 CK_LIST_REMOVE(sc, chain); 344 CK_LIST_REMOVE(sc, srchash); 345 GRE_WAIT(); 346 free(sc->gre_hdr, M_GRE); 347 sc->gre_family = 0; 348 } 349 GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 350 if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN); 351 } 352 353 struct gre_list * 354 gre_hashinit(void) 355 { 356 struct gre_list *hash; 357 int i; 358 359 hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE, 360 M_GRE, M_WAITOK); 361 for (i = 0; i < GRE_HASH_SIZE; i++) 362 CK_LIST_INIT(&hash[i]); 363 364 return (hash); 365 } 366 367 void 368 gre_hashdestroy(struct gre_list *hash) 369 { 370 371 free(hash, M_GRE); 372 } 373 374 void 375 gre_updatehdr(struct gre_softc *sc, struct grehdr *gh) 376 { 377 uint32_t *opts; 378 uint16_t flags; 379 380 sx_assert(&gre_ioctl_sx, SA_XLOCKED); 381 382 flags = 0; 383 opts = gh->gre_opts; 384 if (sc->gre_options & GRE_ENABLE_CSUM) { 385 flags |= GRE_FLAGS_CP; 386 sc->gre_hlen += 2 * sizeof(uint16_t); 387 *opts++ = 0; 388 } 389 if (sc->gre_key != 0) { 390 flags |= GRE_FLAGS_KP; 391 sc->gre_hlen += sizeof(uint32_t); 392 *opts++ = htonl(sc->gre_key); 393 } 394 if (sc->gre_options & GRE_ENABLE_SEQ) { 395 flags |= GRE_FLAGS_SP; 396 sc->gre_hlen += sizeof(uint32_t); 397 *opts++ = 0; 398 } else 399 sc->gre_oseq = 0; 400 gh->gre_flags = htons(flags); 401 } 402 403 int 404 gre_input(struct mbuf *m, int off, int proto, void *arg) 405 { 406 struct gre_softc *sc = arg; 407 struct grehdr *gh; 408 struct ifnet *ifp; 409 uint32_t *opts; 410 #ifdef notyet 411 uint32_t key; 412 #endif 413 uint16_t flags; 414 int hlen, isr, af; 415 416 ifp = GRE2IFP(sc); 417 hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t); 418 if (m->m_pkthdr.len < hlen) 419 goto drop; 420 if (m->m_len < hlen) { 421 m = m_pullup(m, hlen); 422 if (m == NULL) 423 goto drop; 424 } 425 gh = (struct grehdr *)mtodo(m, off); 426 flags = ntohs(gh->gre_flags); 427 if (flags & ~GRE_FLAGS_MASK) 428 goto drop; 429 opts = gh->gre_opts; 430 hlen = 2 * sizeof(uint16_t); 431 if (flags & GRE_FLAGS_CP) { 432 /* reserved1 field must be zero */ 433 if (((uint16_t *)opts)[1] != 0) 434 goto drop; 435 if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0) 436 goto drop; 437 hlen += 2 * sizeof(uint16_t); 438 opts++; 439 } 440 if (flags & GRE_FLAGS_KP) { 441 #ifdef notyet 442 /* 443 * XXX: The current implementation uses the key only for outgoing 444 * packets. But we can check the key value here, or even in the 445 * encapcheck function. 446 */ 447 key = ntohl(*opts); 448 #endif 449 hlen += sizeof(uint32_t); 450 opts++; 451 } 452 #ifdef notyet 453 } else 454 key = 0; 455 456 if (sc->gre_key != 0 && (key != sc->gre_key || key != 0)) 457 goto drop; 458 #endif 459 if (flags & GRE_FLAGS_SP) { 460 #ifdef notyet 461 seq = ntohl(*opts); 462 #endif 463 hlen += sizeof(uint32_t); 464 } 465 switch (ntohs(gh->gre_proto)) { 466 case ETHERTYPE_WCCP: 467 /* 468 * For WCCP skip an additional 4 bytes if after GRE header 469 * doesn't follow an IP header. 470 */ 471 if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40) 472 hlen += sizeof(uint32_t); 473 /* FALLTHROUGH */ 474 case ETHERTYPE_IP: 475 isr = NETISR_IP; 476 af = AF_INET; 477 break; 478 case ETHERTYPE_IPV6: 479 isr = NETISR_IPV6; 480 af = AF_INET6; 481 break; 482 default: 483 goto drop; 484 } 485 m_adj(m, off + hlen); 486 m_clrprotoflags(m); 487 m->m_pkthdr.rcvif = ifp; 488 M_SETFIB(m, ifp->if_fib); 489 #ifdef MAC 490 mac_ifnet_create_mbuf(ifp, m); 491 #endif 492 BPF_MTAP2(ifp, &af, sizeof(af), m); 493 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 494 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 495 if ((ifp->if_flags & IFF_MONITOR) != 0) 496 m_freem(m); 497 else 498 netisr_dispatch(isr, m); 499 return (IPPROTO_DONE); 500 drop: 501 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 502 m_freem(m); 503 return (IPPROTO_DONE); 504 } 505 506 static int 507 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 508 struct route *ro) 509 { 510 uint32_t af; 511 512 if (dst->sa_family == AF_UNSPEC) 513 bcopy(dst->sa_data, &af, sizeof(af)); 514 else 515 af = dst->sa_family; 516 /* 517 * Now save the af in the inbound pkt csum data, this is a cheat since 518 * we are using the inbound csum_data field to carry the af over to 519 * the gre_transmit() routine, avoiding using yet another mtag. 520 */ 521 m->m_pkthdr.csum_data = af; 522 return (ifp->if_transmit(ifp, m)); 523 } 524 525 static void 526 gre_setseqn(struct grehdr *gh, uint32_t seq) 527 { 528 uint32_t *opts; 529 uint16_t flags; 530 531 opts = gh->gre_opts; 532 flags = ntohs(gh->gre_flags); 533 KASSERT((flags & GRE_FLAGS_SP) != 0, 534 ("gre_setseqn called, but GRE_FLAGS_SP isn't set ")); 535 if (flags & GRE_FLAGS_CP) 536 opts++; 537 if (flags & GRE_FLAGS_KP) 538 opts++; 539 *opts = htonl(seq); 540 } 541 542 #define MTAG_GRE 1307983903 543 static int 544 gre_transmit(struct ifnet *ifp, struct mbuf *m) 545 { 546 GRE_RLOCK_TRACKER; 547 struct gre_softc *sc; 548 struct grehdr *gh; 549 uint32_t af; 550 int error, len; 551 uint16_t proto; 552 553 len = 0; 554 GRE_RLOCK(); 555 #ifdef MAC 556 error = mac_ifnet_check_transmit(ifp, m); 557 if (error) { 558 m_freem(m); 559 goto drop; 560 } 561 #endif 562 error = ENETDOWN; 563 sc = ifp->if_softc; 564 if ((ifp->if_flags & IFF_MONITOR) != 0 || 565 (ifp->if_flags & IFF_UP) == 0 || 566 (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 567 sc->gre_family == 0 || 568 (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE, 569 V_max_gre_nesting)) != 0) { 570 m_freem(m); 571 goto drop; 572 } 573 af = m->m_pkthdr.csum_data; 574 BPF_MTAP2(ifp, &af, sizeof(af), m); 575 m->m_flags &= ~(M_BCAST|M_MCAST); 576 M_SETFIB(m, sc->gre_fibnum); 577 M_PREPEND(m, sc->gre_hlen, M_NOWAIT); 578 if (m == NULL) { 579 error = ENOBUFS; 580 goto drop; 581 } 582 bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen); 583 /* Determine GRE proto */ 584 switch (af) { 585 #ifdef INET 586 case AF_INET: 587 proto = htons(ETHERTYPE_IP); 588 break; 589 #endif 590 #ifdef INET6 591 case AF_INET6: 592 proto = htons(ETHERTYPE_IPV6); 593 break; 594 #endif 595 default: 596 m_freem(m); 597 error = ENETDOWN; 598 goto drop; 599 } 600 /* Determine offset of GRE header */ 601 switch (sc->gre_family) { 602 #ifdef INET 603 case AF_INET: 604 len = sizeof(struct ip); 605 break; 606 #endif 607 #ifdef INET6 608 case AF_INET6: 609 len = sizeof(struct ip6_hdr); 610 break; 611 #endif 612 default: 613 m_freem(m); 614 error = ENETDOWN; 615 goto drop; 616 } 617 gh = (struct grehdr *)mtodo(m, len); 618 gh->gre_proto = proto; 619 if (sc->gre_options & GRE_ENABLE_SEQ) 620 gre_setseqn(gh, sc->gre_oseq++); 621 if (sc->gre_options & GRE_ENABLE_CSUM) { 622 *(uint16_t *)gh->gre_opts = in_cksum_skip(m, 623 m->m_pkthdr.len, len); 624 } 625 len = m->m_pkthdr.len - len; 626 switch (sc->gre_family) { 627 #ifdef INET 628 case AF_INET: 629 error = in_gre_output(m, af, sc->gre_hlen); 630 break; 631 #endif 632 #ifdef INET6 633 case AF_INET6: 634 error = in6_gre_output(m, af, sc->gre_hlen); 635 break; 636 #endif 637 default: 638 m_freem(m); 639 error = ENETDOWN; 640 } 641 drop: 642 if (error) 643 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 644 else { 645 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 646 if_inc_counter(ifp, IFCOUNTER_OBYTES, len); 647 } 648 GRE_RUNLOCK(); 649 return (error); 650 } 651 652 static void 653 gre_qflush(struct ifnet *ifp __unused) 654 { 655 656 } 657 658 static int 659 gremodevent(module_t mod, int type, void *data) 660 { 661 662 switch (type) { 663 case MOD_LOAD: 664 case MOD_UNLOAD: 665 break; 666 default: 667 return (EOPNOTSUPP); 668 } 669 return (0); 670 } 671 672 static moduledata_t gre_mod = { 673 "if_gre", 674 gremodevent, 675 0 676 }; 677 678 DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 679 MODULE_VERSION(if_gre, 1); 680