1 /*- 2 * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/jail.h> 33 #include <sys/kernel.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/mbuf.h> 38 #include <sys/priv.h> 39 #include <sys/proc.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <sys/sx.h> 43 #include <sys/sysctl.h> 44 #include <sys/syslog.h> 45 46 #include <net/bpf.h> 47 #include <net/ethernet.h> 48 #include <net/if.h> 49 #include <net/if_var.h> 50 #include <net/if_clone.h> 51 #include <net/if_types.h> 52 #include <net/netisr.h> 53 #include <net/vnet.h> 54 #include <net/route.h> 55 56 #include <netinet/in.h> 57 #include <netinet/in_systm.h> 58 #include <netinet/in_var.h> 59 #include <netinet/ip.h> 60 #include <netinet/ip_var.h> 61 #include <netinet/ip_encap.h> 62 63 #include <machine/in_cksum.h> 64 #include <security/mac/mac_framework.h> 65 66 #define MEMTU (1500 - sizeof(struct mobhdr)) 67 static const char mename[] = "me"; 68 static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP"); 69 /* Minimal forwarding header RFC 2004 */ 70 struct mobhdr { 71 uint8_t mob_proto; /* protocol */ 72 uint8_t mob_flags; /* flags */ 73 #define MOB_FLAGS_SP 0x80 /* source present */ 74 uint16_t mob_csum; /* header checksum */ 75 struct in_addr mob_dst; /* original destination address */ 76 struct in_addr mob_src; /* original source addr (optional) */ 77 } __packed; 78 79 struct me_softc { 80 struct ifnet *me_ifp; 81 u_int me_fibnum; 82 struct in_addr me_src; 83 struct in_addr me_dst; 84 85 CK_LIST_ENTRY(me_softc) chain; 86 }; 87 CK_LIST_HEAD(me_list, me_softc); 88 #define ME2IFP(sc) ((sc)->me_ifp) 89 #define ME_READY(sc) ((sc)->me_src.s_addr != 0) 90 #define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt) 91 #define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) 92 #define ME_WAIT() epoch_wait_preempt(net_epoch_preempt) 93 94 #ifndef ME_HASH_SIZE 95 #define ME_HASH_SIZE (1 << 4) 96 #endif 97 static VNET_DEFINE(struct me_list *, me_hashtbl) = NULL; 98 #define V_me_hashtbl VNET(me_hashtbl) 99 #define ME_HASH(src, dst) (V_me_hashtbl[\ 100 me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)]) 101 102 static struct sx me_ioctl_sx; 103 SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl"); 104 105 static int me_clone_create(struct if_clone *, int, caddr_t); 106 static void me_clone_destroy(struct ifnet *); 107 static VNET_DEFINE(struct if_clone *, me_cloner); 108 #define V_me_cloner VNET(me_cloner) 109 110 static void me_qflush(struct ifnet *); 111 static int me_transmit(struct ifnet *, struct mbuf *); 112 static int me_ioctl(struct ifnet *, u_long, caddr_t); 113 static int me_output(struct ifnet *, struct mbuf *, 114 const struct sockaddr *, struct route *); 115 static int me_input(struct mbuf *, int, int, void *); 116 117 static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t); 118 static void me_delete_tunnel(struct me_softc *); 119 120 SYSCTL_DECL(_net_link); 121 static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0, 122 "Minimal Encapsulation for IP (RFC 2004)"); 123 #ifndef MAX_ME_NEST 124 #define MAX_ME_NEST 1 125 #endif 126 127 static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST; 128 #define V_max_me_nesting VNET(max_me_nesting) 129 SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, 130 &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); 131 132 static uint32_t 133 me_hashval(in_addr_t src, in_addr_t dst) 134 { 135 uint32_t ret; 136 137 ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); 138 return (fnv_32_buf(&dst, sizeof(dst), ret)); 139 } 140 141 static struct me_list * 142 me_hashinit(void) 143 { 144 struct me_list *hash; 145 int i; 146 147 hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE, 148 M_IFME, M_WAITOK); 149 for (i = 0; i < ME_HASH_SIZE; i++) 150 CK_LIST_INIT(&hash[i]); 151 152 return (hash); 153 } 154 155 static void 156 vnet_me_init(const void *unused __unused) 157 { 158 V_me_cloner = if_clone_simple(mename, me_clone_create, 159 me_clone_destroy, 0); 160 } 161 VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 162 vnet_me_init, NULL); 163 164 static void 165 vnet_me_uninit(const void *unused __unused) 166 { 167 168 if (V_me_hashtbl != NULL) 169 free(V_me_hashtbl, M_IFME); 170 if_clone_detach(V_me_cloner); 171 } 172 VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 173 vnet_me_uninit, NULL); 174 175 static int 176 me_clone_create(struct if_clone *ifc, int unit, caddr_t params) 177 { 178 struct me_softc *sc; 179 180 sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO); 181 sc->me_fibnum = curthread->td_proc->p_fibnum; 182 ME2IFP(sc) = if_alloc(IFT_TUNNEL); 183 ME2IFP(sc)->if_softc = sc; 184 if_initname(ME2IFP(sc), mename, unit); 185 186 ME2IFP(sc)->if_mtu = MEMTU;; 187 ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 188 ME2IFP(sc)->if_output = me_output; 189 ME2IFP(sc)->if_ioctl = me_ioctl; 190 ME2IFP(sc)->if_transmit = me_transmit; 191 ME2IFP(sc)->if_qflush = me_qflush; 192 ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; 193 ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; 194 if_attach(ME2IFP(sc)); 195 bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 196 return (0); 197 } 198 199 static void 200 me_clone_destroy(struct ifnet *ifp) 201 { 202 struct me_softc *sc; 203 204 sx_xlock(&me_ioctl_sx); 205 sc = ifp->if_softc; 206 me_delete_tunnel(sc); 207 bpfdetach(ifp); 208 if_detach(ifp); 209 ifp->if_softc = NULL; 210 sx_xunlock(&me_ioctl_sx); 211 212 ME_WAIT(); 213 if_free(ifp); 214 free(sc, M_IFME); 215 } 216 217 static int 218 me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 219 { 220 struct ifreq *ifr = (struct ifreq *)data; 221 struct sockaddr_in *src, *dst; 222 struct me_softc *sc; 223 int error; 224 225 switch (cmd) { 226 case SIOCSIFMTU: 227 if (ifr->ifr_mtu < 576) 228 return (EINVAL); 229 ifp->if_mtu = ifr->ifr_mtu; 230 return (0); 231 case SIOCSIFADDR: 232 ifp->if_flags |= IFF_UP; 233 case SIOCSIFFLAGS: 234 case SIOCADDMULTI: 235 case SIOCDELMULTI: 236 return (0); 237 } 238 sx_xlock(&me_ioctl_sx); 239 sc = ifp->if_softc; 240 if (sc == NULL) { 241 error = ENXIO; 242 goto end; 243 } 244 error = 0; 245 switch (cmd) { 246 case SIOCSIFPHYADDR: 247 src = &((struct in_aliasreq *)data)->ifra_addr; 248 dst = &((struct in_aliasreq *)data)->ifra_dstaddr; 249 if (src->sin_family != dst->sin_family || 250 src->sin_family != AF_INET || 251 src->sin_len != dst->sin_len || 252 src->sin_len != sizeof(struct sockaddr_in)) { 253 error = EINVAL; 254 break; 255 } 256 if (src->sin_addr.s_addr == INADDR_ANY || 257 dst->sin_addr.s_addr == INADDR_ANY) { 258 error = EADDRNOTAVAIL; 259 break; 260 } 261 error = me_set_tunnel(sc, src->sin_addr.s_addr, 262 dst->sin_addr.s_addr); 263 break; 264 case SIOCDIFPHYADDR: 265 me_delete_tunnel(sc); 266 break; 267 case SIOCGIFPSRCADDR: 268 case SIOCGIFPDSTADDR: 269 if (!ME_READY(sc)) { 270 error = EADDRNOTAVAIL; 271 break; 272 } 273 src = (struct sockaddr_in *)&ifr->ifr_addr; 274 memset(src, 0, sizeof(*src)); 275 src->sin_family = AF_INET; 276 src->sin_len = sizeof(*src); 277 switch (cmd) { 278 case SIOCGIFPSRCADDR: 279 src->sin_addr = sc->me_src; 280 break; 281 case SIOCGIFPDSTADDR: 282 src->sin_addr = sc->me_dst; 283 break; 284 } 285 error = prison_if(curthread->td_ucred, sintosa(src)); 286 if (error != 0) 287 memset(src, 0, sizeof(*src)); 288 break; 289 case SIOCGTUNFIB: 290 ifr->ifr_fib = sc->me_fibnum; 291 break; 292 case SIOCSTUNFIB: 293 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) 294 break; 295 if (ifr->ifr_fib >= rt_numfibs) 296 error = EINVAL; 297 else 298 sc->me_fibnum = ifr->ifr_fib; 299 break; 300 default: 301 error = EINVAL; 302 break; 303 } 304 end: 305 sx_xunlock(&me_ioctl_sx); 306 return (error); 307 } 308 309 static int 310 me_lookup(const struct mbuf *m, int off, int proto, void **arg) 311 { 312 const struct ip *ip; 313 struct me_softc *sc; 314 315 MPASS(in_epoch()); 316 ip = mtod(m, const struct ip *); 317 CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr, 318 ip->ip_src.s_addr), chain) { 319 if (sc->me_src.s_addr == ip->ip_dst.s_addr && 320 sc->me_dst.s_addr == ip->ip_src.s_addr) { 321 if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) 322 return (0); 323 *arg = sc; 324 return (ENCAP_DRV_LOOKUP); 325 } 326 } 327 return (0); 328 } 329 330 static int 331 me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst) 332 { 333 struct me_softc *tmp; 334 335 sx_assert(&me_ioctl_sx, SA_XLOCKED); 336 337 if (V_me_hashtbl == NULL) 338 V_me_hashtbl = me_hashinit(); 339 340 if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst) 341 return (0); 342 343 CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) { 344 if (tmp == sc) 345 continue; 346 if (tmp->me_src.s_addr == src && 347 tmp->me_dst.s_addr == dst) 348 return (EADDRNOTAVAIL); 349 } 350 351 me_delete_tunnel(sc); 352 sc->me_dst.s_addr = dst; 353 sc->me_src.s_addr = src; 354 CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain); 355 356 ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 357 if_link_state_change(ME2IFP(sc), LINK_STATE_UP); 358 return (0); 359 } 360 361 static void 362 me_delete_tunnel(struct me_softc *sc) 363 { 364 365 sx_assert(&me_ioctl_sx, SA_XLOCKED); 366 if (ME_READY(sc)) { 367 CK_LIST_REMOVE(sc, chain); 368 ME_WAIT(); 369 370 sc->me_src.s_addr = 0; 371 sc->me_dst.s_addr = 0; 372 ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 373 if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN); 374 } 375 } 376 377 static uint16_t 378 me_in_cksum(uint16_t *p, int nwords) 379 { 380 uint32_t sum = 0; 381 382 while (nwords-- > 0) 383 sum += *p++; 384 sum = (sum >> 16) + (sum & 0xffff); 385 sum += (sum >> 16); 386 return (~sum); 387 } 388 389 static int 390 me_input(struct mbuf *m, int off, int proto, void *arg) 391 { 392 struct me_softc *sc = arg; 393 struct mobhdr *mh; 394 struct ifnet *ifp; 395 struct ip *ip; 396 int hlen; 397 398 ifp = ME2IFP(sc); 399 /* checks for short packets */ 400 hlen = sizeof(struct mobhdr); 401 if (m->m_pkthdr.len < sizeof(struct ip) + hlen) 402 hlen -= sizeof(struct in_addr); 403 if (m->m_len < sizeof(struct ip) + hlen) 404 m = m_pullup(m, sizeof(struct ip) + hlen); 405 if (m == NULL) 406 goto drop; 407 mh = (struct mobhdr *)mtodo(m, sizeof(struct ip)); 408 /* check for wrong flags */ 409 if (mh->mob_flags & (~MOB_FLAGS_SP)) { 410 m_freem(m); 411 goto drop; 412 } 413 if (mh->mob_flags) { 414 if (hlen != sizeof(struct mobhdr)) { 415 m_freem(m); 416 goto drop; 417 } 418 } else 419 hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); 420 /* check mobile header checksum */ 421 if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) { 422 m_freem(m); 423 goto drop; 424 } 425 #ifdef MAC 426 mac_ifnet_create_mbuf(ifp, m); 427 #endif 428 ip = mtod(m, struct ip *); 429 ip->ip_dst = mh->mob_dst; 430 ip->ip_p = mh->mob_proto; 431 ip->ip_sum = 0; 432 ip->ip_len = htons(m->m_pkthdr.len - hlen); 433 if (mh->mob_flags) 434 ip->ip_src = mh->mob_src; 435 memmove(mtodo(m, hlen), ip, sizeof(struct ip)); 436 m_adj(m, hlen); 437 m_clrprotoflags(m); 438 m->m_pkthdr.rcvif = ifp; 439 m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); 440 M_SETFIB(m, ifp->if_fib); 441 hlen = AF_INET; 442 BPF_MTAP2(ifp, &hlen, sizeof(hlen), m); 443 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 444 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 445 if ((ifp->if_flags & IFF_MONITOR) != 0) 446 m_freem(m); 447 else 448 netisr_dispatch(NETISR_IP, m); 449 return (IPPROTO_DONE); 450 drop: 451 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 452 return (IPPROTO_DONE); 453 } 454 455 #define MTAG_ME 1414491977 456 static int 457 me_check_nesting(struct ifnet *ifp, struct mbuf *m) 458 { 459 struct m_tag *mtag; 460 int count; 461 462 count = 1; 463 mtag = NULL; 464 while ((mtag = m_tag_locate(m, MTAG_ME, 0, mtag)) != NULL) { 465 if (*(struct ifnet **)(mtag + 1) == ifp) { 466 log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); 467 return (EIO); 468 } 469 count++; 470 } 471 if (count > V_max_me_nesting) { 472 log(LOG_NOTICE, 473 "%s: if_output recursively called too many times(%d)\n", 474 ifp->if_xname, count); 475 return (EIO); 476 } 477 mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT); 478 if (mtag == NULL) 479 return (ENOMEM); 480 *(struct ifnet **)(mtag + 1) = ifp; 481 m_tag_prepend(m, mtag); 482 return (0); 483 } 484 485 static int 486 me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 487 struct route *ro __unused) 488 { 489 uint32_t af; 490 491 if (dst->sa_family == AF_UNSPEC) 492 bcopy(dst->sa_data, &af, sizeof(af)); 493 else 494 af = dst->sa_family; 495 m->m_pkthdr.csum_data = af; 496 return (ifp->if_transmit(ifp, m)); 497 } 498 499 static int 500 me_transmit(struct ifnet *ifp, struct mbuf *m) 501 { 502 struct mobhdr mh; 503 struct me_softc *sc; 504 struct ip *ip; 505 uint32_t af; 506 int error, hlen, plen; 507 508 #ifdef MAC 509 error = mac_ifnet_check_transmit(ifp, m); 510 if (error != 0) 511 goto drop; 512 #endif 513 error = ENETDOWN; 514 ME_RLOCK(); 515 sc = ifp->if_softc; 516 if (sc == NULL || !ME_READY(sc) || 517 (ifp->if_flags & IFF_MONITOR) != 0 || 518 (ifp->if_flags & IFF_UP) == 0 || 519 (error = me_check_nesting(ifp, m) != 0)) { 520 m_freem(m); 521 goto drop; 522 } 523 af = m->m_pkthdr.csum_data; 524 if (af != AF_INET) { 525 error = EAFNOSUPPORT; 526 m_freem(m); 527 goto drop; 528 } 529 if (m->m_len < sizeof(struct ip)) 530 m = m_pullup(m, sizeof(struct ip)); 531 if (m == NULL) { 532 error = ENOBUFS; 533 goto drop; 534 } 535 ip = mtod(m, struct ip *); 536 /* Fragmented datagramms shouldn't be encapsulated */ 537 if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 538 error = EINVAL; 539 m_freem(m); 540 goto drop; 541 } 542 mh.mob_proto = ip->ip_p; 543 mh.mob_src = ip->ip_src; 544 mh.mob_dst = ip->ip_dst; 545 if (in_hosteq(sc->me_src, ip->ip_src)) { 546 hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); 547 mh.mob_flags = 0; 548 } else { 549 hlen = sizeof(struct mobhdr); 550 mh.mob_flags = MOB_FLAGS_SP; 551 } 552 BPF_MTAP2(ifp, &af, sizeof(af), m); 553 plen = m->m_pkthdr.len; 554 ip->ip_src = sc->me_src; 555 ip->ip_dst = sc->me_dst; 556 m->m_flags &= ~(M_BCAST|M_MCAST); 557 M_SETFIB(m, sc->me_fibnum); 558 M_PREPEND(m, hlen, M_NOWAIT); 559 if (m == NULL) { 560 error = ENOBUFS; 561 goto drop; 562 } 563 if (m->m_len < sizeof(struct ip) + hlen) 564 m = m_pullup(m, sizeof(struct ip) + hlen); 565 if (m == NULL) { 566 error = ENOBUFS; 567 goto drop; 568 } 569 memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip)); 570 ip = mtod(m, struct ip *); 571 ip->ip_len = htons(m->m_pkthdr.len); 572 ip->ip_p = IPPROTO_MOBILE; 573 ip->ip_sum = 0; 574 mh.mob_csum = 0; 575 mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t)); 576 bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen); 577 error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); 578 drop: 579 if (error) 580 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 581 else { 582 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 583 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); 584 } 585 ME_RUNLOCK(); 586 return (error); 587 } 588 589 static void 590 me_qflush(struct ifnet *ifp __unused) 591 { 592 593 } 594 595 static const struct encaptab *ecookie = NULL; 596 static const struct encap_config me_encap_cfg = { 597 .proto = IPPROTO_MOBILE, 598 .min_length = sizeof(struct ip) + sizeof(struct mobhdr) - 599 sizeof(in_addr_t), 600 .exact_match = ENCAP_DRV_LOOKUP, 601 .lookup = me_lookup, 602 .input = me_input 603 }; 604 605 static int 606 memodevent(module_t mod, int type, void *data) 607 { 608 609 switch (type) { 610 case MOD_LOAD: 611 ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK); 612 break; 613 case MOD_UNLOAD: 614 ip_encap_detach(ecookie); 615 break; 616 default: 617 return (EOPNOTSUPP); 618 } 619 return (0); 620 } 621 622 static moduledata_t me_mod = { 623 "if_me", 624 memodevent, 625 0 626 }; 627 628 DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 629 MODULE_VERSION(if_me, 1); 630