1 /*- 2 * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/jail.h> 33 #include <sys/kernel.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/mbuf.h> 38 #include <sys/priv.h> 39 #include <sys/proc.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <sys/sx.h> 43 #include <sys/sysctl.h> 44 #include <sys/syslog.h> 45 46 #include <net/bpf.h> 47 #include <net/ethernet.h> 48 #include <net/if.h> 49 #include <net/if_var.h> 50 #include <net/if_clone.h> 51 #include <net/if_types.h> 52 #include <net/netisr.h> 53 #include <net/vnet.h> 54 #include <net/route.h> 55 56 #include <netinet/in.h> 57 #include <netinet/in_systm.h> 58 #include <netinet/in_var.h> 59 #include <netinet/ip.h> 60 #include <netinet/ip_var.h> 61 #include <netinet/ip_encap.h> 62 63 #include <machine/in_cksum.h> 64 #include <security/mac/mac_framework.h> 65 66 #define MEMTU (1500 - sizeof(struct mobhdr)) 67 static const char mename[] = "me"; 68 static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP"); 69 /* Minimal forwarding header RFC 2004 */ 70 struct mobhdr { 71 uint8_t mob_proto; /* protocol */ 72 uint8_t mob_flags; /* flags */ 73 #define MOB_FLAGS_SP 0x80 /* source present */ 74 uint16_t mob_csum; /* header checksum */ 75 struct in_addr mob_dst; /* original destination address */ 76 struct in_addr mob_src; /* original source addr (optional) */ 77 } __packed; 78 79 struct me_softc { 80 struct ifnet *me_ifp; 81 u_int me_fibnum; 82 struct in_addr me_src; 83 struct in_addr me_dst; 84 85 CK_LIST_ENTRY(me_softc) chain; 86 }; 87 CK_LIST_HEAD(me_list, me_softc); 88 #define ME2IFP(sc) ((sc)->me_ifp) 89 #define ME_READY(sc) ((sc)->me_src.s_addr != 0) 90 #define ME_RLOCK() struct epoch_tracker me_et; epoch_enter_preempt(net_epoch_preempt, &me_et) 91 #define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &me_et) 92 #define ME_WAIT() epoch_wait_preempt(net_epoch_preempt) 93 94 #ifndef ME_HASH_SIZE 95 #define ME_HASH_SIZE (1 << 4) 96 #endif 97 VNET_DEFINE_STATIC(struct me_list *, me_hashtbl) = NULL; 98 #define V_me_hashtbl VNET(me_hashtbl) 99 #define ME_HASH(src, dst) (V_me_hashtbl[\ 100 me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)]) 101 102 static struct sx me_ioctl_sx; 103 SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl"); 104 105 static int me_clone_create(struct if_clone *, int, caddr_t); 106 static void me_clone_destroy(struct ifnet *); 107 VNET_DEFINE_STATIC(struct if_clone *, me_cloner); 108 #define V_me_cloner VNET(me_cloner) 109 110 static void me_qflush(struct ifnet *); 111 static int me_transmit(struct ifnet *, struct mbuf *); 112 static int me_ioctl(struct ifnet *, u_long, caddr_t); 113 static int me_output(struct ifnet *, struct mbuf *, 114 const struct sockaddr *, struct route *); 115 static int me_input(struct mbuf *, int, int, void *); 116 117 static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t); 118 static void me_delete_tunnel(struct me_softc *); 119 120 SYSCTL_DECL(_net_link); 121 static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0, 122 "Minimal Encapsulation for IP (RFC 2004)"); 123 #ifndef MAX_ME_NEST 124 #define MAX_ME_NEST 1 125 #endif 126 127 VNET_DEFINE_STATIC(int, max_me_nesting) = MAX_ME_NEST; 128 #define V_max_me_nesting VNET(max_me_nesting) 129 SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, 130 &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); 131 132 static uint32_t 133 me_hashval(in_addr_t src, in_addr_t dst) 134 { 135 uint32_t ret; 136 137 ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); 138 return (fnv_32_buf(&dst, sizeof(dst), ret)); 139 } 140 141 static struct me_list * 142 me_hashinit(void) 143 { 144 struct me_list *hash; 145 int i; 146 147 hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE, 148 M_IFME, M_WAITOK); 149 for (i = 0; i < ME_HASH_SIZE; i++) 150 CK_LIST_INIT(&hash[i]); 151 152 return (hash); 153 } 154 155 static void 156 vnet_me_init(const void *unused __unused) 157 { 158 V_me_cloner = if_clone_simple(mename, me_clone_create, 159 me_clone_destroy, 0); 160 } 161 VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 162 vnet_me_init, NULL); 163 164 static void 165 vnet_me_uninit(const void *unused __unused) 166 { 167 168 if (V_me_hashtbl != NULL) 169 free(V_me_hashtbl, M_IFME); 170 if_clone_detach(V_me_cloner); 171 } 172 VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 173 vnet_me_uninit, NULL); 174 175 static int 176 me_clone_create(struct if_clone *ifc, int unit, caddr_t params) 177 { 178 struct me_softc *sc; 179 180 sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO); 181 sc->me_fibnum = curthread->td_proc->p_fibnum; 182 ME2IFP(sc) = if_alloc(IFT_TUNNEL); 183 ME2IFP(sc)->if_softc = sc; 184 if_initname(ME2IFP(sc), mename, unit); 185 186 ME2IFP(sc)->if_mtu = MEMTU;; 187 ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 188 ME2IFP(sc)->if_output = me_output; 189 ME2IFP(sc)->if_ioctl = me_ioctl; 190 ME2IFP(sc)->if_transmit = me_transmit; 191 ME2IFP(sc)->if_qflush = me_qflush; 192 ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; 193 ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; 194 if_attach(ME2IFP(sc)); 195 bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 196 return (0); 197 } 198 199 static void 200 me_clone_destroy(struct ifnet *ifp) 201 { 202 struct me_softc *sc; 203 204 sx_xlock(&me_ioctl_sx); 205 sc = ifp->if_softc; 206 me_delete_tunnel(sc); 207 bpfdetach(ifp); 208 if_detach(ifp); 209 ifp->if_softc = NULL; 210 sx_xunlock(&me_ioctl_sx); 211 212 ME_WAIT(); 213 if_free(ifp); 214 free(sc, M_IFME); 215 } 216 217 static int 218 me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 219 { 220 struct ifreq *ifr = (struct ifreq *)data; 221 struct sockaddr_in *src, *dst; 222 struct me_softc *sc; 223 int error; 224 225 switch (cmd) { 226 case SIOCSIFMTU: 227 if (ifr->ifr_mtu < 576) 228 return (EINVAL); 229 ifp->if_mtu = ifr->ifr_mtu; 230 return (0); 231 case SIOCSIFADDR: 232 ifp->if_flags |= IFF_UP; 233 case SIOCSIFFLAGS: 234 case SIOCADDMULTI: 235 case SIOCDELMULTI: 236 return (0); 237 } 238 sx_xlock(&me_ioctl_sx); 239 sc = ifp->if_softc; 240 if (sc == NULL) { 241 error = ENXIO; 242 goto end; 243 } 244 error = 0; 245 switch (cmd) { 246 case SIOCSIFPHYADDR: 247 src = &((struct in_aliasreq *)data)->ifra_addr; 248 dst = &((struct in_aliasreq *)data)->ifra_dstaddr; 249 if (src->sin_family != dst->sin_family || 250 src->sin_family != AF_INET || 251 src->sin_len != dst->sin_len || 252 src->sin_len != sizeof(struct sockaddr_in)) { 253 error = EINVAL; 254 break; 255 } 256 if (src->sin_addr.s_addr == INADDR_ANY || 257 dst->sin_addr.s_addr == INADDR_ANY) { 258 error = EADDRNOTAVAIL; 259 break; 260 } 261 error = me_set_tunnel(sc, src->sin_addr.s_addr, 262 dst->sin_addr.s_addr); 263 break; 264 case SIOCDIFPHYADDR: 265 me_delete_tunnel(sc); 266 break; 267 case SIOCGIFPSRCADDR: 268 case SIOCGIFPDSTADDR: 269 if (!ME_READY(sc)) { 270 error = EADDRNOTAVAIL; 271 break; 272 } 273 src = (struct sockaddr_in *)&ifr->ifr_addr; 274 memset(src, 0, sizeof(*src)); 275 src->sin_family = AF_INET; 276 src->sin_len = sizeof(*src); 277 switch (cmd) { 278 case SIOCGIFPSRCADDR: 279 src->sin_addr = sc->me_src; 280 break; 281 case SIOCGIFPDSTADDR: 282 src->sin_addr = sc->me_dst; 283 break; 284 } 285 error = prison_if(curthread->td_ucred, sintosa(src)); 286 if (error != 0) 287 memset(src, 0, sizeof(*src)); 288 break; 289 case SIOCGTUNFIB: 290 ifr->ifr_fib = sc->me_fibnum; 291 break; 292 case SIOCSTUNFIB: 293 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) 294 break; 295 if (ifr->ifr_fib >= rt_numfibs) 296 error = EINVAL; 297 else 298 sc->me_fibnum = ifr->ifr_fib; 299 break; 300 default: 301 error = EINVAL; 302 break; 303 } 304 end: 305 sx_xunlock(&me_ioctl_sx); 306 return (error); 307 } 308 309 static int 310 me_lookup(const struct mbuf *m, int off, int proto, void **arg) 311 { 312 const struct ip *ip; 313 struct me_softc *sc; 314 315 if (V_me_hashtbl == NULL) 316 return (0); 317 318 MPASS(in_epoch(net_epoch_preempt)); 319 ip = mtod(m, const struct ip *); 320 CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr, 321 ip->ip_src.s_addr), chain) { 322 if (sc->me_src.s_addr == ip->ip_dst.s_addr && 323 sc->me_dst.s_addr == ip->ip_src.s_addr) { 324 if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) 325 return (0); 326 *arg = sc; 327 return (ENCAP_DRV_LOOKUP); 328 } 329 } 330 return (0); 331 } 332 333 static int 334 me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst) 335 { 336 struct me_softc *tmp; 337 338 sx_assert(&me_ioctl_sx, SA_XLOCKED); 339 340 if (V_me_hashtbl == NULL) 341 V_me_hashtbl = me_hashinit(); 342 343 if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst) 344 return (0); 345 346 CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) { 347 if (tmp == sc) 348 continue; 349 if (tmp->me_src.s_addr == src && 350 tmp->me_dst.s_addr == dst) 351 return (EADDRNOTAVAIL); 352 } 353 354 me_delete_tunnel(sc); 355 sc->me_dst.s_addr = dst; 356 sc->me_src.s_addr = src; 357 CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain); 358 359 ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 360 if_link_state_change(ME2IFP(sc), LINK_STATE_UP); 361 return (0); 362 } 363 364 static void 365 me_delete_tunnel(struct me_softc *sc) 366 { 367 368 sx_assert(&me_ioctl_sx, SA_XLOCKED); 369 if (ME_READY(sc)) { 370 CK_LIST_REMOVE(sc, chain); 371 ME_WAIT(); 372 373 sc->me_src.s_addr = 0; 374 sc->me_dst.s_addr = 0; 375 ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 376 if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN); 377 } 378 } 379 380 static uint16_t 381 me_in_cksum(uint16_t *p, int nwords) 382 { 383 uint32_t sum = 0; 384 385 while (nwords-- > 0) 386 sum += *p++; 387 sum = (sum >> 16) + (sum & 0xffff); 388 sum += (sum >> 16); 389 return (~sum); 390 } 391 392 static int 393 me_input(struct mbuf *m, int off, int proto, void *arg) 394 { 395 struct me_softc *sc = arg; 396 struct mobhdr *mh; 397 struct ifnet *ifp; 398 struct ip *ip; 399 int hlen; 400 401 ifp = ME2IFP(sc); 402 /* checks for short packets */ 403 hlen = sizeof(struct mobhdr); 404 if (m->m_pkthdr.len < sizeof(struct ip) + hlen) 405 hlen -= sizeof(struct in_addr); 406 if (m->m_len < sizeof(struct ip) + hlen) 407 m = m_pullup(m, sizeof(struct ip) + hlen); 408 if (m == NULL) 409 goto drop; 410 mh = (struct mobhdr *)mtodo(m, sizeof(struct ip)); 411 /* check for wrong flags */ 412 if (mh->mob_flags & (~MOB_FLAGS_SP)) { 413 m_freem(m); 414 goto drop; 415 } 416 if (mh->mob_flags) { 417 if (hlen != sizeof(struct mobhdr)) { 418 m_freem(m); 419 goto drop; 420 } 421 } else 422 hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); 423 /* check mobile header checksum */ 424 if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) { 425 m_freem(m); 426 goto drop; 427 } 428 #ifdef MAC 429 mac_ifnet_create_mbuf(ifp, m); 430 #endif 431 ip = mtod(m, struct ip *); 432 ip->ip_dst = mh->mob_dst; 433 ip->ip_p = mh->mob_proto; 434 ip->ip_sum = 0; 435 ip->ip_len = htons(m->m_pkthdr.len - hlen); 436 if (mh->mob_flags) 437 ip->ip_src = mh->mob_src; 438 memmove(mtodo(m, hlen), ip, sizeof(struct ip)); 439 m_adj(m, hlen); 440 m_clrprotoflags(m); 441 m->m_pkthdr.rcvif = ifp; 442 m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); 443 M_SETFIB(m, ifp->if_fib); 444 hlen = AF_INET; 445 BPF_MTAP2(ifp, &hlen, sizeof(hlen), m); 446 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 447 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 448 if ((ifp->if_flags & IFF_MONITOR) != 0) 449 m_freem(m); 450 else 451 netisr_dispatch(NETISR_IP, m); 452 return (IPPROTO_DONE); 453 drop: 454 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 455 return (IPPROTO_DONE); 456 } 457 458 static int 459 me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 460 struct route *ro __unused) 461 { 462 uint32_t af; 463 464 if (dst->sa_family == AF_UNSPEC) 465 bcopy(dst->sa_data, &af, sizeof(af)); 466 else 467 af = dst->sa_family; 468 m->m_pkthdr.csum_data = af; 469 return (ifp->if_transmit(ifp, m)); 470 } 471 472 #define MTAG_ME 1414491977 473 static int 474 me_transmit(struct ifnet *ifp, struct mbuf *m) 475 { 476 struct mobhdr mh; 477 struct me_softc *sc; 478 struct ip *ip; 479 uint32_t af; 480 int error, hlen, plen; 481 482 #ifdef MAC 483 error = mac_ifnet_check_transmit(ifp, m); 484 if (error != 0) 485 goto drop; 486 #endif 487 error = ENETDOWN; 488 ME_RLOCK(); 489 sc = ifp->if_softc; 490 if (sc == NULL || !ME_READY(sc) || 491 (ifp->if_flags & IFF_MONITOR) != 0 || 492 (ifp->if_flags & IFF_UP) == 0 || 493 (error = if_tunnel_check_nesting(ifp, m, MTAG_ME, 494 V_max_me_nesting)) != 0) { 495 m_freem(m); 496 goto drop; 497 } 498 af = m->m_pkthdr.csum_data; 499 if (af != AF_INET) { 500 error = EAFNOSUPPORT; 501 m_freem(m); 502 goto drop; 503 } 504 if (m->m_len < sizeof(struct ip)) 505 m = m_pullup(m, sizeof(struct ip)); 506 if (m == NULL) { 507 error = ENOBUFS; 508 goto drop; 509 } 510 ip = mtod(m, struct ip *); 511 /* Fragmented datagramms shouldn't be encapsulated */ 512 if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 513 error = EINVAL; 514 m_freem(m); 515 goto drop; 516 } 517 mh.mob_proto = ip->ip_p; 518 mh.mob_src = ip->ip_src; 519 mh.mob_dst = ip->ip_dst; 520 if (in_hosteq(sc->me_src, ip->ip_src)) { 521 hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); 522 mh.mob_flags = 0; 523 } else { 524 hlen = sizeof(struct mobhdr); 525 mh.mob_flags = MOB_FLAGS_SP; 526 } 527 BPF_MTAP2(ifp, &af, sizeof(af), m); 528 plen = m->m_pkthdr.len; 529 ip->ip_src = sc->me_src; 530 ip->ip_dst = sc->me_dst; 531 m->m_flags &= ~(M_BCAST|M_MCAST); 532 M_SETFIB(m, sc->me_fibnum); 533 M_PREPEND(m, hlen, M_NOWAIT); 534 if (m == NULL) { 535 error = ENOBUFS; 536 goto drop; 537 } 538 if (m->m_len < sizeof(struct ip) + hlen) 539 m = m_pullup(m, sizeof(struct ip) + hlen); 540 if (m == NULL) { 541 error = ENOBUFS; 542 goto drop; 543 } 544 memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip)); 545 ip = mtod(m, struct ip *); 546 ip->ip_len = htons(m->m_pkthdr.len); 547 ip->ip_p = IPPROTO_MOBILE; 548 ip->ip_sum = 0; 549 mh.mob_csum = 0; 550 mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t)); 551 bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen); 552 error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); 553 drop: 554 if (error) 555 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 556 else { 557 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 558 if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); 559 } 560 ME_RUNLOCK(); 561 return (error); 562 } 563 564 static void 565 me_qflush(struct ifnet *ifp __unused) 566 { 567 568 } 569 570 static const struct encaptab *ecookie = NULL; 571 static const struct encap_config me_encap_cfg = { 572 .proto = IPPROTO_MOBILE, 573 .min_length = sizeof(struct ip) + sizeof(struct mobhdr) - 574 sizeof(in_addr_t), 575 .exact_match = ENCAP_DRV_LOOKUP, 576 .lookup = me_lookup, 577 .input = me_input 578 }; 579 580 static int 581 memodevent(module_t mod, int type, void *data) 582 { 583 584 switch (type) { 585 case MOD_LOAD: 586 ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK); 587 break; 588 case MOD_UNLOAD: 589 ip_encap_detach(ecookie); 590 break; 591 default: 592 return (EOPNOTSUPP); 593 } 594 return (0); 595 } 596 597 static moduledata_t me_mod = { 598 "if_me", 599 memodevent, 600 0 601 }; 602 603 DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 604 MODULE_VERSION(if_me, 1); 605