1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 1998 The NetBSD Foundation, Inc. 5 * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org> 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Heiko W.Rupp <hwr@pilhuhn.de> 10 * 11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 * 34 * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ 35 */ 36 37 #include <sys/cdefs.h> 38 #include "opt_inet.h" 39 #include "opt_inet6.h" 40 41 #include <sys/param.h> 42 #include <sys/jail.h> 43 #include <sys/systm.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/sockio.h> 47 #include <sys/mbuf.h> 48 #include <sys/errno.h> 49 #include <sys/kernel.h> 50 #include <sys/sysctl.h> 51 #include <sys/malloc.h> 52 #include <sys/proc.h> 53 54 #include <net/if.h> 55 #include <net/if_var.h> 56 #include <net/if_private.h> 57 #include <net/vnet.h> 58 59 #include <netinet/in.h> 60 #include <netinet/in_var.h> 61 #include <netinet/in_pcb.h> 62 #include <netinet/ip.h> 63 #include <netinet/ip_encap.h> 64 #include <netinet/ip_var.h> 65 #include <netinet/udp.h> 66 #include <netinet/udp_var.h> 67 68 #ifdef INET6 69 #include <netinet/ip6.h> 70 #endif 71 72 #include <net/if_gre.h> 73 #include <machine/in_cksum.h> 74 75 #define GRE_TTL 30 76 VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL; 77 #define V_ip_gre_ttl VNET(ip_gre_ttl) 78 SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW, 79 &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets"); 80 81 struct in_gre_socket { 82 struct gre_socket base; 83 in_addr_t addr; 84 }; 85 VNET_DEFINE_STATIC(struct gre_sockets *, ipv4_sockets) = NULL; 86 VNET_DEFINE_STATIC(struct gre_list *, ipv4_hashtbl) = NULL; 87 VNET_DEFINE_STATIC(struct gre_list *, ipv4_srchashtbl) = NULL; 88 #define V_ipv4_sockets VNET(ipv4_sockets) 89 #define V_ipv4_hashtbl VNET(ipv4_hashtbl) 90 #define V_ipv4_srchashtbl VNET(ipv4_srchashtbl) 91 #define GRE_HASH(src, dst) (V_ipv4_hashtbl[\ 92 in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)]) 93 #define GRE_SRCHASH(src) (V_ipv4_srchashtbl[\ 94 fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)]) 95 #define GRE_SOCKHASH(src) (V_ipv4_sockets[\ 96 fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)]) 97 #define GRE_HASH_SC(sc) GRE_HASH((sc)->gre_oip.ip_src.s_addr,\ 98 (sc)->gre_oip.ip_dst.s_addr) 99 100 static uint32_t 101 in_gre_hashval(in_addr_t src, in_addr_t dst) 102 { 103 uint32_t ret; 104 105 ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); 106 return (fnv_32_buf(&dst, sizeof(dst), ret)); 107 } 108 109 static struct gre_socket* 110 in_gre_lookup_socket(in_addr_t addr) 111 { 112 struct gre_socket *gs; 113 struct in_gre_socket *s; 114 115 CK_LIST_FOREACH(gs, &GRE_SOCKHASH(addr), chain) { 116 s = __containerof(gs, struct in_gre_socket, base); 117 if (s->addr == addr) 118 break; 119 } 120 return (gs); 121 } 122 123 static int 124 in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst, 125 uint32_t opts) 126 { 127 struct gre_list *head; 128 struct gre_softc *tmp; 129 struct gre_socket *gs; 130 131 if (sc->gre_family == AF_INET && 132 sc->gre_oip.ip_src.s_addr == src && 133 sc->gre_oip.ip_dst.s_addr == dst && 134 (sc->gre_options & GRE_UDPENCAP) == (opts & GRE_UDPENCAP)) 135 return (EEXIST); 136 137 if (opts & GRE_UDPENCAP) { 138 gs = in_gre_lookup_socket(src); 139 if (gs == NULL) 140 return (0); 141 head = &gs->list; 142 } else 143 head = &GRE_HASH(src, dst); 144 145 CK_LIST_FOREACH(tmp, head, chain) { 146 if (tmp == sc) 147 continue; 148 if (tmp->gre_oip.ip_src.s_addr == src && 149 tmp->gre_oip.ip_dst.s_addr == dst) 150 return (EADDRNOTAVAIL); 151 } 152 return (0); 153 } 154 155 static int 156 in_gre_lookup(const struct mbuf *m, int off, int proto, void **arg) 157 { 158 const struct ip *ip; 159 struct gre_softc *sc; 160 161 if (V_ipv4_hashtbl == NULL) 162 return (0); 163 164 NET_EPOCH_ASSERT(); 165 ip = mtod(m, const struct ip *); 166 CK_LIST_FOREACH(sc, &GRE_HASH(ip->ip_dst.s_addr, 167 ip->ip_src.s_addr), chain) { 168 /* 169 * This is an inbound packet, its ip_dst is source address 170 * in softc. 171 */ 172 if (sc->gre_oip.ip_src.s_addr == ip->ip_dst.s_addr && 173 sc->gre_oip.ip_dst.s_addr == ip->ip_src.s_addr) { 174 if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0) 175 return (0); 176 *arg = sc; 177 return (ENCAP_DRV_LOOKUP); 178 } 179 } 180 return (0); 181 } 182 183 /* 184 * Check that ingress address belongs to local host. 185 */ 186 static void 187 in_gre_set_running(struct gre_softc *sc) 188 { 189 190 if (in_localip(sc->gre_oip.ip_src)) 191 GRE2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; 192 else 193 GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; 194 } 195 196 /* 197 * ifaddr_event handler. 198 * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent 199 * source address spoofing. 200 */ 201 static void 202 in_gre_srcaddr(void *arg __unused, const struct sockaddr *sa, 203 int event __unused) 204 { 205 const struct sockaddr_in *sin; 206 struct gre_softc *sc; 207 208 /* Check that VNET is ready */ 209 if (V_ipv4_hashtbl == NULL) 210 return; 211 212 NET_EPOCH_ASSERT(); 213 sin = (const struct sockaddr_in *)sa; 214 CK_LIST_FOREACH(sc, &GRE_SRCHASH(sin->sin_addr.s_addr), srchash) { 215 if (sc->gre_oip.ip_src.s_addr != sin->sin_addr.s_addr) 216 continue; 217 in_gre_set_running(sc); 218 } 219 } 220 221 static bool 222 in_gre_udp_input(struct mbuf *m, int off, struct inpcb *inp, 223 const struct sockaddr *sa, void *ctx) 224 { 225 struct gre_socket *gs; 226 struct gre_softc *sc; 227 in_addr_t dst; 228 229 NET_EPOCH_ASSERT(); 230 231 gs = (struct gre_socket *)ctx; 232 dst = ((const struct sockaddr_in *)sa)->sin_addr.s_addr; 233 CK_LIST_FOREACH(sc, &gs->list, chain) { 234 if (sc->gre_oip.ip_dst.s_addr == dst) 235 break; 236 } 237 if (sc != NULL && (GRE2IFP(sc)->if_flags & IFF_UP) != 0){ 238 gre_input(m, off + sizeof(struct udphdr), IPPROTO_UDP, sc); 239 return (true); 240 } 241 m_freem(m); 242 243 return (true); 244 } 245 246 static int 247 in_gre_setup_socket(struct gre_softc *sc) 248 { 249 struct sockopt sopt; 250 struct sockaddr_in sin; 251 struct in_gre_socket *s; 252 struct gre_socket *gs; 253 in_addr_t addr; 254 int error, value; 255 256 /* 257 * NOTE: we are protected with gre_ioctl_sx lock. 258 * 259 * First check that socket is already configured. 260 * If so, check that source address was not changed. 261 * If address is different, check that there are no other tunnels 262 * and close socket. 263 */ 264 addr = sc->gre_oip.ip_src.s_addr; 265 gs = sc->gre_so; 266 if (gs != NULL) { 267 s = __containerof(gs, struct in_gre_socket, base); 268 if (s->addr != addr) { 269 if (CK_LIST_EMPTY(&gs->list)) { 270 CK_LIST_REMOVE(gs, chain); 271 soclose(gs->so); 272 NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx); 273 } 274 gs = sc->gre_so = NULL; 275 } 276 } 277 278 if (gs == NULL) { 279 /* 280 * Check that socket for given address is already 281 * configured. 282 */ 283 gs = in_gre_lookup_socket(addr); 284 if (gs == NULL) { 285 s = malloc(sizeof(*s), M_GRE, M_WAITOK | M_ZERO); 286 s->addr = addr; 287 gs = &s->base; 288 289 error = socreate(sc->gre_family, &gs->so, 290 SOCK_DGRAM, IPPROTO_UDP, curthread->td_ucred, 291 curthread); 292 if (error != 0) { 293 if_printf(GRE2IFP(sc), 294 "cannot create socket: %d\n", error); 295 free(s, M_GRE); 296 return (error); 297 } 298 299 error = udp_set_kernel_tunneling(gs->so, 300 in_gre_udp_input, NULL, gs); 301 if (error != 0) { 302 if_printf(GRE2IFP(sc), 303 "cannot set UDP tunneling: %d\n", error); 304 goto fail; 305 } 306 307 memset(&sopt, 0, sizeof(sopt)); 308 sopt.sopt_dir = SOPT_SET; 309 sopt.sopt_level = IPPROTO_IP; 310 sopt.sopt_name = IP_BINDANY; 311 sopt.sopt_val = &value; 312 sopt.sopt_valsize = sizeof(value); 313 value = 1; 314 error = sosetopt(gs->so, &sopt); 315 if (error != 0) { 316 if_printf(GRE2IFP(sc), 317 "cannot set IP_BINDANY opt: %d\n", error); 318 goto fail; 319 } 320 321 memset(&sin, 0, sizeof(sin)); 322 sin.sin_family = AF_INET; 323 sin.sin_len = sizeof(sin); 324 sin.sin_addr.s_addr = addr; 325 sin.sin_port = htons(GRE_UDPPORT); 326 error = sobind(gs->so, (struct sockaddr *)&sin, 327 curthread); 328 if (error != 0) { 329 if_printf(GRE2IFP(sc), 330 "cannot bind socket: %d\n", error); 331 goto fail; 332 } 333 /* Add socket to the chain */ 334 CK_LIST_INSERT_HEAD(&GRE_SOCKHASH(addr), gs, chain); 335 } 336 } 337 338 /* Add softc to the socket's list */ 339 CK_LIST_INSERT_HEAD(&gs->list, sc, chain); 340 sc->gre_so = gs; 341 return (0); 342 fail: 343 soclose(gs->so); 344 free(s, M_GRE); 345 return (error); 346 } 347 348 static int 349 in_gre_attach(struct gre_softc *sc) 350 { 351 struct epoch_tracker et; 352 struct grehdr *gh; 353 int error; 354 355 if (sc->gre_options & GRE_UDPENCAP) { 356 sc->gre_csumflags = CSUM_UDP; 357 sc->gre_hlen = sizeof(struct greudp); 358 sc->gre_oip.ip_p = IPPROTO_UDP; 359 gh = &sc->gre_udphdr->gi_gre; 360 gre_update_udphdr(sc, &sc->gre_udp, 361 in_pseudo(sc->gre_oip.ip_src.s_addr, 362 sc->gre_oip.ip_dst.s_addr, 0)); 363 } else { 364 sc->gre_hlen = sizeof(struct greip); 365 sc->gre_oip.ip_p = IPPROTO_GRE; 366 gh = &sc->gre_iphdr->gi_gre; 367 } 368 sc->gre_oip.ip_v = IPVERSION; 369 sc->gre_oip.ip_hl = sizeof(struct ip) >> 2; 370 gre_update_hdr(sc, gh); 371 372 /* 373 * If we return error, this means that sc is not linked, 374 * and caller should reset gre_family and free(sc->gre_hdr). 375 */ 376 if (sc->gre_options & GRE_UDPENCAP) { 377 error = in_gre_setup_socket(sc); 378 if (error != 0) 379 return (error); 380 } else 381 CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain); 382 CK_LIST_INSERT_HEAD(&GRE_SRCHASH(sc->gre_oip.ip_src.s_addr), 383 sc, srchash); 384 385 /* Set IFF_DRV_RUNNING if interface is ready */ 386 NET_EPOCH_ENTER(et); 387 in_gre_set_running(sc); 388 NET_EPOCH_EXIT(et); 389 return (0); 390 } 391 392 int 393 in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value) 394 { 395 int error; 396 397 /* NOTE: we are protected with gre_ioctl_sx lock */ 398 MPASS(cmd == GRESKEY || cmd == GRESOPTS || cmd == GRESPORT); 399 MPASS(sc->gre_family == AF_INET); 400 401 /* 402 * If we are going to change encapsulation protocol, do check 403 * for duplicate tunnels. Return EEXIST here to do not confuse 404 * user. 405 */ 406 if (cmd == GRESOPTS && 407 (sc->gre_options & GRE_UDPENCAP) != (value & GRE_UDPENCAP) && 408 in_gre_checkdup(sc, sc->gre_oip.ip_src.s_addr, 409 sc->gre_oip.ip_dst.s_addr, value) == EADDRNOTAVAIL) 410 return (EEXIST); 411 412 CK_LIST_REMOVE(sc, chain); 413 CK_LIST_REMOVE(sc, srchash); 414 GRE_WAIT(); 415 switch (cmd) { 416 case GRESKEY: 417 sc->gre_key = value; 418 break; 419 case GRESOPTS: 420 sc->gre_options = value; 421 break; 422 case GRESPORT: 423 sc->gre_port = value; 424 break; 425 } 426 error = in_gre_attach(sc); 427 if (error != 0) { 428 sc->gre_family = 0; 429 free(sc->gre_hdr, M_GRE); 430 } 431 return (error); 432 } 433 434 int 435 in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data) 436 { 437 struct ifreq *ifr = (struct ifreq *)data; 438 struct sockaddr_in *dst, *src; 439 struct ip *ip; 440 int error; 441 442 /* NOTE: we are protected with gre_ioctl_sx lock */ 443 error = EINVAL; 444 switch (cmd) { 445 case SIOCSIFPHYADDR: 446 src = &((struct in_aliasreq *)data)->ifra_addr; 447 dst = &((struct in_aliasreq *)data)->ifra_dstaddr; 448 449 /* sanity checks */ 450 if (src->sin_family != dst->sin_family || 451 src->sin_family != AF_INET || 452 src->sin_len != dst->sin_len || 453 src->sin_len != sizeof(*src)) 454 break; 455 if (src->sin_addr.s_addr == INADDR_ANY || 456 dst->sin_addr.s_addr == INADDR_ANY) { 457 error = EADDRNOTAVAIL; 458 break; 459 } 460 if (V_ipv4_hashtbl == NULL) { 461 V_ipv4_hashtbl = gre_hashinit(); 462 V_ipv4_srchashtbl = gre_hashinit(); 463 V_ipv4_sockets = (struct gre_sockets *)gre_hashinit(); 464 } 465 error = in_gre_checkdup(sc, src->sin_addr.s_addr, 466 dst->sin_addr.s_addr, sc->gre_options); 467 if (error == EADDRNOTAVAIL) 468 break; 469 if (error == EEXIST) { 470 /* Addresses are the same. Just return. */ 471 error = 0; 472 break; 473 } 474 ip = malloc(sizeof(struct greudp) + 3 * sizeof(uint32_t), 475 M_GRE, M_WAITOK | M_ZERO); 476 ip->ip_src.s_addr = src->sin_addr.s_addr; 477 ip->ip_dst.s_addr = dst->sin_addr.s_addr; 478 if (sc->gre_family != 0) { 479 /* Detach existing tunnel first */ 480 CK_LIST_REMOVE(sc, chain); 481 CK_LIST_REMOVE(sc, srchash); 482 GRE_WAIT(); 483 free(sc->gre_hdr, M_GRE); 484 /* XXX: should we notify about link state change? */ 485 } 486 sc->gre_family = AF_INET; 487 sc->gre_hdr = ip; 488 sc->gre_oseq = 0; 489 sc->gre_iseq = UINT32_MAX; 490 error = in_gre_attach(sc); 491 if (error != 0) { 492 sc->gre_family = 0; 493 free(sc->gre_hdr, M_GRE); 494 } 495 break; 496 case SIOCGIFPSRCADDR: 497 case SIOCGIFPDSTADDR: 498 if (sc->gre_family != AF_INET) { 499 error = EADDRNOTAVAIL; 500 break; 501 } 502 src = (struct sockaddr_in *)&ifr->ifr_addr; 503 memset(src, 0, sizeof(*src)); 504 src->sin_family = AF_INET; 505 src->sin_len = sizeof(*src); 506 src->sin_addr = (cmd == SIOCGIFPSRCADDR) ? 507 sc->gre_oip.ip_src: sc->gre_oip.ip_dst; 508 error = prison_if(curthread->td_ucred, (struct sockaddr *)src); 509 if (error != 0) 510 memset(src, 0, sizeof(*src)); 511 break; 512 } 513 return (error); 514 } 515 516 int 517 in_gre_output(struct mbuf *m, int af, int hlen) 518 { 519 struct greip *gi; 520 521 gi = mtod(m, struct greip *); 522 switch (af) { 523 case AF_INET: 524 /* 525 * gre_transmit() has used M_PREPEND() that doesn't guarantee 526 * m_data is contiguous more than hlen bytes. Use m_copydata() 527 * here to avoid m_pullup(). 528 */ 529 m_copydata(m, hlen + offsetof(struct ip, ip_tos), 530 sizeof(u_char), &gi->gi_ip.ip_tos); 531 m_copydata(m, hlen + offsetof(struct ip, ip_id), 532 sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id); 533 break; 534 #ifdef INET6 535 case AF_INET6: 536 gi->gi_ip.ip_tos = 0; /* XXX */ 537 ip_fillid(&gi->gi_ip); 538 break; 539 #endif 540 } 541 gi->gi_ip.ip_ttl = V_ip_gre_ttl; 542 gi->gi_ip.ip_len = htons(m->m_pkthdr.len); 543 return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL)); 544 } 545 546 static const struct srcaddrtab *ipv4_srcaddrtab = NULL; 547 static const struct encaptab *ecookie = NULL; 548 static const struct encap_config ipv4_encap_cfg = { 549 .proto = IPPROTO_GRE, 550 .min_length = sizeof(struct greip) + sizeof(struct ip), 551 .exact_match = ENCAP_DRV_LOOKUP, 552 .lookup = in_gre_lookup, 553 .input = gre_input 554 }; 555 556 void 557 in_gre_init(void) 558 { 559 560 if (!IS_DEFAULT_VNET(curvnet)) 561 return; 562 ipv4_srcaddrtab = ip_encap_register_srcaddr(in_gre_srcaddr, 563 NULL, M_WAITOK); 564 ecookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK); 565 } 566 567 void 568 in_gre_uninit(void) 569 { 570 571 if (IS_DEFAULT_VNET(curvnet)) { 572 ip_encap_detach(ecookie); 573 ip_encap_unregister_srcaddr(ipv4_srcaddrtab); 574 } 575 if (V_ipv4_hashtbl != NULL) { 576 gre_hashdestroy(V_ipv4_hashtbl); 577 V_ipv4_hashtbl = NULL; 578 GRE_WAIT(); 579 gre_hashdestroy(V_ipv4_srchashtbl); 580 gre_hashdestroy((struct gre_list *)V_ipv4_sockets); 581 } 582 } 583