1 /* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ 2 /* $FreeBSD$ */ 3 4 /*- 5 * Copyright (c) 1998 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Heiko W.Rupp <hwr@pilhuhn.de> 10 * 11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Encapsulate L3 protocols into IP 37 * See RFC 2784 (successor of RFC 1701 and 1702) for more details. 38 * If_gre is compatible with Cisco GRE tunnels, so you can 39 * have a NetBSD box as the other end of a tunnel interface of a Cisco 40 * router. See gre(4) for more details. 41 * Also supported: IP in IP encaps (proto 55) as of RFC 2004 42 */ 43 44 #include "opt_inet.h" 45 #include "opt_inet6.h" 46 47 #include <sys/param.h> 48 #include <sys/jail.h> 49 #include <sys/kernel.h> 50 #include <sys/libkern.h> 51 #include <sys/malloc.h> 52 #include <sys/module.h> 53 #include <sys/mbuf.h> 54 #include <sys/priv.h> 55 #include <sys/proc.h> 56 #include <sys/protosw.h> 57 #include <sys/socket.h> 58 #include <sys/sockio.h> 59 #include <sys/sysctl.h> 60 #include <sys/systm.h> 61 62 #include <net/ethernet.h> 63 #include <net/if.h> 64 #include <net/if_var.h> 65 #include <net/if_clone.h> 66 #include <net/if_types.h> 67 #include <net/route.h> 68 #include <net/vnet.h> 69 70 #ifdef INET 71 #include <netinet/in.h> 72 #include <netinet/in_systm.h> 73 #include <netinet/in_var.h> 74 #include <netinet/ip.h> 75 #include <netinet/ip_gre.h> 76 #include <netinet/ip_var.h> 77 #include <netinet/ip_encap.h> 78 #else 79 #error "Huh? if_gre without inet?" 80 #endif 81 82 #include <net/bpf.h> 83 84 #include <net/if_gre.h> 85 86 /* 87 * It is not easy to calculate the right value for a GRE MTU. 88 * We leave this task to the admin and use the same default that 89 * other vendors use. 90 */ 91 #define GREMTU 1476 92 93 #define MTAG_COOKIE_GRE 1307983903 94 #define MTAG_GRE_NESTING 1 95 struct mtag_gre_nesting { 96 uint16_t count; 97 uint16_t max; 98 struct ifnet *ifp[]; 99 }; 100 101 /* 102 * gre_mtx protects all global variables in if_gre.c. 103 * XXX: gre_softc data not protected yet. 104 */ 105 VNET_DEFINE(struct mtx, gre_mtx); 106 VNET_DEFINE(struct gre_softc_head, gre_softc_list); 107 108 static const char grename[] = "gre"; 109 static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); 110 111 static int gre_clone_create(struct if_clone *, int, caddr_t); 112 static void gre_clone_destroy(struct ifnet *); 113 static VNET_DEFINE(struct if_clone *, gre_cloner); 114 #define V_gre_cloner VNET(gre_cloner) 115 116 static int gre_ioctl(struct ifnet *, u_long, caddr_t); 117 static int gre_output(struct ifnet *, struct mbuf *, 118 const struct sockaddr *, struct route *); 119 120 static int gre_compute_route(struct gre_softc *sc); 121 122 #ifdef INET 123 extern struct domain inetdomain; 124 static const struct protosw in_gre_protosw = { 125 .pr_type = SOCK_RAW, 126 .pr_domain = &inetdomain, 127 .pr_protocol = IPPROTO_GRE, 128 .pr_flags = PR_ATOMIC|PR_ADDR, 129 .pr_input = gre_input, 130 .pr_output = rip_output, 131 .pr_ctlinput = rip_ctlinput, 132 .pr_ctloutput = rip_ctloutput, 133 .pr_usrreqs = &rip_usrreqs 134 }; 135 static const struct protosw in_mobile_protosw = { 136 .pr_type = SOCK_RAW, 137 .pr_domain = &inetdomain, 138 .pr_protocol = IPPROTO_MOBILE, 139 .pr_flags = PR_ATOMIC|PR_ADDR, 140 .pr_input = gre_mobile_input, 141 .pr_output = rip_output, 142 .pr_ctlinput = rip_ctlinput, 143 .pr_ctloutput = rip_ctloutput, 144 .pr_usrreqs = &rip_usrreqs 145 }; 146 #endif 147 148 SYSCTL_DECL(_net_link); 149 static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, 150 "Generic Routing Encapsulation"); 151 #ifndef MAX_GRE_NEST 152 /* 153 * This macro controls the default upper limitation on nesting of gre tunnels. 154 * Since, setting a large value to this macro with a careless configuration 155 * may introduce system crash, we don't allow any nestings by default. 156 * If you need to configure nested gre tunnels, you can define this macro 157 * in your kernel configuration file. However, if you do so, please be 158 * careful to configure the tunnels so that it won't make a loop. 159 */ 160 #define MAX_GRE_NEST 1 161 #endif 162 static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST; 163 #define V_max_gre_nesting VNET(max_gre_nesting) 164 SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, 165 &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels"); 166 167 static void 168 vnet_gre_init(const void *unused __unused) 169 { 170 LIST_INIT(&V_gre_softc_list); 171 GRE_LIST_LOCK_INIT(); 172 V_gre_cloner = if_clone_simple(grename, gre_clone_create, 173 gre_clone_destroy, 0); 174 } 175 VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 176 vnet_gre_init, NULL); 177 178 static void 179 vnet_gre_uninit(const void *unused __unused) 180 { 181 182 if_clone_detach(V_gre_cloner); 183 GRE_LIST_LOCK_DESTROY(); 184 } 185 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 186 vnet_gre_uninit, NULL); 187 188 static int 189 gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) 190 { 191 struct gre_softc *sc; 192 193 sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); 194 195 GRE2IFP(sc) = if_alloc(IFT_TUNNEL); 196 if (GRE2IFP(sc) == NULL) { 197 free(sc, M_GRE); 198 return (ENOSPC); 199 } 200 201 GRE2IFP(sc)->if_softc = sc; 202 if_initname(GRE2IFP(sc), grename, unit); 203 204 GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; 205 GRE2IFP(sc)->if_addrlen = 0; 206 GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */ 207 GRE2IFP(sc)->if_mtu = GREMTU; 208 GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 209 GRE2IFP(sc)->if_output = gre_output; 210 GRE2IFP(sc)->if_ioctl = gre_ioctl; 211 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; 212 sc->g_proto = IPPROTO_GRE; 213 GRE2IFP(sc)->if_flags |= IFF_LINK0; 214 sc->encap = NULL; 215 sc->gre_fibnum = curthread->td_proc->p_fibnum; 216 sc->wccp_ver = WCCP_V1; 217 sc->key = 0; 218 if_attach(GRE2IFP(sc)); 219 bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); 220 GRE_LIST_LOCK(); 221 LIST_INSERT_HEAD(&V_gre_softc_list, sc, sc_list); 222 GRE_LIST_UNLOCK(); 223 return (0); 224 } 225 226 static void 227 gre_clone_destroy(struct ifnet *ifp) 228 { 229 struct gre_softc *sc = ifp->if_softc; 230 231 GRE_LIST_LOCK(); 232 LIST_REMOVE(sc, sc_list); 233 GRE_LIST_UNLOCK(); 234 235 #ifdef INET 236 if (sc->encap != NULL) 237 encap_detach(sc->encap); 238 #endif 239 bpfdetach(ifp); 240 if_detach(ifp); 241 if_free(ifp); 242 free(sc, M_GRE); 243 } 244 245 /* 246 * The output routine. Takes a packet and encapsulates it in the protocol 247 * given by sc->g_proto. See also RFC 1701 and RFC 2004 248 */ 249 static int 250 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 251 struct route *ro) 252 { 253 int error = 0; 254 struct gre_softc *sc = ifp->if_softc; 255 struct greip *gh; 256 struct ip *ip; 257 struct m_tag *mtag; 258 struct mtag_gre_nesting *gt; 259 size_t len; 260 u_short gre_ip_id = 0; 261 uint8_t gre_ip_tos = 0; 262 u_int16_t etype = 0; 263 struct mobile_h mob_h; 264 u_int32_t af; 265 int extra = 0, max; 266 267 /* 268 * gre may cause infinite recursion calls when misconfigured. High 269 * nesting level may cause stack exhaustion. We'll prevent this by 270 * detecting loops and by introducing upper limit. 271 */ 272 mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL); 273 if (mtag != NULL) { 274 struct ifnet **ifp2; 275 276 gt = (struct mtag_gre_nesting *)(mtag + 1); 277 gt->count++; 278 if (gt->count > min(gt->max, V_max_gre_nesting)) { 279 printf("%s: hit maximum recursion limit %u on %s\n", 280 __func__, gt->count - 1, ifp->if_xname); 281 m_freem(m); 282 error = EIO; /* is there better errno? */ 283 goto end; 284 } 285 286 ifp2 = gt->ifp; 287 for (max = gt->count - 1; max > 0; max--) { 288 if (*ifp2 == ifp) 289 break; 290 ifp2++; 291 } 292 if (*ifp2 == ifp) { 293 printf("%s: detected loop with nexting %u on %s\n", 294 __func__, gt->count-1, ifp->if_xname); 295 m_freem(m); 296 error = EIO; /* is there better errno? */ 297 goto end; 298 } 299 *ifp2 = ifp; 300 301 } else { 302 /* 303 * Given that people should NOT increase max_gre_nesting beyond 304 * their real needs, we allocate once per packet rather than 305 * allocating an mtag once per passing through gre. 306 * 307 * Note: the sysctl does not actually check for saneness, so we 308 * limit the maximum numbers of possible recursions here. 309 */ 310 max = imin(V_max_gre_nesting, 256); 311 /* If someone sets the sysctl <= 0, we want at least 1. */ 312 max = imax(max, 1); 313 len = sizeof(struct mtag_gre_nesting) + 314 max * sizeof(struct ifnet *); 315 mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len, 316 M_NOWAIT); 317 if (mtag == NULL) { 318 m_freem(m); 319 error = ENOMEM; 320 goto end; 321 } 322 gt = (struct mtag_gre_nesting *)(mtag + 1); 323 bzero(gt, len); 324 gt->count = 1; 325 gt->max = max; 326 *gt->ifp = ifp; 327 m_tag_prepend(m, mtag); 328 } 329 330 if (!((ifp->if_flags & IFF_UP) && 331 (ifp->if_drv_flags & IFF_DRV_RUNNING)) || 332 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { 333 m_freem(m); 334 error = ENETDOWN; 335 goto end; 336 } 337 338 gh = NULL; 339 ip = NULL; 340 341 /* BPF writes need to be handled specially. */ 342 if (dst->sa_family == AF_UNSPEC) 343 bcopy(dst->sa_data, &af, sizeof(af)); 344 else 345 af = dst->sa_family; 346 347 if (bpf_peers_present(ifp->if_bpf)) 348 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); 349 350 if ((ifp->if_flags & IFF_MONITOR) != 0) { 351 m_freem(m); 352 error = ENETDOWN; 353 goto end; 354 } 355 356 m->m_flags &= ~(M_BCAST|M_MCAST); 357 358 if (sc->g_proto == IPPROTO_MOBILE) { 359 if (af == AF_INET) { 360 struct mbuf *m0; 361 int msiz; 362 363 ip = mtod(m, struct ip *); 364 365 /* 366 * RFC2004 specifies that fragmented diagrams shouldn't 367 * be encapsulated. 368 */ 369 if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 370 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 371 m_freem(m); 372 error = EINVAL; /* is there better errno? */ 373 goto end; 374 } 375 memset(&mob_h, 0, MOB_H_SIZ_L); 376 mob_h.proto = (ip->ip_p) << 8; 377 mob_h.odst = ip->ip_dst.s_addr; 378 ip->ip_dst.s_addr = sc->g_dst.s_addr; 379 380 /* 381 * If the packet comes from our host, we only change 382 * the destination address in the IP header. 383 * Else we also need to save and change the source 384 */ 385 if (in_hosteq(ip->ip_src, sc->g_src)) { 386 msiz = MOB_H_SIZ_S; 387 } else { 388 mob_h.proto |= MOB_H_SBIT; 389 mob_h.osrc = ip->ip_src.s_addr; 390 ip->ip_src.s_addr = sc->g_src.s_addr; 391 msiz = MOB_H_SIZ_L; 392 } 393 mob_h.proto = htons(mob_h.proto); 394 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz); 395 396 if ((m->m_data - msiz) < m->m_pktdat) { 397 m0 = m_gethdr(M_NOWAIT, MT_DATA); 398 if (m0 == NULL) { 399 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 400 m_freem(m); 401 error = ENOBUFS; 402 goto end; 403 } 404 m0->m_next = m; 405 m->m_data += sizeof(struct ip); 406 m->m_len -= sizeof(struct ip); 407 m0->m_pkthdr.len = m->m_pkthdr.len + msiz; 408 m0->m_len = msiz + sizeof(struct ip); 409 m0->m_data += max_linkhdr; 410 memcpy(mtod(m0, caddr_t), (caddr_t)ip, 411 sizeof(struct ip)); 412 m = m0; 413 } else { /* we have some space left in the old one */ 414 m->m_data -= msiz; 415 m->m_len += msiz; 416 m->m_pkthdr.len += msiz; 417 bcopy(ip, mtod(m, caddr_t), 418 sizeof(struct ip)); 419 } 420 ip = mtod(m, struct ip *); 421 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz); 422 ip->ip_len = htons(ntohs(ip->ip_len) + msiz); 423 } else { /* AF_INET */ 424 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 425 m_freem(m); 426 error = EINVAL; 427 goto end; 428 } 429 } else if (sc->g_proto == IPPROTO_GRE) { 430 switch (af) { 431 case AF_INET: 432 ip = mtod(m, struct ip *); 433 gre_ip_tos = ip->ip_tos; 434 gre_ip_id = ip->ip_id; 435 if (sc->wccp_ver == WCCP_V2) { 436 extra = sizeof(uint32_t); 437 etype = WCCP_PROTOCOL_TYPE; 438 } else { 439 etype = ETHERTYPE_IP; 440 } 441 break; 442 #ifdef INET6 443 case AF_INET6: 444 gre_ip_id = ip_newid(); 445 etype = ETHERTYPE_IPV6; 446 break; 447 #endif 448 default: 449 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 450 m_freem(m); 451 error = EAFNOSUPPORT; 452 goto end; 453 } 454 455 /* Reserve space for GRE header + optional GRE key */ 456 int hdrlen = sizeof(struct greip) + extra; 457 if (sc->key) 458 hdrlen += sizeof(uint32_t); 459 M_PREPEND(m, hdrlen, M_NOWAIT); 460 } else { 461 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 462 m_freem(m); 463 error = EINVAL; 464 goto end; 465 } 466 467 if (m == NULL) { /* mbuf allocation failed */ 468 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 469 error = ENOBUFS; 470 goto end; 471 } 472 473 M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */ 474 475 gh = mtod(m, struct greip *); 476 if (sc->g_proto == IPPROTO_GRE) { 477 uint32_t *options = gh->gi_options; 478 479 memset((void *)gh, 0, sizeof(struct greip) + extra); 480 gh->gi_ptype = htons(etype); 481 gh->gi_flags = 0; 482 483 /* Add key option */ 484 if (sc->key) 485 { 486 gh->gi_flags |= htons(GRE_KP); 487 *(options++) = htonl(sc->key); 488 } 489 } 490 491 gh->gi_pr = sc->g_proto; 492 if (sc->g_proto != IPPROTO_MOBILE) { 493 gh->gi_src = sc->g_src; 494 gh->gi_dst = sc->g_dst; 495 ((struct ip*)gh)->ip_v = IPPROTO_IPV4; 496 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2; 497 ((struct ip*)gh)->ip_ttl = GRE_TTL; 498 ((struct ip*)gh)->ip_tos = gre_ip_tos; 499 ((struct ip*)gh)->ip_id = gre_ip_id; 500 gh->gi_len = htons(m->m_pkthdr.len); 501 } 502 503 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 504 if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 505 /* 506 * Send it off and with IP_FORWARD flag to prevent it from 507 * overwriting the ip_id again. ip_id is already set to the 508 * ip_id of the encapsulated packet. 509 */ 510 error = ip_output(m, NULL, &sc->route, IP_FORWARDING, 511 (struct ip_moptions *)NULL, (struct inpcb *)NULL); 512 end: 513 if (error) 514 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 515 return (error); 516 } 517 518 static int 519 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 520 { 521 struct ifreq *ifr = (struct ifreq *)data; 522 struct in_aliasreq *aifr = (struct in_aliasreq *)data; 523 struct gre_softc *sc = ifp->if_softc; 524 struct sockaddr_in si; 525 struct sockaddr *sa = NULL; 526 int error, adj; 527 struct sockaddr_in sp, sm, dp, dm; 528 uint32_t key; 529 530 error = 0; 531 adj = 0; 532 533 switch (cmd) { 534 case SIOCSIFADDR: 535 ifp->if_flags |= IFF_UP; 536 break; 537 case SIOCSIFFLAGS: 538 /* 539 * XXXRW: Isn't this priv_check() redundant to the ifnet 540 * layer check? 541 */ 542 if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0) 543 break; 544 if ((ifr->ifr_flags & IFF_LINK0) != 0) 545 sc->g_proto = IPPROTO_GRE; 546 else 547 sc->g_proto = IPPROTO_MOBILE; 548 if ((ifr->ifr_flags & IFF_LINK2) != 0) 549 sc->wccp_ver = WCCP_V2; 550 else 551 sc->wccp_ver = WCCP_V1; 552 goto recompute; 553 case SIOCSIFMTU: 554 /* 555 * XXXRW: Isn't this priv_check() redundant to the ifnet 556 * layer check? 557 */ 558 if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0) 559 break; 560 if (ifr->ifr_mtu < 576) { 561 error = EINVAL; 562 break; 563 } 564 ifp->if_mtu = ifr->ifr_mtu; 565 break; 566 case SIOCGIFMTU: 567 ifr->ifr_mtu = GRE2IFP(sc)->if_mtu; 568 break; 569 case SIOCADDMULTI: 570 /* 571 * XXXRW: Isn't this priv_checkr() redundant to the ifnet 572 * layer check? 573 */ 574 if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0) 575 break; 576 if (ifr == 0) { 577 error = EAFNOSUPPORT; 578 break; 579 } 580 switch (ifr->ifr_addr.sa_family) { 581 #ifdef INET 582 case AF_INET: 583 break; 584 #endif 585 #ifdef INET6 586 case AF_INET6: 587 break; 588 #endif 589 default: 590 error = EAFNOSUPPORT; 591 break; 592 } 593 break; 594 case SIOCDELMULTI: 595 /* 596 * XXXRW: Isn't this priv_check() redundant to the ifnet 597 * layer check? 598 */ 599 if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0) 600 break; 601 if (ifr == 0) { 602 error = EAFNOSUPPORT; 603 break; 604 } 605 switch (ifr->ifr_addr.sa_family) { 606 #ifdef INET 607 case AF_INET: 608 break; 609 #endif 610 #ifdef INET6 611 case AF_INET6: 612 break; 613 #endif 614 default: 615 error = EAFNOSUPPORT; 616 break; 617 } 618 break; 619 case GRESPROTO: 620 /* 621 * XXXRW: Isn't this priv_check() redundant to the ifnet 622 * layer check? 623 */ 624 if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) 625 break; 626 sc->g_proto = ifr->ifr_flags; 627 switch (sc->g_proto) { 628 case IPPROTO_GRE: 629 ifp->if_flags |= IFF_LINK0; 630 break; 631 case IPPROTO_MOBILE: 632 ifp->if_flags &= ~IFF_LINK0; 633 break; 634 default: 635 error = EPROTONOSUPPORT; 636 break; 637 } 638 goto recompute; 639 case GREGPROTO: 640 ifr->ifr_flags = sc->g_proto; 641 break; 642 case GRESADDRS: 643 case GRESADDRD: 644 error = priv_check(curthread, PRIV_NET_GRE); 645 if (error) 646 return (error); 647 /* 648 * set tunnel endpoints, compute a less specific route 649 * to the remote end and mark if as up 650 */ 651 sa = &ifr->ifr_addr; 652 if (cmd == GRESADDRS) 653 sc->g_src = (satosin(sa))->sin_addr; 654 if (cmd == GRESADDRD) 655 sc->g_dst = (satosin(sa))->sin_addr; 656 recompute: 657 #ifdef INET 658 if (sc->encap != NULL) { 659 encap_detach(sc->encap); 660 sc->encap = NULL; 661 } 662 #endif 663 if ((sc->g_src.s_addr != INADDR_ANY) && 664 (sc->g_dst.s_addr != INADDR_ANY)) { 665 bzero(&sp, sizeof(sp)); 666 bzero(&sm, sizeof(sm)); 667 bzero(&dp, sizeof(dp)); 668 bzero(&dm, sizeof(dm)); 669 sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len = 670 sizeof(struct sockaddr_in); 671 sp.sin_family = sm.sin_family = dp.sin_family = 672 dm.sin_family = AF_INET; 673 sp.sin_addr = sc->g_src; 674 dp.sin_addr = sc->g_dst; 675 sm.sin_addr.s_addr = dm.sin_addr.s_addr = 676 INADDR_BROADCAST; 677 #ifdef INET 678 sc->encap = encap_attach(AF_INET, sc->g_proto, 679 sintosa(&sp), sintosa(&sm), sintosa(&dp), 680 sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ? 681 &in_gre_protosw : &in_mobile_protosw, sc); 682 if (sc->encap == NULL) 683 printf("%s: unable to attach encap\n", 684 if_name(GRE2IFP(sc))); 685 #endif 686 if (sc->route.ro_rt != 0) /* free old route */ 687 RTFREE(sc->route.ro_rt); 688 if (gre_compute_route(sc) == 0) 689 ifp->if_drv_flags |= IFF_DRV_RUNNING; 690 else 691 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 692 } 693 break; 694 case GREGADDRS: 695 memset(&si, 0, sizeof(si)); 696 si.sin_family = AF_INET; 697 si.sin_len = sizeof(struct sockaddr_in); 698 si.sin_addr.s_addr = sc->g_src.s_addr; 699 sa = sintosa(&si); 700 error = prison_if(curthread->td_ucred, sa); 701 if (error != 0) 702 break; 703 ifr->ifr_addr = *sa; 704 break; 705 case GREGADDRD: 706 memset(&si, 0, sizeof(si)); 707 si.sin_family = AF_INET; 708 si.sin_len = sizeof(struct sockaddr_in); 709 si.sin_addr.s_addr = sc->g_dst.s_addr; 710 sa = sintosa(&si); 711 error = prison_if(curthread->td_ucred, sa); 712 if (error != 0) 713 break; 714 ifr->ifr_addr = *sa; 715 break; 716 case SIOCSIFPHYADDR: 717 /* 718 * XXXRW: Isn't this priv_check() redundant to the ifnet 719 * layer check? 720 */ 721 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) 722 break; 723 if (aifr->ifra_addr.sin_family != AF_INET || 724 aifr->ifra_dstaddr.sin_family != AF_INET) { 725 error = EAFNOSUPPORT; 726 break; 727 } 728 if (aifr->ifra_addr.sin_len != sizeof(si) || 729 aifr->ifra_dstaddr.sin_len != sizeof(si)) { 730 error = EINVAL; 731 break; 732 } 733 sc->g_src = aifr->ifra_addr.sin_addr; 734 sc->g_dst = aifr->ifra_dstaddr.sin_addr; 735 goto recompute; 736 case SIOCDIFPHYADDR: 737 /* 738 * XXXRW: Isn't this priv_check() redundant to the ifnet 739 * layer check? 740 */ 741 if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) 742 break; 743 sc->g_src.s_addr = INADDR_ANY; 744 sc->g_dst.s_addr = INADDR_ANY; 745 goto recompute; 746 case SIOCGIFPSRCADDR: 747 #ifdef INET6 748 case SIOCGIFPSRCADDR_IN6: 749 #endif 750 if (sc->g_src.s_addr == INADDR_ANY) { 751 error = EADDRNOTAVAIL; 752 break; 753 } 754 memset(&si, 0, sizeof(si)); 755 si.sin_family = AF_INET; 756 si.sin_len = sizeof(struct sockaddr_in); 757 si.sin_addr.s_addr = sc->g_src.s_addr; 758 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 759 if (error != 0) 760 break; 761 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); 762 break; 763 case SIOCGIFPDSTADDR: 764 #ifdef INET6 765 case SIOCGIFPDSTADDR_IN6: 766 #endif 767 if (sc->g_dst.s_addr == INADDR_ANY) { 768 error = EADDRNOTAVAIL; 769 break; 770 } 771 memset(&si, 0, sizeof(si)); 772 si.sin_family = AF_INET; 773 si.sin_len = sizeof(struct sockaddr_in); 774 si.sin_addr.s_addr = sc->g_dst.s_addr; 775 error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); 776 if (error != 0) 777 break; 778 bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); 779 break; 780 case GRESKEY: 781 error = priv_check(curthread, PRIV_NET_GRE); 782 if (error) 783 break; 784 error = copyin(ifr->ifr_data, &key, sizeof(key)); 785 if (error) 786 break; 787 /* adjust MTU for option header */ 788 if (key == 0 && sc->key != 0) /* clear */ 789 adj += sizeof(key); 790 else if (key != 0 && sc->key == 0) /* set */ 791 adj -= sizeof(key); 792 793 if (ifp->if_mtu + adj < 576) { 794 error = EINVAL; 795 break; 796 } 797 ifp->if_mtu += adj; 798 sc->key = key; 799 break; 800 case GREGKEY: 801 error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key)); 802 break; 803 804 default: 805 error = EINVAL; 806 break; 807 } 808 809 return (error); 810 } 811 812 /* 813 * computes a route to our destination that is not the one 814 * which would be taken by ip_output(), as this one will loop back to 815 * us. If the interface is p2p as a--->b, then a routing entry exists 816 * If we now send a packet to b (e.g. ping b), this will come down here 817 * gets src=a, dst=b tacked on and would from ip_output() sent back to 818 * if_gre. 819 * Goal here is to compute a route to b that is less specific than 820 * a-->b. We know that this one exists as in normal operation we have 821 * at least a default route which matches. 822 */ 823 static int 824 gre_compute_route(struct gre_softc *sc) 825 { 826 struct route *ro; 827 828 ro = &sc->route; 829 830 memset(ro, 0, sizeof(struct route)); 831 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; 832 ro->ro_dst.sa_family = AF_INET; 833 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 834 835 /* 836 * toggle last bit, so our interface is not found, but a less 837 * specific route. I'd rather like to specify a shorter mask, 838 * but this is not possible. Should work though. XXX 839 * XXX MRT Use a different FIB for the tunnel to solve this problem. 840 */ 841 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) { 842 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^= 843 htonl(0x01); 844 } 845 846 #ifdef DIAGNOSTIC 847 printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)), 848 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); 849 #endif 850 851 rtalloc_fib(ro, sc->gre_fibnum); 852 853 /* 854 * check if this returned a route at all and this route is no 855 * recursion to ourself 856 */ 857 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) { 858 #ifdef DIAGNOSTIC 859 if (ro->ro_rt == NULL) 860 printf(" - no route found!\n"); 861 else 862 printf(" - route loops back to ourself!\n"); 863 #endif 864 return EADDRNOTAVAIL; 865 } 866 867 /* 868 * now change it back - else ip_output will just drop 869 * the route and search one to this interface ... 870 */ 871 if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) 872 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; 873 874 #ifdef DIAGNOSTIC 875 printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp), 876 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr)); 877 printf("\n"); 878 #endif 879 880 return 0; 881 } 882 883 /* 884 * do a checksum of a buffer - much like in_cksum, which operates on 885 * mbufs. 886 */ 887 u_int16_t 888 gre_in_cksum(u_int16_t *p, u_int len) 889 { 890 u_int32_t sum = 0; 891 int nwords = len >> 1; 892 893 while (nwords-- != 0) 894 sum += *p++; 895 896 if (len & 1) { 897 union { 898 u_short w; 899 u_char c[2]; 900 } u; 901 u.c[0] = *(u_char *)p; 902 u.c[1] = 0; 903 sum += u.w; 904 } 905 906 /* end-around-carry */ 907 sum = (sum >> 16) + (sum & 0xffff); 908 sum += (sum >> 16); 909 return (~sum); 910 } 911 912 static int 913 gremodevent(module_t mod, int type, void *data) 914 { 915 916 switch (type) { 917 case MOD_LOAD: 918 case MOD_UNLOAD: 919 break; 920 default: 921 return (EOPNOTSUPP); 922 } 923 return (0); 924 } 925 926 static moduledata_t gre_mod = { 927 "if_gre", 928 gremodevent, 929 0 930 }; 931 932 DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 933 MODULE_VERSION(if_gre, 1); 934