1 /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ 2 /*- 3 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 4 * 5 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com> 6 * All rights reserved. 7 * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * BASED ON: 32 * ------------------------------------------------------------------------- 33 * 34 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 35 * Nottingham University 1987. 36 * 37 * This source may be freely distributed, however I would be interested 38 * in any changes that are made. 
39 * 40 * This driver takes packets off the IP i/f and hands them up to a 41 * user process to have its wicked way with. This driver has its 42 * roots in a similar driver written by Phil Cockcroft (formerly) at 43 * UCL. This driver is based much more on read/write/poll mode of 44 * operation though. 45 * 46 * $FreeBSD$ 47 */ 48 49 #include "opt_inet.h" 50 #include "opt_inet6.h" 51 52 #include <sys/param.h> 53 #include <sys/lock.h> 54 #include <sys/priv.h> 55 #include <sys/proc.h> 56 #include <sys/systm.h> 57 #include <sys/jail.h> 58 #include <sys/mbuf.h> 59 #include <sys/module.h> 60 #include <sys/socket.h> 61 #include <sys/eventhandler.h> 62 #include <sys/fcntl.h> 63 #include <sys/filio.h> 64 #include <sys/sockio.h> 65 #include <sys/sx.h> 66 #include <sys/syslog.h> 67 #include <sys/ttycom.h> 68 #include <sys/poll.h> 69 #include <sys/selinfo.h> 70 #include <sys/signalvar.h> 71 #include <sys/filedesc.h> 72 #include <sys/kernel.h> 73 #include <sys/sysctl.h> 74 #include <sys/conf.h> 75 #include <sys/uio.h> 76 #include <sys/malloc.h> 77 #include <sys/random.h> 78 #include <sys/ctype.h> 79 80 #include <net/ethernet.h> 81 #include <net/if.h> 82 #include <net/if_var.h> 83 #include <net/if_clone.h> 84 #include <net/if_dl.h> 85 #include <net/if_media.h> 86 #include <net/if_types.h> 87 #include <net/netisr.h> 88 #include <net/route.h> 89 #include <net/vnet.h> 90 #ifdef INET 91 #include <netinet/in.h> 92 #endif 93 #include <net/bpf.h> 94 #include <net/if_tap.h> 95 #include <net/if_tun.h> 96 97 #include <sys/queue.h> 98 #include <sys/condvar.h> 99 #include <security/mac/mac_framework.h> 100 101 struct tuntap_driver; 102 103 /* 104 * tun_list is protected by global tunmtx. Other mutable fields are 105 * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is 106 * static for the duration of a tunnel interface.
107 */ 108 struct tuntap_softc { 109 TAILQ_ENTRY(tuntap_softc) tun_list; 110 struct cdev *tun_alias; 111 struct cdev *tun_dev; 112 u_short tun_flags; /* misc flags */ 113 #define TUN_OPEN 0x0001 114 #define TUN_INITED 0x0002 115 #define TUN_IASET 0x0008 116 #define TUN_DSTADDR 0x0010 117 #define TUN_LMODE 0x0020 118 #define TUN_RWAIT 0x0040 119 #define TUN_ASYNC 0x0080 120 #define TUN_IFHEAD 0x0100 121 #define TUN_DYING 0x0200 122 #define TUN_L2 0x0400 123 #define TUN_VMNET 0x0800 124 125 #define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) 126 #define TUN_READY (TUN_OPEN | TUN_INITED) 127 128 pid_t tun_pid; /* owning pid */ 129 struct ifnet *tun_ifp; /* the interface */ 130 struct sigio *tun_sigio; /* async I/O info */ 131 struct tuntap_driver *tun_drv; /* appropriate driver */ 132 struct selinfo tun_rsel; /* read select */ 133 struct mtx tun_mtx; /* softc field mutex */ 134 struct cv tun_cv; /* for ref'd dev destroy */ 135 struct ether_addr tun_ether; /* remote address */ 136 int tun_busy; /* busy count */ 137 }; 138 #define TUN2IFP(sc) ((sc)->tun_ifp) 139 140 #define TUNDEBUG if (tundebug) if_printf 141 142 #define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx) 143 #define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx) 144 #define TUN_LOCK_ASSERT(tp) mtx_assert(&(tp)->tun_mtx, MA_OWNED); 145 146 #define TUN_VMIO_FLAG_MASK 0x0fff 147 148 /* 149 * All mutable global variables in if_tun are locked using tunmtx, with 150 * the exception of tundebug, which is used unlocked, and the drivers' *clones, 151 * which are static after setup. 
152 */ 153 static struct mtx tunmtx; 154 static eventhandler_tag arrival_tag; 155 static eventhandler_tag clone_tag; 156 static const char tunname[] = "tun"; 157 static const char tapname[] = "tap"; 158 static const char vmnetname[] = "vmnet"; 159 static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); 160 static int tundebug = 0; 161 static int tundclone = 1; 162 static int tap_allow_uopen = 0; /* allow user open() */ 163 static int tapuponopen = 0; /* IFF_UP on open() */ 164 static int tapdclone = 1; /* enable devfs cloning */ 165 166 static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); 167 SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); 168 169 static struct sx tun_ioctl_sx; 170 SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl"); 171 172 SYSCTL_DECL(_net_link); 173 /* tun */ 174 static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, 175 "IP tunnel software network interface"); 176 SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0, 177 "Enable legacy devfs interface creation"); 178 179 /* tap */ 180 static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, 181 "Ethernet tunnel software network interface"); 182 SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0, 183 "Allow user to open /dev/tap (based on node permissions)"); 184 SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, 185 "Bring interface up when /dev/tap is opened"); 186 SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, 187 "Enable legacy devfs interface creation"); 188 SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, ""); 189 190 static int tun_busy_locked(struct tuntap_softc *tp); 191 static void tun_unbusy_locked(struct tuntap_softc *tp); 192 static int tun_busy(struct tuntap_softc *tp); 193 static void tun_unbusy(struct tuntap_softc *tp); 194 195 static int tuntap_name2info(const char *name, int *unit, int 
*flags); 196 static void tunclone(void *arg, struct ucred *cred, char *name, 197 int namelen, struct cdev **dev); 198 static void tuncreate(struct cdev *dev, struct tuntap_driver *); 199 static void tunrename(void *arg, struct ifnet *ifp); 200 static int tunifioctl(struct ifnet *, u_long, caddr_t); 201 static void tuninit(struct ifnet *); 202 static void tunifinit(void *xtp); 203 static int tuntapmodevent(module_t, int, void *); 204 static int tunoutput(struct ifnet *, struct mbuf *, 205 const struct sockaddr *, struct route *ro); 206 static void tunstart(struct ifnet *); 207 static void tunstart_l2(struct ifnet *); 208 209 static int tun_clone_match(struct if_clone *ifc, const char *name); 210 static int tap_clone_match(struct if_clone *ifc, const char *name); 211 static int vmnet_clone_match(struct if_clone *ifc, const char *name); 212 static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t); 213 static int tun_clone_destroy(struct if_clone *, struct ifnet *); 214 215 static d_open_t tunopen; 216 static d_close_t tunclose; 217 static d_read_t tunread; 218 static d_write_t tunwrite; 219 static d_ioctl_t tunioctl; 220 static d_poll_t tunpoll; 221 static d_kqfilter_t tunkqfilter; 222 223 static int tunkqread(struct knote *, long); 224 static int tunkqwrite(struct knote *, long); 225 static void tunkqdetach(struct knote *); 226 227 static struct filterops tun_read_filterops = { 228 .f_isfd = 1, 229 .f_attach = NULL, 230 .f_detach = tunkqdetach, 231 .f_event = tunkqread, 232 }; 233 234 static struct filterops tun_write_filterops = { 235 .f_isfd = 1, 236 .f_attach = NULL, 237 .f_detach = tunkqdetach, 238 .f_event = tunkqwrite, 239 }; 240 241 static struct tuntap_driver { 242 struct cdevsw cdevsw; 243 int ident_flags; 244 struct unrhdr *unrhdr; 245 struct clonedevs *clones; 246 ifc_match_t *clone_match_fn; 247 ifc_create_t *clone_create_fn; 248 ifc_destroy_t *clone_destroy_fn; 249 } tuntap_drivers[] = { 250 { 251 .ident_flags = 0, 252 .cdevsw = { 253 
.d_version = D_VERSION, 254 .d_flags = D_NEEDMINOR, 255 .d_open = tunopen, 256 .d_close = tunclose, 257 .d_read = tunread, 258 .d_write = tunwrite, 259 .d_ioctl = tunioctl, 260 .d_poll = tunpoll, 261 .d_kqfilter = tunkqfilter, 262 .d_name = tunname, 263 }, 264 .clone_match_fn = tun_clone_match, 265 .clone_create_fn = tun_clone_create, 266 .clone_destroy_fn = tun_clone_destroy, 267 }, 268 { 269 .ident_flags = TUN_L2, 270 .cdevsw = { 271 .d_version = D_VERSION, 272 .d_flags = D_NEEDMINOR, 273 .d_open = tunopen, 274 .d_close = tunclose, 275 .d_read = tunread, 276 .d_write = tunwrite, 277 .d_ioctl = tunioctl, 278 .d_poll = tunpoll, 279 .d_kqfilter = tunkqfilter, 280 .d_name = tapname, 281 }, 282 .clone_match_fn = tap_clone_match, 283 .clone_create_fn = tun_clone_create, 284 .clone_destroy_fn = tun_clone_destroy, 285 }, 286 { 287 .ident_flags = TUN_L2 | TUN_VMNET, 288 .cdevsw = { 289 .d_version = D_VERSION, 290 .d_flags = D_NEEDMINOR, 291 .d_open = tunopen, 292 .d_close = tunclose, 293 .d_read = tunread, 294 .d_write = tunwrite, 295 .d_ioctl = tunioctl, 296 .d_poll = tunpoll, 297 .d_kqfilter = tunkqfilter, 298 .d_name = vmnetname, 299 }, 300 .clone_match_fn = vmnet_clone_match, 301 .clone_create_fn = tun_clone_create, 302 .clone_destroy_fn = tun_clone_destroy, 303 }, 304 }; 305 306 struct tuntap_driver_cloner { 307 SLIST_ENTRY(tuntap_driver_cloner) link; 308 struct tuntap_driver *drv; 309 struct if_clone *cloner; 310 }; 311 312 VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) = 313 SLIST_HEAD_INITIALIZER(tuntap_driver_cloners); 314 315 #define V_tuntap_driver_cloners VNET(tuntap_driver_cloners) 316 317 /* 318 * Mechanism for marking a tunnel device as busy so that we can safely do some 319 * orthogonal operations (such as operations on devices) without racing against 320 * tun_destroy. tun_destroy will wait on the condvar if we're at all busy or 321 * open, to be woken up when the condition is alleviated. 
322 */ 323 static int 324 tun_busy_locked(struct tuntap_softc *tp) 325 { 326 327 TUN_LOCK_ASSERT(tp); 328 if ((tp->tun_flags & TUN_DYING) != 0) { 329 /* 330 * Perhaps unintuitive, but the device is busy going away. 331 * Other interpretations of EBUSY from tun_busy make little 332 * sense, since making a busy device even more busy doesn't 333 * sound like a problem. 334 */ 335 return (EBUSY); 336 } 337 338 ++tp->tun_busy; 339 return (0); 340 } 341 342 static void 343 tun_unbusy_locked(struct tuntap_softc *tp) 344 { 345 346 TUN_LOCK_ASSERT(tp); 347 KASSERT(tp->tun_busy != 0, ("tun_unbusy: called for non-busy tunnel")); 348 349 --tp->tun_busy; 350 /* Wake up anything that may be waiting on our busy tunnel. */ 351 if (tp->tun_busy == 0) 352 cv_broadcast(&tp->tun_cv); 353 } 354 355 static int 356 tun_busy(struct tuntap_softc *tp) 357 { 358 int ret; 359 360 TUN_LOCK(tp); 361 ret = tun_busy_locked(tp); 362 TUN_UNLOCK(tp); 363 return (ret); 364 } 365 366 367 static void 368 tun_unbusy(struct tuntap_softc *tp) 369 { 370 371 TUN_LOCK(tp); 372 tun_unbusy_locked(tp); 373 TUN_UNLOCK(tp); 374 } 375 376 /* 377 * Sets unit and/or flags given the device name. Must be called with correct 378 * vnet context. 379 */ 380 static int 381 tuntap_name2info(const char *name, int *outunit, int *outflags) 382 { 383 struct tuntap_driver *drv; 384 struct tuntap_driver_cloner *drvc; 385 char *dname; 386 int flags, unit; 387 bool found; 388 389 if (name == NULL) 390 return (EINVAL); 391 392 /* 393 * Needed for dev_stdclone, but dev_stdclone will not modify, it just 394 * wants to be able to pass back a char * through the second param. We 395 * will always set that as NULL here, so we'll fake it. 
396 */ 397 dname = __DECONST(char *, name); 398 found = false; 399 400 KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners), 401 ("tuntap_driver_cloners failed to initialize")); 402 SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) { 403 KASSERT(drvc->drv != NULL, 404 ("tuntap_driver_cloners entry not properly initialized")); 405 drv = drvc->drv; 406 407 if (strcmp(name, drv->cdevsw.d_name) == 0) { 408 found = true; 409 unit = -1; 410 flags = drv->ident_flags; 411 break; 412 } 413 414 if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) { 415 found = true; 416 flags = drv->ident_flags; 417 break; 418 } 419 } 420 421 if (!found) 422 return (ENXIO); 423 424 if (outunit != NULL) 425 *outunit = unit; 426 if (outflags != NULL) 427 *outflags = flags; 428 return (0); 429 } 430 431 /* 432 * Get driver information from a set of flags specified. Masks the identifying 433 * part of the flags and compares it against all of the available 434 * tuntap_drivers. Must be called with correct vnet context. 
435 */ 436 static struct tuntap_driver * 437 tuntap_driver_from_flags(int tun_flags) 438 { 439 struct tuntap_driver *drv; 440 struct tuntap_driver_cloner *drvc; 441 442 KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners), 443 ("tuntap_driver_cloners failed to initialize")); 444 SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) { 445 KASSERT(drvc->drv != NULL, 446 ("tuntap_driver_cloners entry not properly initialized")); 447 drv = drvc->drv; 448 if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags) 449 return (drv); 450 } 451 452 return (NULL); 453 } 454 455 456 457 static int 458 tun_clone_match(struct if_clone *ifc, const char *name) 459 { 460 int tunflags; 461 462 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 463 if ((tunflags & TUN_L2) == 0) 464 return (1); 465 } 466 467 return (0); 468 } 469 470 static int 471 tap_clone_match(struct if_clone *ifc, const char *name) 472 { 473 int tunflags; 474 475 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 476 if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2) 477 return (1); 478 } 479 480 return (0); 481 } 482 483 static int 484 vmnet_clone_match(struct if_clone *ifc, const char *name) 485 { 486 int tunflags; 487 488 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 489 if ((tunflags & TUN_VMNET) != 0) 490 return (1); 491 } 492 493 return (0); 494 } 495 496 static int 497 tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 498 { 499 struct tuntap_driver *drv; 500 struct cdev *dev; 501 int err, i, tunflags, unit; 502 503 tunflags = 0; 504 /* The name here tells us exactly what we're creating */ 505 err = tuntap_name2info(name, &unit, &tunflags); 506 if (err != 0) 507 return (err); 508 509 drv = tuntap_driver_from_flags(tunflags); 510 if (drv == NULL) 511 return (ENXIO); 512 513 if (unit != -1) { 514 /* If this unit number is still available that's okay. 
*/ 515 if (alloc_unr_specific(drv->unrhdr, unit) == -1) 516 return (EEXIST); 517 } else { 518 unit = alloc_unr(drv->unrhdr); 519 } 520 521 snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit); 522 523 /* find any existing device, or allocate new unit number */ 524 i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0); 525 if (i) { 526 /* No preexisting struct cdev *, create one */ 527 dev = make_dev(&drv->cdevsw, unit, UID_UUCP, GID_DIALER, 0600, 528 "%s%d", drv->cdevsw.d_name, unit); 529 } 530 531 tuncreate(dev, drv); 532 533 return (0); 534 } 535 536 static void 537 tunclone(void *arg, struct ucred *cred, char *name, int namelen, 538 struct cdev **dev) 539 { 540 char devname[SPECNAMELEN + 1]; 541 struct tuntap_driver *drv; 542 int append_unit, i, u, tunflags; 543 bool mayclone; 544 545 if (*dev != NULL) 546 return; 547 548 tunflags = 0; 549 CURVNET_SET(CRED_TO_VNET(cred)); 550 if (tuntap_name2info(name, &u, &tunflags) != 0) 551 goto out; /* Not recognized */ 552 553 if (u != -1 && u > IF_MAXUNIT) 554 goto out; /* Unit number too high */ 555 556 mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0; 557 if ((tunflags & TUN_L2) != 0) { 558 /* tap/vmnet allow user open with a sysctl */ 559 mayclone = (mayclone || tap_allow_uopen) && tapdclone; 560 } else { 561 mayclone = mayclone && tundclone; 562 } 563 564 /* 565 * If tun cloning is enabled, only the superuser can create an 566 * interface. 
567 */ 568 if (!mayclone) 569 goto out; 570 571 if (u == -1) 572 append_unit = 1; 573 else 574 append_unit = 0; 575 576 drv = tuntap_driver_from_flags(tunflags); 577 if (drv == NULL) 578 goto out; 579 580 /* find any existing device, or allocate new unit number */ 581 i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0); 582 if (i) { 583 if (append_unit) { 584 namelen = snprintf(devname, sizeof(devname), "%s%d", 585 name, u); 586 name = devname; 587 } 588 /* No preexisting struct cdev *, create one */ 589 *dev = make_dev_credf(MAKEDEV_REF, &drv->cdevsw, u, cred, 590 UID_UUCP, GID_DIALER, 0600, "%s", name); 591 } 592 593 if_clone_create(name, namelen, NULL); 594 out: 595 CURVNET_RESTORE(); 596 } 597 598 static void 599 tun_destroy(struct tuntap_softc *tp) 600 { 601 602 TUN_LOCK(tp); 603 tp->tun_flags |= TUN_DYING; 604 if (tp->tun_busy != 0) 605 cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); 606 else 607 TUN_UNLOCK(tp); 608 609 CURVNET_SET(TUN2IFP(tp)->if_vnet); 610 611 /* destroy_dev will take care of any alias. 
*/ 612 destroy_dev(tp->tun_dev); 613 seldrain(&tp->tun_rsel); 614 knlist_clear(&tp->tun_rsel.si_note, 0); 615 knlist_destroy(&tp->tun_rsel.si_note); 616 if ((tp->tun_flags & TUN_L2) != 0) { 617 ether_ifdetach(TUN2IFP(tp)); 618 } else { 619 bpfdetach(TUN2IFP(tp)); 620 if_detach(TUN2IFP(tp)); 621 } 622 sx_xlock(&tun_ioctl_sx); 623 TUN2IFP(tp)->if_softc = NULL; 624 sx_xunlock(&tun_ioctl_sx); 625 free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit); 626 if_free(TUN2IFP(tp)); 627 mtx_destroy(&tp->tun_mtx); 628 cv_destroy(&tp->tun_cv); 629 free(tp, M_TUN); 630 CURVNET_RESTORE(); 631 } 632 633 static int 634 tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp) 635 { 636 struct tuntap_softc *tp = ifp->if_softc; 637 638 mtx_lock(&tunmtx); 639 TAILQ_REMOVE(&tunhead, tp, tun_list); 640 mtx_unlock(&tunmtx); 641 tun_destroy(tp); 642 643 return (0); 644 } 645 646 static void 647 vnet_tun_init(const void *unused __unused) 648 { 649 struct tuntap_driver *drv; 650 struct tuntap_driver_cloner *drvc; 651 int i; 652 653 for (i = 0; i < nitems(tuntap_drivers); ++i) { 654 drv = &tuntap_drivers[i]; 655 drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO); 656 657 drvc->drv = drv; 658 drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0, 659 drv->clone_match_fn, drv->clone_create_fn, 660 drv->clone_destroy_fn); 661 SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link); 662 }; 663 } 664 VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, 665 vnet_tun_init, NULL); 666 667 static void 668 vnet_tun_uninit(const void *unused __unused) 669 { 670 struct tuntap_driver_cloner *drvc; 671 672 while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) { 673 drvc = SLIST_FIRST(&V_tuntap_driver_cloners); 674 SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link); 675 676 if_clone_detach(drvc->cloner); 677 free(drvc, M_TUN); 678 } 679 } 680 VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, 681 vnet_tun_uninit, NULL); 682 683 static void 684 tun_uninit(const void *unused __unused) 
685 { 686 struct tuntap_driver *drv; 687 struct tuntap_softc *tp; 688 int i; 689 690 EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag); 691 EVENTHANDLER_DEREGISTER(dev_clone, clone_tag); 692 drain_dev_clone_events(); 693 694 mtx_lock(&tunmtx); 695 while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { 696 TAILQ_REMOVE(&tunhead, tp, tun_list); 697 mtx_unlock(&tunmtx); 698 tun_destroy(tp); 699 mtx_lock(&tunmtx); 700 } 701 mtx_unlock(&tunmtx); 702 for (i = 0; i < nitems(tuntap_drivers); ++i) { 703 drv = &tuntap_drivers[i]; 704 delete_unrhdr(drv->unrhdr); 705 clone_cleanup(&drv->clones); 706 } 707 mtx_destroy(&tunmtx); 708 } 709 SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL); 710 711 static struct tuntap_driver * 712 tuntap_driver_from_ifnet(const struct ifnet *ifp) 713 { 714 struct tuntap_driver *drv; 715 int i; 716 717 if (ifp == NULL) 718 return (NULL); 719 720 for (i = 0; i < nitems(tuntap_drivers); ++i) { 721 drv = &tuntap_drivers[i]; 722 if (strcmp(ifp->if_dname, drv->cdevsw.d_name) == 0) 723 return (drv); 724 } 725 726 return (NULL); 727 } 728 729 static int 730 tuntapmodevent(module_t mod, int type, void *data) 731 { 732 struct tuntap_driver *drv; 733 int i; 734 735 switch (type) { 736 case MOD_LOAD: 737 mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF); 738 for (i = 0; i < nitems(tuntap_drivers); ++i) { 739 drv = &tuntap_drivers[i]; 740 clone_setup(&drv->clones); 741 drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx); 742 } 743 arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event, 744 tunrename, 0, 1000); 745 if (arrival_tag == NULL) 746 return (ENOMEM); 747 clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); 748 if (clone_tag == NULL) 749 return (ENOMEM); 750 break; 751 case MOD_UNLOAD: 752 /* See tun_uninit, so it's done after the vnet_sysuninit() */ 753 break; 754 default: 755 return EOPNOTSUPP; 756 } 757 return 0; 758 } 759 760 static moduledata_t tuntap_mod = { 761 "if_tuntap", 762 tuntapmodevent, 763 0 764 }; 765 766 
DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 767 MODULE_VERSION(if_tuntap, 1); 768 MODULE_VERSION(if_tun, 1); 769 MODULE_VERSION(if_tap, 1); 770 771 static void 772 tunstart(struct ifnet *ifp) 773 { 774 struct tuntap_softc *tp = ifp->if_softc; 775 struct mbuf *m; 776 777 TUNDEBUG(ifp, "starting\n"); 778 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 779 IFQ_LOCK(&ifp->if_snd); 780 IFQ_POLL_NOLOCK(&ifp->if_snd, m); 781 if (m == NULL) { 782 IFQ_UNLOCK(&ifp->if_snd); 783 return; 784 } 785 IFQ_UNLOCK(&ifp->if_snd); 786 } 787 788 TUN_LOCK(tp); 789 if (tp->tun_flags & TUN_RWAIT) { 790 tp->tun_flags &= ~TUN_RWAIT; 791 wakeup(tp); 792 } 793 selwakeuppri(&tp->tun_rsel, PZERO + 1); 794 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 795 if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) { 796 TUN_UNLOCK(tp); 797 pgsigio(&tp->tun_sigio, SIGIO, 0); 798 } else 799 TUN_UNLOCK(tp); 800 } 801 802 /* 803 * tunstart_l2 804 * 805 * queue packets from higher level ready to put out 806 */ 807 static void 808 tunstart_l2(struct ifnet *ifp) 809 { 810 struct tuntap_softc *tp = ifp->if_softc; 811 812 TUNDEBUG(ifp, "starting\n"); 813 814 /* 815 * do not junk pending output if we are in VMnet mode. 816 * XXX: can this do any harm because of queue overflow? 817 */ 818 819 TUN_LOCK(tp); 820 if (((tp->tun_flags & TUN_VMNET) == 0) && 821 ((tp->tun_flags & TUN_READY) != TUN_READY)) { 822 struct mbuf *m; 823 824 /* Unlocked read. 
*/ 825 TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags); 826 827 for (;;) { 828 IF_DEQUEUE(&ifp->if_snd, m); 829 if (m != NULL) { 830 m_freem(m); 831 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 832 } else 833 break; 834 } 835 TUN_UNLOCK(tp); 836 837 return; 838 } 839 840 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 841 842 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 843 if (tp->tun_flags & TUN_RWAIT) { 844 tp->tun_flags &= ~TUN_RWAIT; 845 wakeup(tp); 846 } 847 848 if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) { 849 TUN_UNLOCK(tp); 850 pgsigio(&tp->tun_sigio, SIGIO, 0); 851 TUN_LOCK(tp); 852 } 853 854 selwakeuppri(&tp->tun_rsel, PZERO+1); 855 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 856 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ 857 } 858 859 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 860 TUN_UNLOCK(tp); 861 } /* tunstart_l2 */ 862 863 864 /* XXX: should return an error code so it can fail. */ 865 static void 866 tuncreate(struct cdev *dev, struct tuntap_driver *drv) 867 { 868 struct tuntap_softc *sc; 869 struct ifnet *ifp; 870 struct ether_addr eaddr; 871 int iflags; 872 u_char type; 873 874 sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO); 875 mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF); 876 cv_init(&sc->tun_cv, "tun_condvar"); 877 sc->tun_flags = drv->ident_flags; 878 sc->tun_dev = dev; 879 sc->tun_drv = drv; 880 mtx_lock(&tunmtx); 881 TAILQ_INSERT_TAIL(&tunhead, sc, tun_list); 882 mtx_unlock(&tunmtx); 883 884 iflags = IFF_MULTICAST; 885 if ((sc->tun_flags & TUN_L2) != 0) { 886 type = IFT_ETHER; 887 iflags |= IFF_BROADCAST | IFF_SIMPLEX; 888 } else { 889 type = IFT_PPP; 890 iflags |= IFF_POINTOPOINT; 891 } 892 ifp = sc->tun_ifp = if_alloc(type); 893 if (ifp == NULL) 894 panic("%s%d: failed to if_alloc() interface.\n", 895 drv->cdevsw.d_name, dev2unit(dev)); 896 ifp->if_softc = sc; 897 if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev)); 898 ifp->if_ioctl = tunifioctl; 899 ifp->if_flags = iflags; 900 
IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 901 knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx); 902 ifp->if_capabilities |= IFCAP_LINKSTATE; 903 ifp->if_capenable |= IFCAP_LINKSTATE; 904 905 if ((sc->tun_flags & TUN_L2) != 0) { 906 ifp->if_mtu = ETHERMTU; 907 ifp->if_init = tunifinit; 908 ifp->if_start = tunstart_l2; 909 910 ether_gen_addr(ifp, &eaddr); 911 ether_ifattach(ifp, eaddr.octet); 912 } else { 913 ifp->if_mtu = TUNMTU; 914 ifp->if_start = tunstart; 915 ifp->if_output = tunoutput; 916 917 ifp->if_snd.ifq_drv_maxlen = 0; 918 IFQ_SET_READY(&ifp->if_snd); 919 920 if_attach(ifp); 921 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); 922 } 923 dev->si_drv1 = sc; 924 925 TUN_LOCK(sc); 926 sc->tun_flags |= TUN_INITED; 927 TUN_UNLOCK(sc); 928 929 TUNDEBUG(ifp, "interface %s is created, minor = %#x\n", 930 ifp->if_xname, dev2unit(dev)); 931 } 932 933 static void 934 tunrename(void *arg __unused, struct ifnet *ifp) 935 { 936 struct tuntap_softc *tp; 937 int error; 938 939 if ((ifp->if_flags & IFF_RENAMING) == 0) 940 return; 941 942 if (tuntap_driver_from_ifnet(ifp) == NULL) 943 return; 944 945 /* 946 * We need to grab the ioctl sx long enough to make sure the softc is 947 * still there. If it is, we can safely try to busy the tun device. 948 * The busy may fail if the device is currently dying, in which case 949 * we do nothing. If it doesn't fail, the busy count stops the device 950 * from dying until we've created the alias (that will then be 951 * subsequently destroyed). 952 */ 953 sx_xlock(&tun_ioctl_sx); 954 tp = ifp->if_softc; 955 if (tp == NULL) { 956 sx_xunlock(&tun_ioctl_sx); 957 return; 958 } 959 error = tun_busy(tp); 960 sx_xunlock(&tun_ioctl_sx); 961 if (error != 0) 962 return; 963 if (tp->tun_alias != NULL) { 964 destroy_dev(tp->tun_alias); 965 tp->tun_alias = NULL; 966 } 967 968 if (strcmp(ifp->if_xname, tp->tun_dev->si_name) == 0) 969 goto out; 970 971 /* 972 * Failure's ok, aliases are created on a best effort basis. 
If a 973 * tun user/consumer decides to rename the interface to conflict with 974 * another device (non-ifnet) on the system, we will assume they know 975 * what they are doing. make_dev_alias_p won't touch tun_alias on 976 * failure, so we use it but ignore the return value. 977 */ 978 make_dev_alias_p(MAKEDEV_CHECKNAME, &tp->tun_alias, tp->tun_dev, "%s", 979 ifp->if_xname); 980 out: 981 tun_unbusy(tp); 982 } 983 984 static int 985 tunopen(struct cdev *dev, int flag, int mode, struct thread *td) 986 { 987 struct ifnet *ifp; 988 struct tuntap_driver *drv; 989 struct tuntap_softc *tp; 990 int error, tunflags; 991 992 tunflags = 0; 993 CURVNET_SET(TD_TO_VNET(td)); 994 error = tuntap_name2info(dev->si_name, NULL, &tunflags); 995 if (error != 0) { 996 CURVNET_RESTORE(); 997 return (error); /* Shouldn't happen */ 998 } 999 1000 if ((tunflags & TUN_L2) != 0) { 1001 /* Restrict? */ 1002 if (tap_allow_uopen == 0) { 1003 error = priv_check(td, PRIV_NET_TAP); 1004 if (error != 0) { 1005 CURVNET_RESTORE(); 1006 return (error); 1007 } 1008 } 1009 } 1010 1011 /* 1012 * XXXRW: Non-atomic test and set of dev->si_drv1 requires 1013 * synchronization. 
1014 */ 1015 tp = dev->si_drv1; 1016 if (!tp) { 1017 drv = tuntap_driver_from_flags(tunflags); 1018 if (drv == NULL) { 1019 CURVNET_RESTORE(); 1020 return (ENXIO); 1021 } 1022 tuncreate(dev, drv); 1023 tp = dev->si_drv1; 1024 } 1025 1026 TUN_LOCK(tp); 1027 if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) { 1028 TUN_UNLOCK(tp); 1029 CURVNET_RESTORE(); 1030 return (EBUSY); 1031 } 1032 1033 error = tun_busy_locked(tp); 1034 KASSERT(error == 0, ("Must be able to busy an unopen tunnel")); 1035 ifp = TUN2IFP(tp); 1036 1037 if ((tp->tun_flags & TUN_L2) != 0) { 1038 bcopy(IF_LLADDR(ifp), tp->tun_ether.octet, 1039 sizeof(tp->tun_ether.octet)); 1040 1041 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1042 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1043 1044 if (tapuponopen) 1045 ifp->if_flags |= IFF_UP; 1046 } 1047 1048 tp->tun_pid = td->td_proc->p_pid; 1049 tp->tun_flags |= TUN_OPEN; 1050 1051 if_link_state_change(ifp, LINK_STATE_UP); 1052 TUNDEBUG(ifp, "open\n"); 1053 TUN_UNLOCK(tp); 1054 CURVNET_RESTORE(); 1055 return (0); 1056 } 1057 1058 /* 1059 * tunclose - close the device - mark i/f down & delete 1060 * routing info 1061 */ 1062 static int 1063 tunclose(struct cdev *dev, int foo, int bar, struct thread *td) 1064 { 1065 struct proc *p; 1066 struct tuntap_softc *tp; 1067 struct ifnet *ifp; 1068 bool l2tun; 1069 1070 p = td->td_proc; 1071 tp = dev->si_drv1; 1072 ifp = TUN2IFP(tp); 1073 1074 TUN_LOCK(tp); 1075 1076 /* 1077 * Realistically, we can't be obstinate here. This only means that the 1078 * tuntap device was closed out of order, and the last closer wasn't the 1079 * controller. These are still good to know about, though, as software 1080 * should avoid multiple processes with a tuntap device open and 1081 * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in 1082 * parent). 
1083 */ 1084 if (p->p_pid != tp->tun_pid) { 1085 log(LOG_INFO, 1086 "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n", 1087 p->p_pid, p->p_comm, dev->si_name); 1088 } 1089 1090 /* 1091 * junk all pending output 1092 */ 1093 CURVNET_SET(ifp->if_vnet); 1094 1095 l2tun = false; 1096 if ((tp->tun_flags & TUN_L2) != 0) { 1097 l2tun = true; 1098 IF_DRAIN(&ifp->if_snd); 1099 } else { 1100 IFQ_PURGE(&ifp->if_snd); 1101 } 1102 1103 /* For vmnet, we won't do most of the address/route bits */ 1104 if ((tp->tun_flags & TUN_VMNET) != 0 || 1105 (l2tun && (ifp->if_flags & IFF_LINK0) != 0)) 1106 goto out; 1107 1108 if (ifp->if_flags & IFF_UP) { 1109 TUN_UNLOCK(tp); 1110 if_down(ifp); 1111 TUN_LOCK(tp); 1112 } 1113 1114 /* Delete all addresses and routes which reference this interface. */ 1115 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1116 struct ifaddr *ifa; 1117 1118 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1119 TUN_UNLOCK(tp); 1120 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1121 /* deal w/IPv4 PtP destination; unlocked read */ 1122 if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) { 1123 rtinit(ifa, (int)RTM_DELETE, 1124 tp->tun_flags & TUN_DSTADDR ? 
RTF_HOST : 0); 1125 } else { 1126 rtinit(ifa, (int)RTM_DELETE, 0); 1127 } 1128 } 1129 if_purgeaddrs(ifp); 1130 TUN_LOCK(tp); 1131 } 1132 1133 out: 1134 if_link_state_change(ifp, LINK_STATE_DOWN); 1135 CURVNET_RESTORE(); 1136 1137 funsetown(&tp->tun_sigio); 1138 selwakeuppri(&tp->tun_rsel, PZERO + 1); 1139 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 1140 TUNDEBUG (ifp, "closed\n"); 1141 tp->tun_flags &= ~TUN_OPEN; 1142 tp->tun_pid = 0; 1143 1144 tun_unbusy_locked(tp); 1145 TUN_UNLOCK(tp); 1146 return (0); 1147 } 1148 1149 static void 1150 tuninit(struct ifnet *ifp) 1151 { 1152 struct tuntap_softc *tp = ifp->if_softc; 1153 #ifdef INET 1154 struct ifaddr *ifa; 1155 #endif 1156 1157 TUNDEBUG(ifp, "tuninit\n"); 1158 1159 TUN_LOCK(tp); 1160 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1161 if ((tp->tun_flags & TUN_L2) == 0) { 1162 ifp->if_flags |= IFF_UP; 1163 getmicrotime(&ifp->if_lastchange); 1164 #ifdef INET 1165 if_addr_rlock(ifp); 1166 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1167 if (ifa->ifa_addr->sa_family == AF_INET) { 1168 struct sockaddr_in *si; 1169 1170 si = (struct sockaddr_in *)ifa->ifa_addr; 1171 if (si->sin_addr.s_addr) 1172 tp->tun_flags |= TUN_IASET; 1173 1174 si = (struct sockaddr_in *)ifa->ifa_dstaddr; 1175 if (si && si->sin_addr.s_addr) 1176 tp->tun_flags |= TUN_DSTADDR; 1177 } 1178 } 1179 if_addr_runlock(ifp); 1180 #endif 1181 TUN_UNLOCK(tp); 1182 } else { 1183 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1184 TUN_UNLOCK(tp); 1185 /* attempt to start output */ 1186 tunstart_l2(ifp); 1187 } 1188 1189 } 1190 1191 /* 1192 * Used only for l2 tunnel. 1193 */ 1194 static void 1195 tunifinit(void *xtp) 1196 { 1197 struct tuntap_softc *tp; 1198 1199 tp = (struct tuntap_softc *)xtp; 1200 tuninit(tp->tun_ifp); 1201 } 1202 1203 /* 1204 * Process an ioctl request. 
1205 */ 1206 static int 1207 tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1208 { 1209 struct ifreq *ifr = (struct ifreq *)data; 1210 struct tuntap_softc *tp; 1211 struct ifstat *ifs; 1212 struct ifmediareq *ifmr; 1213 int dummy, error = 0; 1214 bool l2tun; 1215 1216 ifmr = NULL; 1217 sx_xlock(&tun_ioctl_sx); 1218 tp = ifp->if_softc; 1219 if (tp == NULL) { 1220 error = ENXIO; 1221 goto bad; 1222 } 1223 l2tun = (tp->tun_flags & TUN_L2) != 0; 1224 switch(cmd) { 1225 case SIOCGIFSTATUS: 1226 ifs = (struct ifstat *)data; 1227 TUN_LOCK(tp); 1228 if (tp->tun_pid) 1229 snprintf(ifs->ascii, sizeof(ifs->ascii), 1230 "\tOpened by PID %d\n", tp->tun_pid); 1231 else 1232 ifs->ascii[0] = '\0'; 1233 TUN_UNLOCK(tp); 1234 break; 1235 case SIOCSIFADDR: 1236 if (l2tun) 1237 error = ether_ioctl(ifp, cmd, data); 1238 else 1239 tuninit(ifp); 1240 if (error == 0) 1241 TUNDEBUG(ifp, "address set\n"); 1242 break; 1243 case SIOCSIFMTU: 1244 ifp->if_mtu = ifr->ifr_mtu; 1245 TUNDEBUG(ifp, "mtu set\n"); 1246 break; 1247 case SIOCSIFFLAGS: 1248 case SIOCADDMULTI: 1249 case SIOCDELMULTI: 1250 break; 1251 case SIOCGIFMEDIA: 1252 if (!l2tun) { 1253 error = EINVAL; 1254 break; 1255 } 1256 1257 ifmr = (struct ifmediareq *)data; 1258 dummy = ifmr->ifm_count; 1259 ifmr->ifm_count = 1; 1260 ifmr->ifm_status = IFM_AVALID; 1261 ifmr->ifm_active = IFM_ETHER; 1262 if (tp->tun_flags & TUN_OPEN) 1263 ifmr->ifm_status |= IFM_ACTIVE; 1264 ifmr->ifm_current = ifmr->ifm_active; 1265 if (dummy >= 1) { 1266 int media = IFM_ETHER; 1267 error = copyout(&media, ifmr->ifm_ulist, sizeof(int)); 1268 } 1269 break; 1270 default: 1271 if (l2tun) { 1272 error = ether_ioctl(ifp, cmd, data); 1273 } else { 1274 error = EINVAL; 1275 } 1276 } 1277 bad: 1278 sx_xunlock(&tun_ioctl_sx); 1279 return (error); 1280 } 1281 1282 /* 1283 * tunoutput - queue packets from higher level ready to put out. 
1284 */ 1285 static int 1286 tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, 1287 struct route *ro) 1288 { 1289 struct tuntap_softc *tp = ifp->if_softc; 1290 u_short cached_tun_flags; 1291 int error; 1292 u_int32_t af; 1293 1294 TUNDEBUG (ifp, "tunoutput\n"); 1295 1296 #ifdef MAC 1297 error = mac_ifnet_check_transmit(ifp, m0); 1298 if (error) { 1299 m_freem(m0); 1300 return (error); 1301 } 1302 #endif 1303 1304 /* Could be unlocked read? */ 1305 TUN_LOCK(tp); 1306 cached_tun_flags = tp->tun_flags; 1307 TUN_UNLOCK(tp); 1308 if ((cached_tun_flags & TUN_READY) != TUN_READY) { 1309 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); 1310 m_freem (m0); 1311 return (EHOSTDOWN); 1312 } 1313 1314 if ((ifp->if_flags & IFF_UP) != IFF_UP) { 1315 m_freem (m0); 1316 return (EHOSTDOWN); 1317 } 1318 1319 /* BPF writes need to be handled specially. */ 1320 if (dst->sa_family == AF_UNSPEC) 1321 bcopy(dst->sa_data, &af, sizeof(af)); 1322 else 1323 af = dst->sa_family; 1324 1325 if (bpf_peers_present(ifp->if_bpf)) 1326 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0); 1327 1328 /* prepend sockaddr? 
this may abort if the mbuf allocation fails */ 1329 if (cached_tun_flags & TUN_LMODE) { 1330 /* allocate space for sockaddr */ 1331 M_PREPEND(m0, dst->sa_len, M_NOWAIT); 1332 1333 /* if allocation failed drop packet */ 1334 if (m0 == NULL) { 1335 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1336 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1337 return (ENOBUFS); 1338 } else { 1339 bcopy(dst, m0->m_data, dst->sa_len); 1340 } 1341 } 1342 1343 if (cached_tun_flags & TUN_IFHEAD) { 1344 /* Prepend the address family */ 1345 M_PREPEND(m0, 4, M_NOWAIT); 1346 1347 /* if allocation failed drop packet */ 1348 if (m0 == NULL) { 1349 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1350 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1351 return (ENOBUFS); 1352 } else 1353 *(u_int32_t *)m0->m_data = htonl(af); 1354 } else { 1355 #ifdef INET 1356 if (af != AF_INET) 1357 #endif 1358 { 1359 m_freem(m0); 1360 return (EAFNOSUPPORT); 1361 } 1362 } 1363 1364 error = (ifp->if_transmit)(ifp, m0); 1365 if (error) 1366 return (ENOBUFS); 1367 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1368 return (0); 1369 } 1370 1371 /* 1372 * the cdevsw interface is now pretty minimal. 
1373 */ 1374 static int 1375 tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 1376 struct thread *td) 1377 { 1378 struct ifreq ifr, *ifrp; 1379 struct tuntap_softc *tp = dev->si_drv1; 1380 struct tuninfo *tunp; 1381 int error, iflags; 1382 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 1383 defined(COMPAT_FREEBSD4) 1384 int ival; 1385 #endif 1386 bool l2tun; 1387 1388 l2tun = (tp->tun_flags & TUN_L2) != 0; 1389 if (l2tun) { 1390 /* tap specific ioctls */ 1391 switch(cmd) { 1392 /* VMware/VMnet port ioctl's */ 1393 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 1394 defined(COMPAT_FREEBSD4) 1395 case _IO('V', 0): 1396 ival = IOCPARM_IVAL(data); 1397 data = (caddr_t)&ival; 1398 /* FALLTHROUGH */ 1399 #endif 1400 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ 1401 iflags = *(int *)data; 1402 iflags &= TUN_VMIO_FLAG_MASK; 1403 iflags &= ~IFF_CANTCHANGE; 1404 iflags |= IFF_UP; 1405 1406 TUN_LOCK(tp); 1407 TUN2IFP(tp)->if_flags = iflags | 1408 (TUN2IFP(tp)->if_flags & IFF_CANTCHANGE); 1409 TUN_UNLOCK(tp); 1410 1411 return (0); 1412 case SIOCGIFADDR: /* get MAC address of the remote side */ 1413 TUN_LOCK(tp); 1414 bcopy(&tp->tun_ether.octet, data, 1415 sizeof(tp->tun_ether.octet)); 1416 TUN_UNLOCK(tp); 1417 1418 return (0); 1419 case SIOCSIFADDR: /* set MAC address of the remote side */ 1420 TUN_LOCK(tp); 1421 bcopy(data, &tp->tun_ether.octet, 1422 sizeof(tp->tun_ether.octet)); 1423 TUN_UNLOCK(tp); 1424 1425 return (0); 1426 } 1427 1428 /* Fall through to the common ioctls if unhandled */ 1429 } else { 1430 switch (cmd) { 1431 case TUNSLMODE: 1432 TUN_LOCK(tp); 1433 if (*(int *)data) { 1434 tp->tun_flags |= TUN_LMODE; 1435 tp->tun_flags &= ~TUN_IFHEAD; 1436 } else 1437 tp->tun_flags &= ~TUN_LMODE; 1438 TUN_UNLOCK(tp); 1439 1440 return (0); 1441 case TUNSIFHEAD: 1442 TUN_LOCK(tp); 1443 if (*(int *)data) { 1444 tp->tun_flags |= TUN_IFHEAD; 1445 tp->tun_flags &= ~TUN_LMODE; 1446 } else 1447 tp->tun_flags &= ~TUN_IFHEAD; 
1448 TUN_UNLOCK(tp); 1449 1450 return (0); 1451 case TUNGIFHEAD: 1452 TUN_LOCK(tp); 1453 *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0; 1454 TUN_UNLOCK(tp); 1455 1456 return (0); 1457 case TUNSIFMODE: 1458 /* deny this if UP */ 1459 if (TUN2IFP(tp)->if_flags & IFF_UP) 1460 return (EBUSY); 1461 1462 switch (*(int *)data & ~IFF_MULTICAST) { 1463 case IFF_POINTOPOINT: 1464 case IFF_BROADCAST: 1465 TUN_LOCK(tp); 1466 TUN2IFP(tp)->if_flags &= 1467 ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST); 1468 TUN2IFP(tp)->if_flags |= *(int *)data; 1469 TUN_UNLOCK(tp); 1470 1471 break; 1472 default: 1473 return (EINVAL); 1474 } 1475 1476 return (0); 1477 case TUNSIFPID: 1478 TUN_LOCK(tp); 1479 tp->tun_pid = curthread->td_proc->p_pid; 1480 TUN_UNLOCK(tp); 1481 1482 return (0); 1483 } 1484 /* Fall through to the common ioctls if unhandled */ 1485 } 1486 1487 switch (cmd) { 1488 case TUNGIFNAME: 1489 ifrp = (struct ifreq *)data; 1490 strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ); 1491 1492 return (0); 1493 case TUNSIFINFO: 1494 tunp = (struct tuninfo *)data; 1495 if (TUN2IFP(tp)->if_type != tunp->type) 1496 return (EPROTOTYPE); 1497 TUN_LOCK(tp); 1498 if (TUN2IFP(tp)->if_mtu != tunp->mtu) { 1499 strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ); 1500 ifr.ifr_mtu = tunp->mtu; 1501 CURVNET_SET(TUN2IFP(tp)->if_vnet); 1502 error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp), 1503 (caddr_t)&ifr, td); 1504 CURVNET_RESTORE(); 1505 if (error) { 1506 TUN_UNLOCK(tp); 1507 return (error); 1508 } 1509 } 1510 TUN2IFP(tp)->if_baudrate = tunp->baudrate; 1511 TUN_UNLOCK(tp); 1512 break; 1513 case TUNGIFINFO: 1514 tunp = (struct tuninfo *)data; 1515 TUN_LOCK(tp); 1516 tunp->mtu = TUN2IFP(tp)->if_mtu; 1517 tunp->type = TUN2IFP(tp)->if_type; 1518 tunp->baudrate = TUN2IFP(tp)->if_baudrate; 1519 TUN_UNLOCK(tp); 1520 break; 1521 case TUNSDEBUG: 1522 tundebug = *(int *)data; 1523 break; 1524 case TUNGDEBUG: 1525 *(int *)data = tundebug; 1526 break; 1527 case FIONBIO: 1528 break; 1529 case 
FIOASYNC: 1530 TUN_LOCK(tp); 1531 if (*(int *)data) 1532 tp->tun_flags |= TUN_ASYNC; 1533 else 1534 tp->tun_flags &= ~TUN_ASYNC; 1535 TUN_UNLOCK(tp); 1536 break; 1537 case FIONREAD: 1538 if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) { 1539 struct mbuf *mb; 1540 IFQ_LOCK(&TUN2IFP(tp)->if_snd); 1541 IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb); 1542 for (*(int *)data = 0; mb != NULL; mb = mb->m_next) 1543 *(int *)data += mb->m_len; 1544 IFQ_UNLOCK(&TUN2IFP(tp)->if_snd); 1545 } else 1546 *(int *)data = 0; 1547 break; 1548 case FIOSETOWN: 1549 return (fsetown(*(int *)data, &tp->tun_sigio)); 1550 1551 case FIOGETOWN: 1552 *(int *)data = fgetown(&tp->tun_sigio); 1553 return (0); 1554 1555 /* This is deprecated, FIOSETOWN should be used instead. */ 1556 case TIOCSPGRP: 1557 return (fsetown(-(*(int *)data), &tp->tun_sigio)); 1558 1559 /* This is deprecated, FIOGETOWN should be used instead. */ 1560 case TIOCGPGRP: 1561 *(int *)data = -fgetown(&tp->tun_sigio); 1562 return (0); 1563 1564 default: 1565 return (ENOTTY); 1566 } 1567 return (0); 1568 } 1569 1570 /* 1571 * The cdevsw read interface - reads a packet at a time, or at 1572 * least as much of a packet as can be read. 
1573 */ 1574 static int 1575 tunread(struct cdev *dev, struct uio *uio, int flag) 1576 { 1577 struct tuntap_softc *tp = dev->si_drv1; 1578 struct ifnet *ifp = TUN2IFP(tp); 1579 struct mbuf *m; 1580 int error=0, len; 1581 1582 TUNDEBUG (ifp, "read\n"); 1583 TUN_LOCK(tp); 1584 if ((tp->tun_flags & TUN_READY) != TUN_READY) { 1585 TUN_UNLOCK(tp); 1586 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); 1587 return (EHOSTDOWN); 1588 } 1589 1590 tp->tun_flags &= ~TUN_RWAIT; 1591 1592 for (;;) { 1593 IFQ_DEQUEUE(&ifp->if_snd, m); 1594 if (m != NULL) 1595 break; 1596 if (flag & O_NONBLOCK) { 1597 TUN_UNLOCK(tp); 1598 return (EWOULDBLOCK); 1599 } 1600 tp->tun_flags |= TUN_RWAIT; 1601 error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1), 1602 "tunread", 0); 1603 if (error != 0) { 1604 TUN_UNLOCK(tp); 1605 return (error); 1606 } 1607 } 1608 TUN_UNLOCK(tp); 1609 1610 if ((tp->tun_flags & TUN_L2) != 0) 1611 BPF_MTAP(ifp, m); 1612 1613 while (m && uio->uio_resid > 0 && error == 0) { 1614 len = min(uio->uio_resid, m->m_len); 1615 if (len != 0) 1616 error = uiomove(mtod(m, void *), len, uio); 1617 m = m_free(m); 1618 } 1619 1620 if (m) { 1621 TUNDEBUG(ifp, "Dropping mbuf\n"); 1622 m_freem(m); 1623 } 1624 return (error); 1625 } 1626 1627 static int 1628 tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m) 1629 { 1630 struct ether_header *eh; 1631 struct ifnet *ifp; 1632 1633 ifp = TUN2IFP(tp); 1634 1635 /* 1636 * Only pass a unicast frame to ether_input(), if it would 1637 * actually have been received by non-virtual hardware. 1638 */ 1639 if (m->m_len < sizeof(struct ether_header)) { 1640 m_freem(m); 1641 return (0); 1642 } 1643 1644 eh = mtod(m, struct ether_header *); 1645 1646 if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && 1647 !ETHER_IS_MULTICAST(eh->ether_dhost) && 1648 bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { 1649 m_freem(m); 1650 return (0); 1651 } 1652 1653 /* Pass packet up to parent. 
*/ 1654 CURVNET_SET(ifp->if_vnet); 1655 (*ifp->if_input)(ifp, m); 1656 CURVNET_RESTORE(); 1657 /* ibytes are counted in parent */ 1658 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1659 return (0); 1660 } 1661 1662 static int 1663 tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m) 1664 { 1665 struct epoch_tracker et; 1666 struct ifnet *ifp; 1667 int family, isr; 1668 1669 ifp = TUN2IFP(tp); 1670 /* Could be unlocked read? */ 1671 TUN_LOCK(tp); 1672 if (tp->tun_flags & TUN_IFHEAD) { 1673 TUN_UNLOCK(tp); 1674 if (m->m_len < sizeof(family) && 1675 (m = m_pullup(m, sizeof(family))) == NULL) 1676 return (ENOBUFS); 1677 family = ntohl(*mtod(m, u_int32_t *)); 1678 m_adj(m, sizeof(family)); 1679 } else { 1680 TUN_UNLOCK(tp); 1681 family = AF_INET; 1682 } 1683 1684 BPF_MTAP2(ifp, &family, sizeof(family), m); 1685 1686 switch (family) { 1687 #ifdef INET 1688 case AF_INET: 1689 isr = NETISR_IP; 1690 break; 1691 #endif 1692 #ifdef INET6 1693 case AF_INET6: 1694 isr = NETISR_IPV6; 1695 break; 1696 #endif 1697 default: 1698 m_freem(m); 1699 return (EAFNOSUPPORT); 1700 } 1701 random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN); 1702 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 1703 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1704 CURVNET_SET(ifp->if_vnet); 1705 M_SETFIB(m, ifp->if_fib); 1706 NET_EPOCH_ENTER(et); 1707 netisr_dispatch(isr, m); 1708 NET_EPOCH_EXIT(et); 1709 CURVNET_RESTORE(); 1710 return (0); 1711 } 1712 1713 /* 1714 * the cdevsw write interface - an atomic write is a packet - or else! 
1715 */ 1716 static int 1717 tunwrite(struct cdev *dev, struct uio *uio, int flag) 1718 { 1719 struct tuntap_softc *tp; 1720 struct ifnet *ifp; 1721 struct mbuf *m; 1722 uint32_t mru; 1723 int align; 1724 bool l2tun; 1725 1726 tp = dev->si_drv1; 1727 ifp = TUN2IFP(tp); 1728 TUNDEBUG(ifp, "tunwrite\n"); 1729 if ((ifp->if_flags & IFF_UP) != IFF_UP) 1730 /* ignore silently */ 1731 return (0); 1732 1733 if (uio->uio_resid == 0) 1734 return (0); 1735 1736 l2tun = (tp->tun_flags & TUN_L2) != 0; 1737 align = 0; 1738 mru = l2tun ? TAPMRU : TUNMRU; 1739 if (l2tun) 1740 align = ETHER_ALIGN; 1741 else if ((tp->tun_flags & TUN_IFHEAD) != 0) 1742 mru += sizeof(uint32_t); /* family */ 1743 if (uio->uio_resid < 0 || uio->uio_resid > mru) { 1744 TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid); 1745 return (EIO); 1746 } 1747 1748 if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) { 1749 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 1750 return (ENOBUFS); 1751 } 1752 1753 m->m_pkthdr.rcvif = ifp; 1754 #ifdef MAC 1755 mac_ifnet_create_mbuf(ifp, m); 1756 #endif 1757 1758 if (l2tun) 1759 return (tunwrite_l2(tp, m)); 1760 1761 return (tunwrite_l3(tp, m)); 1762 } 1763 1764 /* 1765 * tunpoll - the poll interface, this is only useful on reads 1766 * really. The write detect always returns true, write never blocks 1767 * anyway, it either accepts the packet or drops it. 
1768 */ 1769 static int 1770 tunpoll(struct cdev *dev, int events, struct thread *td) 1771 { 1772 struct tuntap_softc *tp = dev->si_drv1; 1773 struct ifnet *ifp = TUN2IFP(tp); 1774 int revents = 0; 1775 1776 TUNDEBUG(ifp, "tunpoll\n"); 1777 1778 if (events & (POLLIN | POLLRDNORM)) { 1779 IFQ_LOCK(&ifp->if_snd); 1780 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 1781 TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); 1782 revents |= events & (POLLIN | POLLRDNORM); 1783 } else { 1784 TUNDEBUG(ifp, "tunpoll waiting\n"); 1785 selrecord(td, &tp->tun_rsel); 1786 } 1787 IFQ_UNLOCK(&ifp->if_snd); 1788 } 1789 revents |= events & (POLLOUT | POLLWRNORM); 1790 1791 return (revents); 1792 } 1793 1794 /* 1795 * tunkqfilter - support for the kevent() system call. 1796 */ 1797 static int 1798 tunkqfilter(struct cdev *dev, struct knote *kn) 1799 { 1800 struct tuntap_softc *tp = dev->si_drv1; 1801 struct ifnet *ifp = TUN2IFP(tp); 1802 1803 switch(kn->kn_filter) { 1804 case EVFILT_READ: 1805 TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n", 1806 ifp->if_xname, dev2unit(dev)); 1807 kn->kn_fop = &tun_read_filterops; 1808 break; 1809 1810 case EVFILT_WRITE: 1811 TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n", 1812 ifp->if_xname, dev2unit(dev)); 1813 kn->kn_fop = &tun_write_filterops; 1814 break; 1815 1816 default: 1817 TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n", 1818 ifp->if_xname, dev2unit(dev)); 1819 return(EINVAL); 1820 } 1821 1822 kn->kn_hook = tp; 1823 knlist_add(&tp->tun_rsel.si_note, kn, 0); 1824 1825 return (0); 1826 } 1827 1828 /* 1829 * Return true of there is data in the interface queue. 1830 */ 1831 static int 1832 tunkqread(struct knote *kn, long hint) 1833 { 1834 int ret; 1835 struct tuntap_softc *tp = kn->kn_hook; 1836 struct cdev *dev = tp->tun_dev; 1837 struct ifnet *ifp = TUN2IFP(tp); 1838 1839 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 1840 TUNDEBUG(ifp, 1841 "%s have data in the queue. 
Len = %d, minor = %#x\n", 1842 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 1843 ret = 1; 1844 } else { 1845 TUNDEBUG(ifp, 1846 "%s waiting for data, minor = %#x\n", ifp->if_xname, 1847 dev2unit(dev)); 1848 ret = 0; 1849 } 1850 1851 return (ret); 1852 } 1853 1854 /* 1855 * Always can write, always return MTU in kn->data. 1856 */ 1857 static int 1858 tunkqwrite(struct knote *kn, long hint) 1859 { 1860 struct tuntap_softc *tp = kn->kn_hook; 1861 struct ifnet *ifp = TUN2IFP(tp); 1862 1863 kn->kn_data = ifp->if_mtu; 1864 1865 return (1); 1866 } 1867 1868 static void 1869 tunkqdetach(struct knote *kn) 1870 { 1871 struct tuntap_softc *tp = kn->kn_hook; 1872 1873 knlist_remove(&tp->tun_rsel.si_note, kn, 0); 1874 } 1875