1 /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ 2 /*- 3 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 4 * 5 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com> 6 * All rights reserved. 7 * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * BASED ON: 32 * ------------------------------------------------------------------------- 33 * 34 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 35 * Nottingham University 1987. 36 * 37 * This source may be freely distributed, however I would be interested 38 * in any changes that are made. 
39 * 40 * This driver takes packets off the IP i/f and hands them up to a 41 * user process to have its wicked way with. This driver has it's 42 * roots in a similar driver written by Phil Cockcroft (formerly) at 43 * UCL. This driver is based much more on read/write/poll mode of 44 * operation though. 45 * 46 * $FreeBSD$ 47 */ 48 49 #include "opt_inet.h" 50 #include "opt_inet6.h" 51 52 #include <sys/param.h> 53 #include <sys/lock.h> 54 #include <sys/priv.h> 55 #include <sys/proc.h> 56 #include <sys/systm.h> 57 #include <sys/jail.h> 58 #include <sys/mbuf.h> 59 #include <sys/module.h> 60 #include <sys/socket.h> 61 #include <sys/eventhandler.h> 62 #include <sys/fcntl.h> 63 #include <sys/filio.h> 64 #include <sys/sockio.h> 65 #include <sys/sx.h> 66 #include <sys/syslog.h> 67 #include <sys/ttycom.h> 68 #include <sys/poll.h> 69 #include <sys/selinfo.h> 70 #include <sys/signalvar.h> 71 #include <sys/filedesc.h> 72 #include <sys/kernel.h> 73 #include <sys/sysctl.h> 74 #include <sys/conf.h> 75 #include <sys/uio.h> 76 #include <sys/malloc.h> 77 #include <sys/random.h> 78 #include <sys/ctype.h> 79 80 #include <net/ethernet.h> 81 #include <net/if.h> 82 #include <net/if_var.h> 83 #include <net/if_clone.h> 84 #include <net/if_dl.h> 85 #include <net/if_media.h> 86 #include <net/if_types.h> 87 #include <net/if_vlan_var.h> 88 #include <net/netisr.h> 89 #include <net/route.h> 90 #include <net/vnet.h> 91 #include <netinet/in.h> 92 #ifdef INET 93 #include <netinet/ip.h> 94 #endif 95 #ifdef INET6 96 #include <netinet/ip6.h> 97 #include <netinet6/ip6_var.h> 98 #endif 99 #include <netinet/udp.h> 100 #include <netinet/tcp.h> 101 #include <net/bpf.h> 102 #include <net/if_tap.h> 103 #include <net/if_tun.h> 104 105 #include <dev/virtio/network/virtio_net.h> 106 107 #include <sys/queue.h> 108 #include <sys/condvar.h> 109 #include <security/mac/mac_framework.h> 110 111 struct tuntap_driver; 112 113 /* 114 * tun_list is protected by global tunmtx. 
Other mutable fields are 115 * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is 116 * static for the duration of a tunnel interface. 117 */ 118 struct tuntap_softc { 119 TAILQ_ENTRY(tuntap_softc) tun_list; 120 struct cdev *tun_alias; 121 struct cdev *tun_dev; 122 u_short tun_flags; /* misc flags */ 123 #define TUN_OPEN 0x0001 124 #define TUN_INITED 0x0002 125 #define TUN_UNUSED1 0x0008 126 #define TUN_DSTADDR 0x0010 127 #define TUN_LMODE 0x0020 128 #define TUN_RWAIT 0x0040 129 #define TUN_ASYNC 0x0080 130 #define TUN_IFHEAD 0x0100 131 #define TUN_DYING 0x0200 132 #define TUN_L2 0x0400 133 #define TUN_VMNET 0x0800 134 135 #define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) 136 #define TUN_READY (TUN_OPEN | TUN_INITED) 137 138 pid_t tun_pid; /* owning pid */ 139 struct ifnet *tun_ifp; /* the interface */ 140 struct sigio *tun_sigio; /* async I/O info */ 141 struct tuntap_driver *tun_drv; /* appropriate driver */ 142 struct selinfo tun_rsel; /* read select */ 143 struct mtx tun_mtx; /* softc field mutex */ 144 struct cv tun_cv; /* for ref'd dev destroy */ 145 struct ether_addr tun_ether; /* remote address */ 146 int tun_busy; /* busy count */ 147 int tun_vhdrlen; /* virtio-net header length */ 148 }; 149 #define TUN2IFP(sc) ((sc)->tun_ifp) 150 151 #define TUNDEBUG if (tundebug) if_printf 152 153 #define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx) 154 #define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx) 155 #define TUN_LOCK_ASSERT(tp) mtx_assert(&(tp)->tun_mtx, MA_OWNED); 156 157 #define TUN_VMIO_FLAG_MASK 0x0fff 158 159 /* 160 * Interface capabilities of a tap device that supports the virtio-net 161 * header. 
162 */ 163 #define TAP_VNET_HDR_CAPS (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 \ 164 | IFCAP_VLAN_HWCSUM \ 165 | IFCAP_TSO | IFCAP_LRO \ 166 | IFCAP_VLAN_HWTSO) 167 168 #define TAP_ALL_OFFLOAD (CSUM_TSO | CSUM_TCP | CSUM_UDP |\ 169 CSUM_TCP_IPV6 | CSUM_UDP_IPV6) 170 171 172 /* 173 * All mutable global variables in if_tun are locked using tunmtx, with 174 * the exception of tundebug, which is used unlocked, and the drivers' *clones, 175 * which are static after setup. 176 */ 177 static struct mtx tunmtx; 178 static eventhandler_tag arrival_tag; 179 static eventhandler_tag clone_tag; 180 static const char tunname[] = "tun"; 181 static const char tapname[] = "tap"; 182 static const char vmnetname[] = "vmnet"; 183 static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); 184 static int tundebug = 0; 185 static int tundclone = 1; 186 static int tap_allow_uopen = 0; /* allow user devfs cloning */ 187 static int tapuponopen = 0; /* IFF_UP on open() */ 188 static int tapdclone = 1; /* enable devfs cloning */ 189 190 static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); 191 SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); 192 193 static struct sx tun_ioctl_sx; 194 SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl"); 195 196 SYSCTL_DECL(_net_link); 197 /* tun */ 198 static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 199 "IP tunnel software network interface"); 200 SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0, 201 "Enable legacy devfs interface creation"); 202 203 /* tap */ 204 static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 205 "Ethernet tunnel software network interface"); 206 SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0, 207 "Enable legacy devfs interface creation for all users"); 208 SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, 209 "Bring interface up when /dev/tap is opened"); 
210 SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, 211 "Enable legacy devfs interface creation"); 212 SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, ""); 213 214 static int tun_create_device(struct tuntap_driver *drv, int unit, 215 struct ucred *cr, struct cdev **dev, const char *name); 216 static int tun_busy_locked(struct tuntap_softc *tp); 217 static void tun_unbusy_locked(struct tuntap_softc *tp); 218 static int tun_busy(struct tuntap_softc *tp); 219 static void tun_unbusy(struct tuntap_softc *tp); 220 221 static int tuntap_name2info(const char *name, int *unit, int *flags); 222 static void tunclone(void *arg, struct ucred *cred, char *name, 223 int namelen, struct cdev **dev); 224 static void tuncreate(struct cdev *dev); 225 static void tundtor(void *data); 226 static void tunrename(void *arg, struct ifnet *ifp); 227 static int tunifioctl(struct ifnet *, u_long, caddr_t); 228 static void tuninit(struct ifnet *); 229 static void tunifinit(void *xtp); 230 static int tuntapmodevent(module_t, int, void *); 231 static int tunoutput(struct ifnet *, struct mbuf *, 232 const struct sockaddr *, struct route *ro); 233 static void tunstart(struct ifnet *); 234 static void tunstart_l2(struct ifnet *); 235 236 static int tun_clone_match(struct if_clone *ifc, const char *name); 237 static int tap_clone_match(struct if_clone *ifc, const char *name); 238 static int vmnet_clone_match(struct if_clone *ifc, const char *name); 239 static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t); 240 static int tun_clone_destroy(struct if_clone *, struct ifnet *); 241 static void tun_vnethdr_set(struct ifnet *ifp, int vhdrlen); 242 243 static d_open_t tunopen; 244 static d_read_t tunread; 245 static d_write_t tunwrite; 246 static d_ioctl_t tunioctl; 247 static d_poll_t tunpoll; 248 static d_kqfilter_t tunkqfilter; 249 250 static int tunkqread(struct knote *, long); 251 static int tunkqwrite(struct knote *, long); 252 
static void tunkqdetach(struct knote *); 253 254 static struct filterops tun_read_filterops = { 255 .f_isfd = 1, 256 .f_attach = NULL, 257 .f_detach = tunkqdetach, 258 .f_event = tunkqread, 259 }; 260 261 static struct filterops tun_write_filterops = { 262 .f_isfd = 1, 263 .f_attach = NULL, 264 .f_detach = tunkqdetach, 265 .f_event = tunkqwrite, 266 }; 267 268 static struct tuntap_driver { 269 struct cdevsw cdevsw; 270 int ident_flags; 271 struct unrhdr *unrhdr; 272 struct clonedevs *clones; 273 ifc_match_t *clone_match_fn; 274 ifc_create_t *clone_create_fn; 275 ifc_destroy_t *clone_destroy_fn; 276 } tuntap_drivers[] = { 277 { 278 .ident_flags = 0, 279 .cdevsw = { 280 .d_version = D_VERSION, 281 .d_flags = D_NEEDMINOR, 282 .d_open = tunopen, 283 .d_read = tunread, 284 .d_write = tunwrite, 285 .d_ioctl = tunioctl, 286 .d_poll = tunpoll, 287 .d_kqfilter = tunkqfilter, 288 .d_name = tunname, 289 }, 290 .clone_match_fn = tun_clone_match, 291 .clone_create_fn = tun_clone_create, 292 .clone_destroy_fn = tun_clone_destroy, 293 }, 294 { 295 .ident_flags = TUN_L2, 296 .cdevsw = { 297 .d_version = D_VERSION, 298 .d_flags = D_NEEDMINOR, 299 .d_open = tunopen, 300 .d_read = tunread, 301 .d_write = tunwrite, 302 .d_ioctl = tunioctl, 303 .d_poll = tunpoll, 304 .d_kqfilter = tunkqfilter, 305 .d_name = tapname, 306 }, 307 .clone_match_fn = tap_clone_match, 308 .clone_create_fn = tun_clone_create, 309 .clone_destroy_fn = tun_clone_destroy, 310 }, 311 { 312 .ident_flags = TUN_L2 | TUN_VMNET, 313 .cdevsw = { 314 .d_version = D_VERSION, 315 .d_flags = D_NEEDMINOR, 316 .d_open = tunopen, 317 .d_read = tunread, 318 .d_write = tunwrite, 319 .d_ioctl = tunioctl, 320 .d_poll = tunpoll, 321 .d_kqfilter = tunkqfilter, 322 .d_name = vmnetname, 323 }, 324 .clone_match_fn = vmnet_clone_match, 325 .clone_create_fn = tun_clone_create, 326 .clone_destroy_fn = tun_clone_destroy, 327 }, 328 }; 329 330 struct tuntap_driver_cloner { 331 SLIST_ENTRY(tuntap_driver_cloner) link; 332 struct tuntap_driver 
*drv; 333 struct if_clone *cloner; 334 }; 335 336 VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) = 337 SLIST_HEAD_INITIALIZER(tuntap_driver_cloners); 338 339 #define V_tuntap_driver_cloners VNET(tuntap_driver_cloners) 340 341 /* 342 * Mechanism for marking a tunnel device as busy so that we can safely do some 343 * orthogonal operations (such as operations on devices) without racing against 344 * tun_destroy. tun_destroy will wait on the condvar if we're at all busy or 345 * open, to be woken up when the condition is alleviated. 346 */ 347 static int 348 tun_busy_locked(struct tuntap_softc *tp) 349 { 350 351 TUN_LOCK_ASSERT(tp); 352 if ((tp->tun_flags & TUN_DYING) != 0) { 353 /* 354 * Perhaps unintuitive, but the device is busy going away. 355 * Other interpretations of EBUSY from tun_busy make little 356 * sense, since making a busy device even more busy doesn't 357 * sound like a problem. 358 */ 359 return (EBUSY); 360 } 361 362 ++tp->tun_busy; 363 return (0); 364 } 365 366 static void 367 tun_unbusy_locked(struct tuntap_softc *tp) 368 { 369 370 TUN_LOCK_ASSERT(tp); 371 KASSERT(tp->tun_busy != 0, ("tun_unbusy: called for non-busy tunnel")); 372 373 --tp->tun_busy; 374 /* Wake up anything that may be waiting on our busy tunnel. */ 375 if (tp->tun_busy == 0) 376 cv_broadcast(&tp->tun_cv); 377 } 378 379 static int 380 tun_busy(struct tuntap_softc *tp) 381 { 382 int ret; 383 384 TUN_LOCK(tp); 385 ret = tun_busy_locked(tp); 386 TUN_UNLOCK(tp); 387 return (ret); 388 } 389 390 391 static void 392 tun_unbusy(struct tuntap_softc *tp) 393 { 394 395 TUN_LOCK(tp); 396 tun_unbusy_locked(tp); 397 TUN_UNLOCK(tp); 398 } 399 400 /* 401 * Sets unit and/or flags given the device name. Must be called with correct 402 * vnet context. 
403 */ 404 static int 405 tuntap_name2info(const char *name, int *outunit, int *outflags) 406 { 407 struct tuntap_driver *drv; 408 struct tuntap_driver_cloner *drvc; 409 char *dname; 410 int flags, unit; 411 bool found; 412 413 if (name == NULL) 414 return (EINVAL); 415 416 /* 417 * Needed for dev_stdclone, but dev_stdclone will not modify, it just 418 * wants to be able to pass back a char * through the second param. We 419 * will always set that as NULL here, so we'll fake it. 420 */ 421 dname = __DECONST(char *, name); 422 found = false; 423 424 KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners), 425 ("tuntap_driver_cloners failed to initialize")); 426 SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) { 427 KASSERT(drvc->drv != NULL, 428 ("tuntap_driver_cloners entry not properly initialized")); 429 drv = drvc->drv; 430 431 if (strcmp(name, drv->cdevsw.d_name) == 0) { 432 found = true; 433 unit = -1; 434 flags = drv->ident_flags; 435 break; 436 } 437 438 if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) { 439 found = true; 440 flags = drv->ident_flags; 441 break; 442 } 443 } 444 445 if (!found) 446 return (ENXIO); 447 448 if (outunit != NULL) 449 *outunit = unit; 450 if (outflags != NULL) 451 *outflags = flags; 452 return (0); 453 } 454 455 /* 456 * Get driver information from a set of flags specified. Masks the identifying 457 * part of the flags and compares it against all of the available 458 * tuntap_drivers. Must be called with correct vnet context. 
459 */ 460 static struct tuntap_driver * 461 tuntap_driver_from_flags(int tun_flags) 462 { 463 struct tuntap_driver *drv; 464 struct tuntap_driver_cloner *drvc; 465 466 KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners), 467 ("tuntap_driver_cloners failed to initialize")); 468 SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) { 469 KASSERT(drvc->drv != NULL, 470 ("tuntap_driver_cloners entry not properly initialized")); 471 drv = drvc->drv; 472 if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags) 473 return (drv); 474 } 475 476 return (NULL); 477 } 478 479 480 481 static int 482 tun_clone_match(struct if_clone *ifc, const char *name) 483 { 484 int tunflags; 485 486 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 487 if ((tunflags & TUN_L2) == 0) 488 return (1); 489 } 490 491 return (0); 492 } 493 494 static int 495 tap_clone_match(struct if_clone *ifc, const char *name) 496 { 497 int tunflags; 498 499 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 500 if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2) 501 return (1); 502 } 503 504 return (0); 505 } 506 507 static int 508 vmnet_clone_match(struct if_clone *ifc, const char *name) 509 { 510 int tunflags; 511 512 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 513 if ((tunflags & TUN_VMNET) != 0) 514 return (1); 515 } 516 517 return (0); 518 } 519 520 static int 521 tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 522 { 523 struct tuntap_driver *drv; 524 struct cdev *dev; 525 int err, i, tunflags, unit; 526 527 tunflags = 0; 528 /* The name here tells us exactly what we're creating */ 529 err = tuntap_name2info(name, &unit, &tunflags); 530 if (err != 0) 531 return (err); 532 533 drv = tuntap_driver_from_flags(tunflags); 534 if (drv == NULL) 535 return (ENXIO); 536 537 if (unit != -1) { 538 /* If this unit number is still available that's okay. 
*/ 539 if (alloc_unr_specific(drv->unrhdr, unit) == -1) 540 return (EEXIST); 541 } else { 542 unit = alloc_unr(drv->unrhdr); 543 } 544 545 snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit); 546 547 /* find any existing device, or allocate new unit number */ 548 dev = NULL; 549 i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0); 550 /* No preexisting struct cdev *, create one */ 551 if (i != 0) 552 i = tun_create_device(drv, unit, NULL, &dev, name); 553 if (i == 0) 554 tuncreate(dev); 555 556 return (i); 557 } 558 559 static void 560 tunclone(void *arg, struct ucred *cred, char *name, int namelen, 561 struct cdev **dev) 562 { 563 char devname[SPECNAMELEN + 1]; 564 struct tuntap_driver *drv; 565 int append_unit, i, u, tunflags; 566 bool mayclone; 567 568 if (*dev != NULL) 569 return; 570 571 tunflags = 0; 572 CURVNET_SET(CRED_TO_VNET(cred)); 573 if (tuntap_name2info(name, &u, &tunflags) != 0) 574 goto out; /* Not recognized */ 575 576 if (u != -1 && u > IF_MAXUNIT) 577 goto out; /* Unit number too high */ 578 579 mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0; 580 if ((tunflags & TUN_L2) != 0) { 581 /* tap/vmnet allow user open with a sysctl */ 582 mayclone = (mayclone || tap_allow_uopen) && tapdclone; 583 } else { 584 mayclone = mayclone && tundclone; 585 } 586 587 /* 588 * If tun cloning is enabled, only the superuser can create an 589 * interface. 
590 */ 591 if (!mayclone) 592 goto out; 593 594 if (u == -1) 595 append_unit = 1; 596 else 597 append_unit = 0; 598 599 drv = tuntap_driver_from_flags(tunflags); 600 if (drv == NULL) 601 goto out; 602 603 /* find any existing device, or allocate new unit number */ 604 i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0); 605 if (i) { 606 if (append_unit) { 607 namelen = snprintf(devname, sizeof(devname), "%s%d", 608 name, u); 609 name = devname; 610 } 611 612 i = tun_create_device(drv, u, cred, dev, name); 613 } 614 if (i == 0) 615 if_clone_create(name, namelen, NULL); 616 out: 617 CURVNET_RESTORE(); 618 } 619 620 static void 621 tun_destroy(struct tuntap_softc *tp) 622 { 623 624 TUN_LOCK(tp); 625 tp->tun_flags |= TUN_DYING; 626 if (tp->tun_busy != 0) 627 cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); 628 else 629 TUN_UNLOCK(tp); 630 631 CURVNET_SET(TUN2IFP(tp)->if_vnet); 632 633 /* destroy_dev will take care of any alias. */ 634 destroy_dev(tp->tun_dev); 635 seldrain(&tp->tun_rsel); 636 knlist_clear(&tp->tun_rsel.si_note, 0); 637 knlist_destroy(&tp->tun_rsel.si_note); 638 if ((tp->tun_flags & TUN_L2) != 0) { 639 ether_ifdetach(TUN2IFP(tp)); 640 } else { 641 bpfdetach(TUN2IFP(tp)); 642 if_detach(TUN2IFP(tp)); 643 } 644 sx_xlock(&tun_ioctl_sx); 645 TUN2IFP(tp)->if_softc = NULL; 646 sx_xunlock(&tun_ioctl_sx); 647 free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit); 648 if_free(TUN2IFP(tp)); 649 mtx_destroy(&tp->tun_mtx); 650 cv_destroy(&tp->tun_cv); 651 free(tp, M_TUN); 652 CURVNET_RESTORE(); 653 } 654 655 static int 656 tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp) 657 { 658 struct tuntap_softc *tp = ifp->if_softc; 659 660 mtx_lock(&tunmtx); 661 TAILQ_REMOVE(&tunhead, tp, tun_list); 662 mtx_unlock(&tunmtx); 663 tun_destroy(tp); 664 665 return (0); 666 } 667 668 static void 669 vnet_tun_init(const void *unused __unused) 670 { 671 struct tuntap_driver *drv; 672 struct tuntap_driver_cloner *drvc; 673 int i; 674 675 for (i = 0; i < 
nitems(tuntap_drivers); ++i) { 676 drv = &tuntap_drivers[i]; 677 drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO); 678 679 drvc->drv = drv; 680 drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0, 681 drv->clone_match_fn, drv->clone_create_fn, 682 drv->clone_destroy_fn); 683 SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link); 684 }; 685 } 686 VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, 687 vnet_tun_init, NULL); 688 689 static void 690 vnet_tun_uninit(const void *unused __unused) 691 { 692 struct tuntap_driver_cloner *drvc; 693 694 while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) { 695 drvc = SLIST_FIRST(&V_tuntap_driver_cloners); 696 SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link); 697 698 if_clone_detach(drvc->cloner); 699 free(drvc, M_TUN); 700 } 701 } 702 VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, 703 vnet_tun_uninit, NULL); 704 705 static void 706 tun_uninit(const void *unused __unused) 707 { 708 struct tuntap_driver *drv; 709 struct tuntap_softc *tp; 710 int i; 711 712 EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag); 713 EVENTHANDLER_DEREGISTER(dev_clone, clone_tag); 714 drain_dev_clone_events(); 715 716 mtx_lock(&tunmtx); 717 while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { 718 TAILQ_REMOVE(&tunhead, tp, tun_list); 719 mtx_unlock(&tunmtx); 720 tun_destroy(tp); 721 mtx_lock(&tunmtx); 722 } 723 mtx_unlock(&tunmtx); 724 for (i = 0; i < nitems(tuntap_drivers); ++i) { 725 drv = &tuntap_drivers[i]; 726 delete_unrhdr(drv->unrhdr); 727 clone_cleanup(&drv->clones); 728 } 729 mtx_destroy(&tunmtx); 730 } 731 SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL); 732 733 static struct tuntap_driver * 734 tuntap_driver_from_ifnet(const struct ifnet *ifp) 735 { 736 struct tuntap_driver *drv; 737 int i; 738 739 if (ifp == NULL) 740 return (NULL); 741 742 for (i = 0; i < nitems(tuntap_drivers); ++i) { 743 drv = &tuntap_drivers[i]; 744 if (strcmp(ifp->if_dname, drv->cdevsw.d_name) == 0) 745 return 
(drv); 746 } 747 748 return (NULL); 749 } 750 751 static int 752 tuntapmodevent(module_t mod, int type, void *data) 753 { 754 struct tuntap_driver *drv; 755 int i; 756 757 switch (type) { 758 case MOD_LOAD: 759 mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF); 760 for (i = 0; i < nitems(tuntap_drivers); ++i) { 761 drv = &tuntap_drivers[i]; 762 clone_setup(&drv->clones); 763 drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx); 764 } 765 arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event, 766 tunrename, 0, 1000); 767 if (arrival_tag == NULL) 768 return (ENOMEM); 769 clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); 770 if (clone_tag == NULL) 771 return (ENOMEM); 772 break; 773 case MOD_UNLOAD: 774 /* See tun_uninit, so it's done after the vnet_sysuninit() */ 775 break; 776 default: 777 return EOPNOTSUPP; 778 } 779 return 0; 780 } 781 782 static moduledata_t tuntap_mod = { 783 "if_tuntap", 784 tuntapmodevent, 785 0 786 }; 787 788 /* We'll only ever have these two, so no need for a macro. 
*/ 789 static moduledata_t tun_mod = { "if_tun", NULL, 0 }; 790 static moduledata_t tap_mod = { "if_tap", NULL, 0 }; 791 792 DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 793 MODULE_VERSION(if_tuntap, 1); 794 DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 795 MODULE_VERSION(if_tun, 1); 796 DECLARE_MODULE(if_tap, tap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 797 MODULE_VERSION(if_tap, 1); 798 799 static int 800 tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr, 801 struct cdev **dev, const char *name) 802 { 803 struct make_dev_args args; 804 struct tuntap_softc *tp; 805 int error; 806 807 tp = malloc(sizeof(*tp), M_TUN, M_WAITOK | M_ZERO); 808 mtx_init(&tp->tun_mtx, "tun_mtx", NULL, MTX_DEF); 809 cv_init(&tp->tun_cv, "tun_condvar"); 810 tp->tun_flags = drv->ident_flags; 811 tp->tun_drv = drv; 812 813 make_dev_args_init(&args); 814 if (cr != NULL) 815 args.mda_flags = MAKEDEV_REF; 816 args.mda_devsw = &drv->cdevsw; 817 args.mda_cr = cr; 818 args.mda_uid = UID_UUCP; 819 args.mda_gid = GID_DIALER; 820 args.mda_mode = 0600; 821 args.mda_unit = unit; 822 args.mda_si_drv1 = tp; 823 error = make_dev_s(&args, dev, "%s", name); 824 if (error != 0) { 825 free(tp, M_TUN); 826 return (error); 827 } 828 829 KASSERT((*dev)->si_drv1 != NULL, 830 ("Failed to set si_drv1 at %s creation", name)); 831 tp->tun_dev = *dev; 832 knlist_init_mtx(&tp->tun_rsel.si_note, &tp->tun_mtx); 833 mtx_lock(&tunmtx); 834 TAILQ_INSERT_TAIL(&tunhead, tp, tun_list); 835 mtx_unlock(&tunmtx); 836 return (0); 837 } 838 839 static void 840 tunstart(struct ifnet *ifp) 841 { 842 struct tuntap_softc *tp = ifp->if_softc; 843 struct mbuf *m; 844 845 TUNDEBUG(ifp, "starting\n"); 846 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 847 IFQ_LOCK(&ifp->if_snd); 848 IFQ_POLL_NOLOCK(&ifp->if_snd, m); 849 if (m == NULL) { 850 IFQ_UNLOCK(&ifp->if_snd); 851 return; 852 } 853 IFQ_UNLOCK(&ifp->if_snd); 854 } 855 856 TUN_LOCK(tp); 857 if (tp->tun_flags & TUN_RWAIT) { 858 tp->tun_flags 
&= ~TUN_RWAIT; 859 wakeup(tp); 860 } 861 selwakeuppri(&tp->tun_rsel, PZERO + 1); 862 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 863 if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) { 864 TUN_UNLOCK(tp); 865 pgsigio(&tp->tun_sigio, SIGIO, 0); 866 } else 867 TUN_UNLOCK(tp); 868 } 869 870 /* 871 * tunstart_l2 872 * 873 * queue packets from higher level ready to put out 874 */ 875 static void 876 tunstart_l2(struct ifnet *ifp) 877 { 878 struct tuntap_softc *tp = ifp->if_softc; 879 880 TUNDEBUG(ifp, "starting\n"); 881 882 /* 883 * do not junk pending output if we are in VMnet mode. 884 * XXX: can this do any harm because of queue overflow? 885 */ 886 887 TUN_LOCK(tp); 888 if (((tp->tun_flags & TUN_VMNET) == 0) && 889 ((tp->tun_flags & TUN_READY) != TUN_READY)) { 890 struct mbuf *m; 891 892 /* Unlocked read. */ 893 TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags); 894 895 for (;;) { 896 IF_DEQUEUE(&ifp->if_snd, m); 897 if (m != NULL) { 898 m_freem(m); 899 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 900 } else 901 break; 902 } 903 TUN_UNLOCK(tp); 904 905 return; 906 } 907 908 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 909 910 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 911 if (tp->tun_flags & TUN_RWAIT) { 912 tp->tun_flags &= ~TUN_RWAIT; 913 wakeup(tp); 914 } 915 916 if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) { 917 TUN_UNLOCK(tp); 918 pgsigio(&tp->tun_sigio, SIGIO, 0); 919 TUN_LOCK(tp); 920 } 921 922 selwakeuppri(&tp->tun_rsel, PZERO+1); 923 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 924 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ 925 } 926 927 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 928 TUN_UNLOCK(tp); 929 } /* tunstart_l2 */ 930 931 /* XXX: should return an error code so it can fail. 
*/
/*
 * Allocate, initialize and attach the ifnet for the softc stored in
 * dev->si_drv1.  L2 tunnels are attached as Ethernet interfaces with a
 * generated address; all others are attached as IFT_PPP point-to-point
 * interfaces with a DLT_NULL bpf tap.  Sets TUN_INITED when done and
 * panics if if_alloc() fails.
 */
static void
tuncreate(struct cdev *dev)
{
	struct tuntap_driver *drv;
	struct tuntap_softc *tp;
	struct ifnet *ifp;
	struct ether_addr eaddr;
	int iflags;
	u_char type;

	tp = dev->si_drv1;
	KASSERT(tp != NULL,
	    ("si_drv1 should have been initialized at creation"));

	drv = tp->tun_drv;
	iflags = IFF_MULTICAST;
	if ((tp->tun_flags & TUN_L2) != 0) {
		type = IFT_ETHER;
		iflags |= IFF_BROADCAST | IFF_SIMPLEX;
	} else {
		type = IFT_PPP;
		iflags |= IFF_POINTOPOINT;
	}
	ifp = tp->tun_ifp = if_alloc(type);
	if (ifp == NULL)
		panic("%s%d: failed to if_alloc() interface.\n",
		    drv->cdevsw.d_name, dev2unit(dev));
	ifp->if_softc = tp;
	if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
	ifp->if_ioctl = tunifioctl;
	ifp->if_flags = iflags;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_capabilities |= IFCAP_LINKSTATE;
	ifp->if_capenable |= IFCAP_LINKSTATE;

	if ((tp->tun_flags & TUN_L2) != 0) {
		ifp->if_init = tunifinit;
		ifp->if_start = tunstart_l2;

		ether_gen_addr(ifp, &eaddr);
		ether_ifattach(ifp, eaddr.octet);
	} else {
		ifp->if_mtu = TUNMTU;
		ifp->if_start = tunstart;
		ifp->if_output = tunoutput;

		ifp->if_snd.ifq_drv_maxlen = 0;
		IFQ_SET_READY(&ifp->if_snd);

		if_attach(ifp);
		bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
	}

	/* The interface is attached; opens are allowed from here on. */
	TUN_LOCK(tp);
	tp->tun_flags |= TUN_INITED;
	TUN_UNLOCK(tp);

	TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
	    ifp->if_xname, dev2unit(dev));
}

/*
 * ifnet_arrival_event handler.  Acts only on interfaces flagged IFF_RENAMING
 * whose if_dname matches one of our drivers: any previous cdev alias is
 * destroyed and, unless the new name equals the cdev's own name, an alias
 * named after the interface is created.
 */
static void
tunrename(void *arg __unused, struct ifnet *ifp)
{
	struct tuntap_softc *tp;
	int error;

	if ((ifp->if_flags & IFF_RENAMING) == 0)
		return;

	if (tuntap_driver_from_ifnet(ifp) == NULL)
		return;

	/*
	 * We need to grab the ioctl sx long enough to make sure the softc is
	 * still there.  If it is, we can safely try to busy the tun device.
	 * The busy may fail if the device is currently dying, in which case
	 * we do nothing.  If it doesn't fail, the busy count stops the device
	 * from dying until we've created the alias (that will then be
	 * subsequently destroyed).
	 */
	sx_xlock(&tun_ioctl_sx);
	tp = ifp->if_softc;
	if (tp == NULL) {
		sx_xunlock(&tun_ioctl_sx);
		return;
	}
	error = tun_busy(tp);
	sx_xunlock(&tun_ioctl_sx);
	if (error != 0)
		return;
	/* Drop the alias left over from an earlier rename, if any. */
	if (tp->tun_alias != NULL) {
		destroy_dev(tp->tun_alias);
		tp->tun_alias = NULL;
	}

	/* Renamed back to the cdev's own name: no alias needed. */
	if (strcmp(ifp->if_xname, tp->tun_dev->si_name) == 0)
		goto out;

	/*
	 * Failure's ok, aliases are created on a best effort basis.  If a
	 * tun user/consumer decides to rename the interface to conflict with
	 * another device (non-ifnet) on the system, we will assume they know
	 * what they are doing.  make_dev_alias_p won't touch tun_alias on
	 * failure, so we use it but ignore the return value.
	 */
	make_dev_alias_p(MAKEDEV_CHECKNAME, &tp->tun_alias, tp->tun_dev, "%s",
	    ifp->if_xname);
out:
	tun_unbusy(tp);
}

/*
 * d_open handler shared by tun, tap and vmnet.  Returns the
 * tuntap_name2info() error for an unrecognized cdev name, ENXIO when the
 * interface has not been initialized, and EBUSY when the device is already
 * open or dying.  On success it takes a busy reference (released by
 * tundtor), records the opener's pid, sets TUN_OPEN, raises the link state
 * and registers tundtor as the cdevpriv destructor.
 */
static int
tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct ifnet	*ifp;
	struct tuntap_softc *tp;
	int error, tunflags;

	tunflags = 0;
	CURVNET_SET(TD_TO_VNET(td));
	error = tuntap_name2info(dev->si_name, NULL, &tunflags);
	if (error != 0) {
		CURVNET_RESTORE();
		return (error);	/* Shouldn't happen */
	}

	tp = dev->si_drv1;
	KASSERT(tp != NULL,
	    ("si_drv1 should have been initialized at creation"));

	TUN_LOCK(tp);
	if ((tp->tun_flags & TUN_INITED) == 0) {
		TUN_UNLOCK(tp);
		CURVNET_RESTORE();
		return (ENXIO);
	}
	/* Only one opener at a time, and never once teardown has begun. */
	if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
		TUN_UNLOCK(tp);
		CURVNET_RESTORE();
		return (EBUSY);
	}

	error = tun_busy_locked(tp);
	KASSERT(error == 0, ("Must be able to busy an unopen tunnel"));
	ifp = TUN2IFP(tp);

	if ((tp->tun_flags & TUN_L2) != 0) {
		/* Snapshot the interface's link-level address. */
		bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
		    sizeof(tp->tun_ether.octet));

		ifp->if_drv_flags |= IFF_DRV_RUNNING;
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

		if (tapuponopen)
			ifp->if_flags |= IFF_UP;
	}

	tp->tun_pid = td->td_proc->p_pid;
	tp->tun_flags |= TUN_OPEN;

	if_link_state_change(ifp, LINK_STATE_UP);
	TUNDEBUG(ifp, "open\n");
	TUN_UNLOCK(tp);

	/*
	 * This can fail with either ENOENT or EBUSY.  This is in the middle of
	 * d_open, so ENOENT should not be possible.  EBUSY is possible, but
	 * the only cdevpriv dtor being set will be tundtor and the softc being
	 * passed is constant for a given cdev.  We ignore the possible error
	 * because of this as either "unlikely" or "not actually a problem."
	 */
	(void)devfs_set_cdevpriv(tp, tundtor);
	CURVNET_RESTORE();
	return (0);
}

/*
 * tundtor - tear down the device - mark i/f down & delete
 * routing info
 *
 * Registered as the cdevpriv destructor by tunopen; data is the softc.
 * Clears TUN_OPEN and releases the busy reference taken at open time.
 */
static void
tundtor(void *data)
{
	struct proc *p;
	struct tuntap_softc *tp;
	struct ifnet *ifp;
	bool l2tun;

	tp = data;
	p = curproc;
	ifp = TUN2IFP(tp);

	TUN_LOCK(tp);

	/*
	 * Realistically, we can't be obstinate here.  This only means that the
	 * tuntap device was closed out of order, and the last closer wasn't the
	 * controller.  These are still good to know about, though, as software
	 * should avoid multiple processes with a tuntap device open and
	 * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in
	 * parent).
	 */
	if (p->p_pid != tp->tun_pid) {
		log(LOG_INFO,
		    "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n",
		    p->p_pid, p->p_comm, tp->tun_dev->si_name);
	}

	/*
	 * junk all pending output
	 */
	CURVNET_SET(ifp->if_vnet);

	l2tun = false;
	if ((tp->tun_flags & TUN_L2) != 0) {
		l2tun = true;
		IF_DRAIN(&ifp->if_snd);
	} else {
		IFQ_PURGE(&ifp->if_snd);
	}

	/* For vmnet, we won't do most of the address/route bits */
	if ((tp->tun_flags & TUN_VMNET) != 0 ||
	    (l2tun && (ifp->if_flags & IFF_LINK0) != 0))
		goto out;

	if (ifp->if_flags & IFF_UP) {
		/* if_down() is called with the softc mutex dropped. */
		TUN_UNLOCK(tp);
		if_down(ifp);
		TUN_LOCK(tp);
	}

	/* Delete all addresses and routes which reference this interface. */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		struct ifaddr *ifa;

		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		/* The route/address teardown runs without the softc mutex. */
		TUN_UNLOCK(tp);
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			/* deal w/IPv4 PtP destination; unlocked read */
			if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) {
				rtinit(ifa, (int)RTM_DELETE,
				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
			} else {
				rtinit(ifa, (int)RTM_DELETE, 0);
			}
		}
		if_purgeaddrs(ifp);
		TUN_LOCK(tp);
	}

out:
	if_link_state_change(ifp, LINK_STATE_DOWN);
	CURVNET_RESTORE();

	funsetown(&tp->tun_sigio);
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
	TUNDEBUG (ifp, "closed\n");
	tp->tun_flags &= ~TUN_OPEN;
	tp->tun_pid = 0;
	/* Reset the virtio-net header length to zero for the next opener. */
	tun_vnethdr_set(ifp, 0);

	/* Release the busy reference taken in tunopen. */
	tun_unbusy_locked(tp);
	TUN_UNLOCK(tp);
}

/*
 * Mark the interface running.  A non-L2 tunnel is also marked IFF_UP and
 * gets TUN_DSTADDR set when one of its AF_INET addresses carries a non-zero
 * destination address.  An L2 tunnel clears IFF_DRV_OACTIVE and kicks
 * tunstart_l2.
 */
static void
tuninit(struct ifnet *ifp)
{
	struct tuntap_softc *tp = ifp->if_softc;
#ifdef INET
	struct epoch_tracker et;
	struct ifaddr *ifa;
#endif

	TUNDEBUG(ifp, "tuninit\n");

	TUN_LOCK(tp);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	if ((tp->tun_flags & TUN_L2) == 0) {
		ifp->if_flags |= IFF_UP;
		getmicrotime(&ifp->if_lastchange);
#ifdef INET
		/* The address list is walked inside the net epoch. */
		NET_EPOCH_ENTER(et);
		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family == AF_INET) {
				struct sockaddr_in *si;

				si = (struct sockaddr_in *)ifa->ifa_dstaddr;
				if (si && si->sin_addr.s_addr) {
					tp->tun_flags |= TUN_DSTADDR;
					break;
				}
			}
		}
		NET_EPOCH_EXIT(et);
#endif
		TUN_UNLOCK(tp);
	} else {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		/* tunstart_l2() takes the softc mutex itself. */
		TUN_UNLOCK(tp);
		/* attempt to start output */
		tunstart_l2(ifp);
	}

}

/*
 * Used only for l2 tunnel.
1243 */ 1244 static void 1245 tunifinit(void *xtp) 1246 { 1247 struct tuntap_softc *tp; 1248 1249 tp = (struct tuntap_softc *)xtp; 1250 tuninit(tp->tun_ifp); 1251 } 1252 1253 /* 1254 * To be called under TUN_LOCK. Update ifp->if_hwassist according to the 1255 * current value of ifp->if_capenable. 1256 */ 1257 static void 1258 tun_caps_changed(struct ifnet *ifp) 1259 { 1260 uint64_t hwassist = 0; 1261 1262 TUN_LOCK_ASSERT((struct tuntap_softc *)ifp->if_softc); 1263 if (ifp->if_capenable & IFCAP_TXCSUM) 1264 hwassist |= CSUM_TCP | CSUM_UDP; 1265 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1266 hwassist |= CSUM_TCP_IPV6 1267 | CSUM_UDP_IPV6; 1268 if (ifp->if_capenable & IFCAP_TSO4) 1269 hwassist |= CSUM_IP_TSO; 1270 if (ifp->if_capenable & IFCAP_TSO6) 1271 hwassist |= CSUM_IP6_TSO; 1272 ifp->if_hwassist = hwassist; 1273 } 1274 1275 /* 1276 * To be called under TUN_LOCK. Update tp->tun_vhdrlen and adjust 1277 * if_capabilities and if_capenable as needed. 1278 */ 1279 static void 1280 tun_vnethdr_set(struct ifnet *ifp, int vhdrlen) 1281 { 1282 struct tuntap_softc *tp = ifp->if_softc; 1283 1284 TUN_LOCK_ASSERT(tp); 1285 1286 if (tp->tun_vhdrlen == vhdrlen) 1287 return; 1288 1289 /* 1290 * Update if_capabilities to reflect the 1291 * functionalities offered by the virtio-net 1292 * header. 1293 */ 1294 if (vhdrlen != 0) 1295 ifp->if_capabilities |= 1296 TAP_VNET_HDR_CAPS; 1297 else 1298 ifp->if_capabilities &= 1299 ~TAP_VNET_HDR_CAPS; 1300 /* 1301 * Disable any capabilities that we don't 1302 * support anymore. 1303 */ 1304 ifp->if_capenable &= ifp->if_capabilities; 1305 tun_caps_changed(ifp); 1306 tp->tun_vhdrlen = vhdrlen; 1307 1308 TUNDEBUG(ifp, "vnet_hdr_len=%d, if_capabilities=%x\n", 1309 vhdrlen, ifp->if_capabilities); 1310 } 1311 1312 /* 1313 * Process an ioctl request. 
1314 */ 1315 static int 1316 tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1317 { 1318 struct ifreq *ifr = (struct ifreq *)data; 1319 struct tuntap_softc *tp; 1320 struct ifstat *ifs; 1321 struct ifmediareq *ifmr; 1322 int dummy, error = 0; 1323 bool l2tun; 1324 1325 ifmr = NULL; 1326 sx_xlock(&tun_ioctl_sx); 1327 tp = ifp->if_softc; 1328 if (tp == NULL) { 1329 error = ENXIO; 1330 goto bad; 1331 } 1332 l2tun = (tp->tun_flags & TUN_L2) != 0; 1333 switch(cmd) { 1334 case SIOCGIFSTATUS: 1335 ifs = (struct ifstat *)data; 1336 TUN_LOCK(tp); 1337 if (tp->tun_pid) 1338 snprintf(ifs->ascii, sizeof(ifs->ascii), 1339 "\tOpened by PID %d\n", tp->tun_pid); 1340 else 1341 ifs->ascii[0] = '\0'; 1342 TUN_UNLOCK(tp); 1343 break; 1344 case SIOCSIFADDR: 1345 if (l2tun) 1346 error = ether_ioctl(ifp, cmd, data); 1347 else 1348 tuninit(ifp); 1349 if (error == 0) 1350 TUNDEBUG(ifp, "address set\n"); 1351 break; 1352 case SIOCSIFMTU: 1353 ifp->if_mtu = ifr->ifr_mtu; 1354 TUNDEBUG(ifp, "mtu set\n"); 1355 break; 1356 case SIOCSIFFLAGS: 1357 case SIOCADDMULTI: 1358 case SIOCDELMULTI: 1359 break; 1360 case SIOCGIFMEDIA: 1361 if (!l2tun) { 1362 error = EINVAL; 1363 break; 1364 } 1365 1366 ifmr = (struct ifmediareq *)data; 1367 dummy = ifmr->ifm_count; 1368 ifmr->ifm_count = 1; 1369 ifmr->ifm_status = IFM_AVALID; 1370 ifmr->ifm_active = IFM_ETHER; 1371 if (tp->tun_flags & TUN_OPEN) 1372 ifmr->ifm_status |= IFM_ACTIVE; 1373 ifmr->ifm_current = ifmr->ifm_active; 1374 if (dummy >= 1) { 1375 int media = IFM_ETHER; 1376 error = copyout(&media, ifmr->ifm_ulist, sizeof(int)); 1377 } 1378 break; 1379 case SIOCSIFCAP: 1380 TUN_LOCK(tp); 1381 ifp->if_capenable = ifr->ifr_reqcap; 1382 tun_caps_changed(ifp); 1383 TUN_UNLOCK(tp); 1384 VLAN_CAPABILITIES(ifp); 1385 break; 1386 default: 1387 if (l2tun) { 1388 error = ether_ioctl(ifp, cmd, data); 1389 } else { 1390 error = EINVAL; 1391 } 1392 } 1393 bad: 1394 sx_xunlock(&tun_ioctl_sx); 1395 return (error); 1396 } 1397 1398 /* 1399 * tunoutput - queue 
packets from higher level ready to put out. 1400 */ 1401 static int 1402 tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, 1403 struct route *ro) 1404 { 1405 struct tuntap_softc *tp = ifp->if_softc; 1406 u_short cached_tun_flags; 1407 int error; 1408 u_int32_t af; 1409 1410 TUNDEBUG (ifp, "tunoutput\n"); 1411 1412 #ifdef MAC 1413 error = mac_ifnet_check_transmit(ifp, m0); 1414 if (error) { 1415 m_freem(m0); 1416 return (error); 1417 } 1418 #endif 1419 1420 /* Could be unlocked read? */ 1421 TUN_LOCK(tp); 1422 cached_tun_flags = tp->tun_flags; 1423 TUN_UNLOCK(tp); 1424 if ((cached_tun_flags & TUN_READY) != TUN_READY) { 1425 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); 1426 m_freem (m0); 1427 return (EHOSTDOWN); 1428 } 1429 1430 if ((ifp->if_flags & IFF_UP) != IFF_UP) { 1431 m_freem (m0); 1432 return (EHOSTDOWN); 1433 } 1434 1435 /* BPF writes need to be handled specially. */ 1436 if (dst->sa_family == AF_UNSPEC) 1437 bcopy(dst->sa_data, &af, sizeof(af)); 1438 else 1439 af = dst->sa_family; 1440 1441 if (bpf_peers_present(ifp->if_bpf)) 1442 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0); 1443 1444 /* prepend sockaddr? 
this may abort if the mbuf allocation fails */ 1445 if (cached_tun_flags & TUN_LMODE) { 1446 /* allocate space for sockaddr */ 1447 M_PREPEND(m0, dst->sa_len, M_NOWAIT); 1448 1449 /* if allocation failed drop packet */ 1450 if (m0 == NULL) { 1451 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1452 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1453 return (ENOBUFS); 1454 } else { 1455 bcopy(dst, m0->m_data, dst->sa_len); 1456 } 1457 } 1458 1459 if (cached_tun_flags & TUN_IFHEAD) { 1460 /* Prepend the address family */ 1461 M_PREPEND(m0, 4, M_NOWAIT); 1462 1463 /* if allocation failed drop packet */ 1464 if (m0 == NULL) { 1465 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1466 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1467 return (ENOBUFS); 1468 } else 1469 *(u_int32_t *)m0->m_data = htonl(af); 1470 } else { 1471 #ifdef INET 1472 if (af != AF_INET) 1473 #endif 1474 { 1475 m_freem(m0); 1476 return (EAFNOSUPPORT); 1477 } 1478 } 1479 1480 error = (ifp->if_transmit)(ifp, m0); 1481 if (error) 1482 return (ENOBUFS); 1483 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1484 return (0); 1485 } 1486 1487 /* 1488 * the cdevsw interface is now pretty minimal. 
 *
 * tunioctl - d_ioctl handler for the character device.
 *
 * Requests are dispatched in two stages: first a mode-specific switch
 * (tap requests for an L2 tunnel, tun requests otherwise), then a switch
 * of requests common to both.  A request the first stage does not
 * recognise falls through to the second; one that neither recognises
 * gets ENOTTY.
 *
 * The flag argument is not used.
 */
static int
tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct ifreq ifr, *ifrp;
	struct tuntap_softc *tp = dev->si_drv1;
	struct ifnet *ifp = TUN2IFP(tp);
	struct tuninfo *tunp;
	int error, iflags, ival;
	bool l2tun;

	l2tun = (tp->tun_flags & TUN_L2) != 0;
	if (l2tun) {
		/* tap specific ioctls */
		switch(cmd) {
		/* VMware/VMnet port ioctl's */
#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD4)
		case _IO('V', 0):
			/*
			 * Compat form of the request: the integer argument
			 * is fetched with IOCPARM_IVAL() and data is
			 * re-pointed at it so the code below can be shared.
			 */
			ival = IOCPARM_IVAL(data);
			data = (caddr_t)&ival;
			/* FALLTHROUGH */
#endif
		case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
			/*
			 * Keep only the flags allowed by the mask, never
			 * touch IFF_CANTCHANGE bits, and always force UP.
			 */
			iflags = *(int *)data;
			iflags &= TUN_VMIO_FLAG_MASK;
			iflags &= ~IFF_CANTCHANGE;
			iflags |= IFF_UP;

			TUN_LOCK(tp);
			ifp->if_flags = iflags |
			    (ifp->if_flags & IFF_CANTCHANGE);
			TUN_UNLOCK(tp);

			return (0);
		case SIOCGIFADDR: /* get MAC address of the remote side */
			TUN_LOCK(tp);
			bcopy(&tp->tun_ether.octet, data,
			    sizeof(tp->tun_ether.octet));
			TUN_UNLOCK(tp);

			return (0);
		case SIOCSIFADDR: /* set MAC address of the remote side */
			TUN_LOCK(tp);
			bcopy(data, &tp->tun_ether.octet,
			    sizeof(tp->tun_ether.octet));
			TUN_UNLOCK(tp);

			return (0);
		case TAPSVNETHDR:
			/*
			 * Only three header lengths are accepted: none, the
			 * plain virtio-net header, or the mergeable-rxbuf
			 * variant.
			 */
			ival = *(int *)data;
			if (ival != 0 &&
			    ival != sizeof(struct virtio_net_hdr) &&
			    ival != sizeof(struct virtio_net_hdr_mrg_rxbuf)) {
				return (EINVAL);
			}
			TUN_LOCK(tp);
			tun_vnethdr_set(ifp, ival);
			TUN_UNLOCK(tp);

			return (0);
		case TAPGVNETHDR:
			TUN_LOCK(tp);
			*(int *)data = tp->tun_vhdrlen;
			TUN_UNLOCK(tp);

			return (0);
		}

		/* Fall through to the common ioctls if unhandled */
	} else {
		switch (cmd) {
		case TUNSLMODE:
			/* Enabling TUN_LMODE clears TUN_IFHEAD. */
			TUN_LOCK(tp);
			if (*(int *)data) {
				tp->tun_flags |= TUN_LMODE;
				tp->tun_flags &= ~TUN_IFHEAD;
			} else
				tp->tun_flags &= ~TUN_LMODE;
			TUN_UNLOCK(tp);

			return (0);
		case TUNSIFHEAD:
			/* Enabling TUN_IFHEAD clears TUN_LMODE. */
			TUN_LOCK(tp);
			if (*(int *)data) {
				tp->tun_flags |= TUN_IFHEAD;
				tp->tun_flags &= ~TUN_LMODE;
			} else
				tp->tun_flags &= ~TUN_IFHEAD;
			TUN_UNLOCK(tp);

			return (0);
		case TUNGIFHEAD:
			TUN_LOCK(tp);
			*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
			TUN_UNLOCK(tp);

			return (0);
		case TUNSIFMODE:
			/* deny this if UP */
			if (TUN2IFP(tp)->if_flags & IFF_UP)
				return (EBUSY);

			/*
			 * Exactly one of POINTOPOINT or BROADCAST must be
			 * requested; IFF_MULTICAST may be added to either.
			 */
			switch (*(int *)data & ~IFF_MULTICAST) {
			case IFF_POINTOPOINT:
			case IFF_BROADCAST:
				TUN_LOCK(tp);
				TUN2IFP(tp)->if_flags &=
				    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
				TUN2IFP(tp)->if_flags |= *(int *)data;
				TUN_UNLOCK(tp);

				break;
			default:
				return (EINVAL);
			}

			return (0);
		case TUNSIFPID:
			/* The calling process becomes the recorded owner. */
			TUN_LOCK(tp);
			tp->tun_pid = curthread->td_proc->p_pid;
			TUN_UNLOCK(tp);

			return (0);
		}
		/* Fall through to the common ioctls if unhandled */
	}

	switch (cmd) {
	case TUNGIFNAME:
		ifrp = (struct ifreq *)data;
		strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);

		return (0);
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		/* The interface type cannot be changed through this request. */
		if (TUN2IFP(tp)->if_type != tunp->type)
			return (EPROTOTYPE);
		TUN_LOCK(tp);
		if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
			/*
			 * An MTU change is routed through ifhwioctl() as a
			 * SIOCSIFMTU request on a locally built ifreq.
			 *
			 * NOTE(review): this call is made with TUN_LOCK held.
			 * If ifhwioctl() reaches tunifioctl(), that function
			 * takes tun_ioctl_sx; confirm that acquiring it here
			 * is permitted while this lock is held.
			 */
			strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
			ifr.ifr_mtu = tunp->mtu;
			CURVNET_SET(TUN2IFP(tp)->if_vnet);
			error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
			    (caddr_t)&ifr, td);
			CURVNET_RESTORE();
			if (error) {
				TUN_UNLOCK(tp);
				return (error);
			}
		}
		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
		TUN_UNLOCK(tp);
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		TUN_LOCK(tp);
		tunp->mtu = TUN2IFP(tp)->if_mtu;
		tunp->type = TUN2IFP(tp)->if_type;
		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
		TUN_UNLOCK(tp);
		break;
	case TUNSDEBUG:
		/* tundebug is one variable, not a per-softc setting. */
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
	case FIONBIO:
		/* Accepted; no driver state to update. */
		break;
	case FIOASYNC:
		TUN_LOCK(tp);
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		TUN_UNLOCK(tp);
		break;
	case FIONREAD:
		/*
		 * Report the size of the packet at the head of the send
		 * queue (the sum of m_len over its mbuf chain), or 0 when
		 * the queue is empty.
		 */
		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
			struct mbuf *mb;
			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
			for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
				*(int *)data += mb->m_len;
			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
		} else
			*(int *)data = 0;
		break;
	case FIOSETOWN:
		return (fsetown(*(int *)data, &tp->tun_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&tp->tun_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		/* Same as FIOSETOWN with the id negated. */
		return (fsetown(-(*(int *)data), &tp->tun_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&tp->tun_sigio);
		return (0);

	default:
		return (ENOTTY);
	}
	return (0);
}

/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 *
 * One packet is dequeued from the interface send queue per call.  If the
 * queue is empty the call sleeps on the softc until woken, unless
 * O_NONBLOCK is set in flag, in which case EWOULDBLOCK is returned.
 * When a virtio-net header length is configured, a header is copied out
 * ahead of the packet data.  Whatever part of the packet does not fit in
 * the caller's buffer is dropped.
 *
 * Returns 0 on success, EHOSTDOWN if the tunnel is not ready, the error
 * from mtx_sleep() if the wait is interrupted, or the error from
 * uiomove().
 */
static int
tunread(struct cdev *dev, struct uio *uio, int flag)
{
	struct tuntap_softc *tp = dev->si_drv1;
	struct ifnet *ifp = TUN2IFP(tp);
	struct mbuf *m;
	size_t len;
	int error = 0;

	TUNDEBUG (ifp, "read\n");
	TUN_LOCK(tp);
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		TUN_UNLOCK(tp);
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;

	/* Wait until a packet can be dequeued. */
	for (;;) {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m != NULL)
			break;
		if (flag & O_NONBLOCK) {
			TUN_UNLOCK(tp);
			return (EWOULDBLOCK);
		}
		/* Flag that a reader is waiting, then sleep on the softc. */
		tp->tun_flags |= TUN_RWAIT;
		error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
		    "tunread", 0);
		if (error != 0) {
			TUN_UNLOCK(tp);
			return (error);
		}
	}
	TUN_UNLOCK(tp);

	/* L2 packets are tapped for BPF here, after the lock is dropped. */
	if ((tp->tun_flags & TUN_L2) != 0)
		BPF_MTAP(ifp, m);

	/* Copy out at most tun_vhdrlen header bytes, bounded by the buffer. */
	len = min(tp->tun_vhdrlen, uio->uio_resid);
	if (len > 0) {
		struct virtio_net_hdr_mrg_rxbuf vhdr;

		/* The header is all zeroes unless offloads were requested. */
		bzero(&vhdr, sizeof(vhdr));
		if (m->m_pkthdr.csum_flags & TAP_ALL_OFFLOAD) {
			m = virtio_net_tx_offload(ifp, m, false, &vhdr.hdr);
		}

		TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
		    "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
		    vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
		    vhdr.hdr.gso_size, vhdr.hdr.csum_start,
		    vhdr.hdr.csum_offset);
		error = uiomove(&vhdr, len, uio);
	}

	/* Copy the chain out one mbuf at a time, freeing each as we go. */
	while (m && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m->m_len);
		if (len != 0)
			error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	/* Anything left did not fit (or an error occurred): discard it. */
	if (m) {
		TUNDEBUG(ifp, "Dropping mbuf\n");
		m_freem(m);
	}
	return (error);
}

/*
 * tunwrite_l2 - inject a frame written to an L2 tunnel.
 *
 * vhdr is the virtio-net header written ahead of the frame, or NULL when
 * no header length is configured.  Frames that are too short, that are
 * not addressed to this interface, or for which virtio_net_rx_csum()
 * returns non-zero are freed here.  Always returns 0, so the writer never
 * sees an error from this function.
 */
static int
tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m,
    struct virtio_net_hdr_mrg_rxbuf *vhdr)
{
	struct epoch_tracker et;
	struct ether_header *eh;
	struct ifnet *ifp;

	ifp = TUN2IFP(tp);

	/*
	 * Only pass a unicast frame to ether_input(), if it would
	 * actually have been received by non-virtual hardware.
	 */
	if (m->m_len < sizeof(struct ether_header)) {
		m_freem(m);
		return (0);
	}

	eh = mtod(m, struct ether_header *);

	/*
	 * Unless the interface is promiscuous, a unicast frame whose
	 * destination is not the interface's own address is dropped.
	 */
	if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
	    !ETHER_IS_MULTICAST(eh->ether_dhost) &&
	    bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
		m_freem(m);
		return (0);
	}

	/* A non-zero return from the header processing drops the frame. */
	if (vhdr != NULL && virtio_net_rx_csum(m, &vhdr->hdr)) {
		m_freem(m);
		return (0);
	}

	/* Pass packet up to parent. */
	CURVNET_SET(ifp->if_vnet);
	NET_EPOCH_ENTER(et);
	(*ifp->if_input)(ifp, m);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
	/* ibytes are counted in parent */
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	return (0);
}

/*
 * tunwrite_l3 - inject a packet written to an L3 tunnel.
 *
 * With TUN_IFHEAD set the first four bytes of the packet carry the
 * address family in network byte order and are stripped; otherwise the
 * packet is taken to be AF_INET.  The packet is dispatched to the netisr
 * that matches its family.
 *
 * Returns 0 on success, ENOBUFS if the family header cannot be pulled up,
 * or EAFNOSUPPORT (after freeing the mbuf) for any other family.
 */
static int
tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
{
	struct epoch_tracker et;
	struct ifnet *ifp;
	int family, isr;

	ifp = TUN2IFP(tp);
	/* Could be unlocked read? */
	TUN_LOCK(tp);
	if (tp->tun_flags & TUN_IFHEAD) {
		TUN_UNLOCK(tp);
		/* Make sure the family word is contiguous before reading it. */
		if (m->m_len < sizeof(family) &&
		    (m = m_pullup(m, sizeof(family))) == NULL)
			return (ENOBUFS);
		family = ntohl(*mtod(m, u_int32_t *));
		m_adj(m, sizeof(family));
	} else {
		TUN_UNLOCK(tp);
		family = AF_INET;
	}

	BPF_MTAP2(ifp, &family, sizeof(family), m);

	/* Pick the netisr queue; families not compiled in are rejected. */
	switch (family) {
#ifdef INET
	case AF_INET:
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		isr = NETISR_IPV6;
		break;
#endif
	default:
		m_freem(m);
		return (EAFNOSUPPORT);
	}
	random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	CURVNET_SET(ifp->if_vnet);
	M_SETFIB(m, ifp->if_fib);
	NET_EPOCH_ENTER(et);
	netisr_dispatch(isr, m);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
	return (0);
}

/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 *
 * A write is silently ignored (returns 0) when the interface is not up
 * or the write is empty.  A write larger than the mode's maximum receive
 * unit fails with EIO.  When a virtio-net header length is configured
 * the header is read first, then the rest of the write becomes one mbuf
 * chain that is handed to tunwrite_l2() or tunwrite_l3().
 *
 * The flag argument is not used.
 */
static int
tunwrite(struct cdev *dev, struct uio *uio, int flag)
{
	struct virtio_net_hdr_mrg_rxbuf vhdr;
	struct tuntap_softc *tp;
	struct ifnet *ifp;
	struct mbuf *m;
	uint32_t mru;
	int align, vhdrlen, error;
	bool l2tun;

	tp = dev->si_drv1;
	ifp = TUN2IFP(tp);
	TUNDEBUG(ifp, "tunwrite\n");
	if ((ifp->if_flags & IFF_UP) != IFF_UP)
		/* ignore silently */
		return (0);

	if (uio->uio_resid == 0)
		return (0);

	/*
	 * The size limit depends on the mode: an L2 write may also carry
	 * the virtio-net header, an L3 write with TUN_IFHEAD the 4-byte
	 * family word.
	 */
	l2tun = (tp->tun_flags & TUN_L2) != 0;
	mru = l2tun ? TAPMRU : TUNMRU;
	vhdrlen = tp->tun_vhdrlen;
	align = 0;
	if (l2tun) {
		align = ETHER_ALIGN;
		mru += vhdrlen;
	} else if ((tp->tun_flags & TUN_IFHEAD) != 0)
		mru += sizeof(uint32_t);	/* family */
	if (uio->uio_resid < 0 || uio->uio_resid > mru) {
		TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
		return (EIO);
	}

	/* Consume the virtio-net header, if one is expected. */
	if (vhdrlen > 0) {
		error = uiomove(&vhdr, vhdrlen, uio);
		if (error != 0)
			return (error);
		TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, "
		    "gs %u, cs %u, co %u\n", vhdr.hdr.flags,
		    vhdr.hdr.gso_type, vhdr.hdr.hdr_len,
		    vhdr.hdr.gso_size, vhdr.hdr.csum_start,
		    vhdr.hdr.csum_offset);
	}

	/* The remainder of the write becomes a packet-header mbuf chain. */
	if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return (ENOBUFS);
	}

	m->m_pkthdr.rcvif = ifp;
#ifdef MAC
	mac_ifnet_create_mbuf(ifp, m);
#endif

	if (l2tun)
		return (tunwrite_l2(tp, m, vhdrlen > 0 ? &vhdr : NULL));

	return (tunwrite_l3(tp, m));
}

/*
 * tunpoll - the poll interface, this is only useful on reads
 * really. The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
1940 */ 1941 static int 1942 tunpoll(struct cdev *dev, int events, struct thread *td) 1943 { 1944 struct tuntap_softc *tp = dev->si_drv1; 1945 struct ifnet *ifp = TUN2IFP(tp); 1946 int revents = 0; 1947 1948 TUNDEBUG(ifp, "tunpoll\n"); 1949 1950 if (events & (POLLIN | POLLRDNORM)) { 1951 IFQ_LOCK(&ifp->if_snd); 1952 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 1953 TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); 1954 revents |= events & (POLLIN | POLLRDNORM); 1955 } else { 1956 TUNDEBUG(ifp, "tunpoll waiting\n"); 1957 selrecord(td, &tp->tun_rsel); 1958 } 1959 IFQ_UNLOCK(&ifp->if_snd); 1960 } 1961 revents |= events & (POLLOUT | POLLWRNORM); 1962 1963 return (revents); 1964 } 1965 1966 /* 1967 * tunkqfilter - support for the kevent() system call. 1968 */ 1969 static int 1970 tunkqfilter(struct cdev *dev, struct knote *kn) 1971 { 1972 struct tuntap_softc *tp = dev->si_drv1; 1973 struct ifnet *ifp = TUN2IFP(tp); 1974 1975 switch(kn->kn_filter) { 1976 case EVFILT_READ: 1977 TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n", 1978 ifp->if_xname, dev2unit(dev)); 1979 kn->kn_fop = &tun_read_filterops; 1980 break; 1981 1982 case EVFILT_WRITE: 1983 TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n", 1984 ifp->if_xname, dev2unit(dev)); 1985 kn->kn_fop = &tun_write_filterops; 1986 break; 1987 1988 default: 1989 TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n", 1990 ifp->if_xname, dev2unit(dev)); 1991 return(EINVAL); 1992 } 1993 1994 kn->kn_hook = tp; 1995 knlist_add(&tp->tun_rsel.si_note, kn, 0); 1996 1997 return (0); 1998 } 1999 2000 /* 2001 * Return true of there is data in the interface queue. 2002 */ 2003 static int 2004 tunkqread(struct knote *kn, long hint) 2005 { 2006 int ret; 2007 struct tuntap_softc *tp = kn->kn_hook; 2008 struct cdev *dev = tp->tun_dev; 2009 struct ifnet *ifp = TUN2IFP(tp); 2010 2011 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 2012 TUNDEBUG(ifp, 2013 "%s have data in the queue. 
Len = %d, minor = %#x\n", 2014 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 2015 ret = 1; 2016 } else { 2017 TUNDEBUG(ifp, 2018 "%s waiting for data, minor = %#x\n", ifp->if_xname, 2019 dev2unit(dev)); 2020 ret = 0; 2021 } 2022 2023 return (ret); 2024 } 2025 2026 /* 2027 * Always can write, always return MTU in kn->data. 2028 */ 2029 static int 2030 tunkqwrite(struct knote *kn, long hint) 2031 { 2032 struct tuntap_softc *tp = kn->kn_hook; 2033 struct ifnet *ifp = TUN2IFP(tp); 2034 2035 kn->kn_data = ifp->if_mtu; 2036 2037 return (1); 2038 } 2039 2040 static void 2041 tunkqdetach(struct knote *kn) 2042 { 2043 struct tuntap_softc *tp = kn->kn_hook; 2044 2045 knlist_remove(&tp->tun_rsel.si_note, kn, 0); 2046 } 2047