1 /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ 2 /*- 3 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 4 * 5 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com> 6 * All rights reserved. 7 * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * BASED ON: 32 * ------------------------------------------------------------------------- 33 * 34 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 35 * Nottingham University 1987. 36 * 37 * This source may be freely distributed, however I would be interested 38 * in any changes that are made. 39 * 40 * This driver takes packets off the IP i/f and hands them up to a 41 * user process to have its wicked way with. This driver has it's 42 * roots in a similar driver written by Phil Cockcroft (formerly) at 43 * UCL. This driver is based much more on read/write/poll mode of 44 * operation though. 45 * 46 * $FreeBSD$ 47 */ 48 49 #include "opt_inet.h" 50 #include "opt_inet6.h" 51 52 #include <sys/param.h> 53 #include <sys/lock.h> 54 #include <sys/priv.h> 55 #include <sys/proc.h> 56 #include <sys/systm.h> 57 #include <sys/jail.h> 58 #include <sys/mbuf.h> 59 #include <sys/module.h> 60 #include <sys/socket.h> 61 #include <sys/eventhandler.h> 62 #include <sys/fcntl.h> 63 #include <sys/filio.h> 64 #include <sys/sockio.h> 65 #include <sys/sx.h> 66 #include <sys/syslog.h> 67 #include <sys/ttycom.h> 68 #include <sys/poll.h> 69 #include <sys/selinfo.h> 70 #include <sys/signalvar.h> 71 #include <sys/filedesc.h> 72 #include <sys/kernel.h> 73 #include <sys/sysctl.h> 74 #include <sys/conf.h> 75 #include <sys/uio.h> 76 #include <sys/malloc.h> 77 #include <sys/random.h> 78 #include <sys/ctype.h> 79 80 #include <net/ethernet.h> 81 #include <net/if.h> 82 #include <net/if_var.h> 83 #include <net/if_clone.h> 84 #include <net/if_dl.h> 85 #include <net/if_media.h> 86 #include <net/if_types.h> 87 #include <net/if_vlan_var.h> 88 #include <net/netisr.h> 89 #include <net/route.h> 90 #include <net/vnet.h> 91 #include <netinet/in.h> 92 #ifdef INET 93 #include <netinet/ip.h> 94 #endif 95 #ifdef INET6 96 #include <netinet/ip6.h> 97 #include <netinet6/ip6_var.h> 98 #endif 99 #include <netinet/udp.h> 100 #include <netinet/tcp.h> 101 #include <net/bpf.h> 102 #include <net/if_tap.h> 103 #include <net/if_tun.h> 104 105 #include <dev/virtio/network/virtio_net.h> 106 107 #include <sys/queue.h> 108 #include <sys/condvar.h> 109 #include <security/mac/mac_framework.h> 110 111 struct tuntap_driver; 112 113 /* 114 * tun_list is protected by global tunmtx. Other mutable fields are 115 * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is 116 * static for the duration of a tunnel interface. 117 */ 118 struct tuntap_softc { 119 TAILQ_ENTRY(tuntap_softc) tun_list; 120 struct cdev *tun_alias; 121 struct cdev *tun_dev; 122 u_short tun_flags; /* misc flags */ 123 #define TUN_OPEN 0x0001 124 #define TUN_INITED 0x0002 125 #define TUN_UNUSED1 0x0008 126 #define TUN_DSTADDR 0x0010 127 #define TUN_LMODE 0x0020 128 #define TUN_RWAIT 0x0040 129 #define TUN_ASYNC 0x0080 130 #define TUN_IFHEAD 0x0100 131 #define TUN_DYING 0x0200 132 #define TUN_L2 0x0400 133 #define TUN_VMNET 0x0800 134 135 #define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) 136 #define TUN_READY (TUN_OPEN | TUN_INITED) 137 138 pid_t tun_pid; /* owning pid */ 139 struct ifnet *tun_ifp; /* the interface */ 140 struct sigio *tun_sigio; /* async I/O info */ 141 struct tuntap_driver *tun_drv; /* appropriate driver */ 142 struct selinfo tun_rsel; /* read select */ 143 struct mtx tun_mtx; /* softc field mutex */ 144 struct cv tun_cv; /* for ref'd dev destroy */ 145 struct ether_addr tun_ether; /* remote address */ 146 int tun_busy; /* busy count */ 147 int tun_vhdrlen; /* virtio-net header length */ 148 }; 149 #define TUN2IFP(sc) ((sc)->tun_ifp) 150 151 #define TUNDEBUG if (tundebug) if_printf 152 153 #define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx) 154 #define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx) 155 #define TUN_LOCK_ASSERT(tp) mtx_assert(&(tp)->tun_mtx, MA_OWNED); 156 157 #define TUN_VMIO_FLAG_MASK 0x0fff 158 159 /* 160 * Interface capabilities of a tap device that supports the virtio-net 161 * header. 162 */ 163 #define TAP_VNET_HDR_CAPS (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 \ 164 | IFCAP_VLAN_HWCSUM \ 165 | IFCAP_TSO | IFCAP_LRO \ 166 | IFCAP_VLAN_HWTSO) 167 168 #define TAP_ALL_OFFLOAD (CSUM_TSO | CSUM_TCP | CSUM_UDP |\ 169 CSUM_TCP_IPV6 | CSUM_UDP_IPV6) 170 171 172 /* 173 * All mutable global variables in if_tun are locked using tunmtx, with 174 * the exception of tundebug, which is used unlocked, and the drivers' *clones, 175 * which are static after setup. 176 */ 177 static struct mtx tunmtx; 178 static eventhandler_tag arrival_tag; 179 static eventhandler_tag clone_tag; 180 static const char tunname[] = "tun"; 181 static const char tapname[] = "tap"; 182 static const char vmnetname[] = "vmnet"; 183 static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); 184 static int tundebug = 0; 185 static int tundclone = 1; 186 static int tap_allow_uopen = 0; /* allow user devfs cloning */ 187 static int tapuponopen = 0; /* IFF_UP on open() */ 188 static int tapdclone = 1; /* enable devfs cloning */ 189 190 static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); 191 SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); 192 193 static struct sx tun_ioctl_sx; 194 SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl"); 195 196 SYSCTL_DECL(_net_link); 197 /* tun */ 198 static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, 199 "IP tunnel software network interface"); 200 SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0, 201 "Enable legacy devfs interface creation"); 202 203 /* tap */ 204 static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, 205 "Ethernet tunnel software network interface"); 206 SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0, 207 "Enable legacy devfs interface creation for all users"); 208 SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, 209 "Bring interface up when /dev/tap is opened"); 210 SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, 211 "Enable legacy devfs interface creation"); 212 SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, ""); 213 214 static int tun_create_device(struct tuntap_driver *drv, int unit, 215 struct ucred *cr, struct cdev **dev, const char *name); 216 static int tun_busy_locked(struct tuntap_softc *tp); 217 static void tun_unbusy_locked(struct tuntap_softc *tp); 218 static int tun_busy(struct tuntap_softc *tp); 219 static void tun_unbusy(struct tuntap_softc *tp); 220 221 static int tuntap_name2info(const char *name, int *unit, int *flags); 222 static void tunclone(void *arg, struct ucred *cred, char *name, 223 int namelen, struct cdev **dev); 224 static void tuncreate(struct cdev *dev); 225 static void tundtor(void *data); 226 static void tunrename(void *arg, struct ifnet *ifp); 227 static int tunifioctl(struct ifnet *, u_long, caddr_t); 228 static void tuninit(struct ifnet *); 229 static void tunifinit(void *xtp); 230 static int tuntapmodevent(module_t, int, void *); 231 static int tunoutput(struct ifnet *, struct mbuf *, 232 const struct sockaddr *, struct route *ro); 233 static void tunstart(struct ifnet *); 234 static void tunstart_l2(struct ifnet *); 235 236 static int tun_clone_match(struct if_clone *ifc, const char *name); 237 static int tap_clone_match(struct if_clone *ifc, const char *name); 238 static int vmnet_clone_match(struct if_clone *ifc, const char *name); 239 static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t); 240 static int tun_clone_destroy(struct if_clone *, struct ifnet *); 241 static void tun_vnethdr_set(struct ifnet *ifp, int vhdrlen); 242 243 static d_open_t tunopen; 244 static d_read_t tunread; 245 static d_write_t tunwrite; 246 static d_ioctl_t tunioctl; 247 static d_poll_t tunpoll; 248 static d_kqfilter_t tunkqfilter; 249 250 static int tunkqread(struct knote *, long); 251 static int tunkqwrite(struct knote *, long); 252 static void tunkqdetach(struct knote *); 253 254 static struct filterops tun_read_filterops = { 255 .f_isfd = 1, 256 .f_attach = NULL, 257 .f_detach = tunkqdetach, 258 .f_event = tunkqread, 259 }; 260 261 static struct filterops tun_write_filterops = { 262 .f_isfd = 1, 263 .f_attach = NULL, 264 .f_detach = tunkqdetach, 265 .f_event = tunkqwrite, 266 }; 267 268 static struct tuntap_driver { 269 struct cdevsw cdevsw; 270 int ident_flags; 271 struct unrhdr *unrhdr; 272 struct clonedevs *clones; 273 ifc_match_t *clone_match_fn; 274 ifc_create_t *clone_create_fn; 275 ifc_destroy_t *clone_destroy_fn; 276 } tuntap_drivers[] = { 277 { 278 .ident_flags = 0, 279 .cdevsw = { 280 .d_version = D_VERSION, 281 .d_flags = D_NEEDMINOR, 282 .d_open = tunopen, 283 .d_read = tunread, 284 .d_write = tunwrite, 285 .d_ioctl = tunioctl, 286 .d_poll = tunpoll, 287 .d_kqfilter = tunkqfilter, 288 .d_name = tunname, 289 }, 290 .clone_match_fn = tun_clone_match, 291 .clone_create_fn = tun_clone_create, 292 .clone_destroy_fn = tun_clone_destroy, 293 }, 294 { 295 .ident_flags = TUN_L2, 296 .cdevsw = { 297 .d_version = D_VERSION, 298 .d_flags = D_NEEDMINOR, 299 .d_open = tunopen, 300 .d_read = tunread, 301 .d_write = tunwrite, 302 .d_ioctl = tunioctl, 303 .d_poll = tunpoll, 304 .d_kqfilter = tunkqfilter, 305 .d_name = tapname, 306 }, 307 .clone_match_fn = tap_clone_match, 308 .clone_create_fn = tun_clone_create, 309 .clone_destroy_fn = tun_clone_destroy, 310 }, 311 { 312 .ident_flags = TUN_L2 | TUN_VMNET, 313 .cdevsw = { 314 .d_version = D_VERSION, 315 .d_flags = D_NEEDMINOR, 316 .d_open = tunopen, 317 .d_read = tunread, 318 .d_write = tunwrite, 319 .d_ioctl = tunioctl, 320 .d_poll = tunpoll, 321 .d_kqfilter = tunkqfilter, 322 .d_name = vmnetname, 323 }, 324 .clone_match_fn = vmnet_clone_match, 325 .clone_create_fn = tun_clone_create, 326 .clone_destroy_fn = tun_clone_destroy, 327 }, 328 }; 329 330 struct tuntap_driver_cloner { 331 SLIST_ENTRY(tuntap_driver_cloner) link; 332 struct tuntap_driver *drv; 333 struct if_clone *cloner; 334 }; 335 336 VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) = 337 SLIST_HEAD_INITIALIZER(tuntap_driver_cloners); 338 339 #define V_tuntap_driver_cloners VNET(tuntap_driver_cloners) 340 341 /* 342 * Mechanism for marking a tunnel device as busy so that we can safely do some 343 * orthogonal operations (such as operations on devices) without racing against 344 * tun_destroy. tun_destroy will wait on the condvar if we're at all busy or 345 * open, to be woken up when the condition is alleviated. 346 */ 347 static int 348 tun_busy_locked(struct tuntap_softc *tp) 349 { 350 351 TUN_LOCK_ASSERT(tp); 352 if ((tp->tun_flags & TUN_DYING) != 0) { 353 /* 354 * Perhaps unintuitive, but the device is busy going away. 355 * Other interpretations of EBUSY from tun_busy make little 356 * sense, since making a busy device even more busy doesn't 357 * sound like a problem. 358 */ 359 return (EBUSY); 360 } 361 362 ++tp->tun_busy; 363 return (0); 364 } 365 366 static void 367 tun_unbusy_locked(struct tuntap_softc *tp) 368 { 369 370 TUN_LOCK_ASSERT(tp); 371 KASSERT(tp->tun_busy != 0, ("tun_unbusy: called for non-busy tunnel")); 372 373 --tp->tun_busy; 374 /* Wake up anything that may be waiting on our busy tunnel. */ 375 if (tp->tun_busy == 0) 376 cv_broadcast(&tp->tun_cv); 377 } 378 379 static int 380 tun_busy(struct tuntap_softc *tp) 381 { 382 int ret; 383 384 TUN_LOCK(tp); 385 ret = tun_busy_locked(tp); 386 TUN_UNLOCK(tp); 387 return (ret); 388 } 389 390 391 static void 392 tun_unbusy(struct tuntap_softc *tp) 393 { 394 395 TUN_LOCK(tp); 396 tun_unbusy_locked(tp); 397 TUN_UNLOCK(tp); 398 } 399 400 /* 401 * Sets unit and/or flags given the device name. Must be called with correct 402 * vnet context. 403 */ 404 static int 405 tuntap_name2info(const char *name, int *outunit, int *outflags) 406 { 407 struct tuntap_driver *drv; 408 struct tuntap_driver_cloner *drvc; 409 char *dname; 410 int flags, unit; 411 bool found; 412 413 if (name == NULL) 414 return (EINVAL); 415 416 /* 417 * Needed for dev_stdclone, but dev_stdclone will not modify, it just 418 * wants to be able to pass back a char * through the second param. We 419 * will always set that as NULL here, so we'll fake it. 420 */ 421 dname = __DECONST(char *, name); 422 found = false; 423 424 KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners), 425 ("tuntap_driver_cloners failed to initialize")); 426 SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) { 427 KASSERT(drvc->drv != NULL, 428 ("tuntap_driver_cloners entry not properly initialized")); 429 drv = drvc->drv; 430 431 if (strcmp(name, drv->cdevsw.d_name) == 0) { 432 found = true; 433 unit = -1; 434 flags = drv->ident_flags; 435 break; 436 } 437 438 if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) { 439 found = true; 440 flags = drv->ident_flags; 441 break; 442 } 443 } 444 445 if (!found) 446 return (ENXIO); 447 448 if (outunit != NULL) 449 *outunit = unit; 450 if (outflags != NULL) 451 *outflags = flags; 452 return (0); 453 } 454 455 /* 456 * Get driver information from a set of flags specified. Masks the identifying 457 * part of the flags and compares it against all of the available 458 * tuntap_drivers. Must be called with correct vnet context. 459 */ 460 static struct tuntap_driver * 461 tuntap_driver_from_flags(int tun_flags) 462 { 463 struct tuntap_driver *drv; 464 struct tuntap_driver_cloner *drvc; 465 466 KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners), 467 ("tuntap_driver_cloners failed to initialize")); 468 SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) { 469 KASSERT(drvc->drv != NULL, 470 ("tuntap_driver_cloners entry not properly initialized")); 471 drv = drvc->drv; 472 if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags) 473 return (drv); 474 } 475 476 return (NULL); 477 } 478 479 480 481 static int 482 tun_clone_match(struct if_clone *ifc, const char *name) 483 { 484 int tunflags; 485 486 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 487 if ((tunflags & TUN_L2) == 0) 488 return (1); 489 } 490 491 return (0); 492 } 493 494 static int 495 tap_clone_match(struct if_clone *ifc, const char *name) 496 { 497 int tunflags; 498 499 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 500 if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2) 501 return (1); 502 } 503 504 return (0); 505 } 506 507 static int 508 vmnet_clone_match(struct if_clone *ifc, const char *name) 509 { 510 int tunflags; 511 512 if (tuntap_name2info(name, NULL, &tunflags) == 0) { 513 if ((tunflags & TUN_VMNET) != 0) 514 return (1); 515 } 516 517 return (0); 518 } 519 520 static int 521 tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) 522 { 523 struct tuntap_driver *drv; 524 struct cdev *dev; 525 int err, i, tunflags, unit; 526 527 tunflags = 0; 528 /* The name here tells us exactly what we're creating */ 529 err = tuntap_name2info(name, &unit, &tunflags); 530 if (err != 0) 531 return (err); 532 533 drv = tuntap_driver_from_flags(tunflags); 534 if (drv == NULL) 535 return (ENXIO); 536 537 if (unit != -1) { 538 /* If this unit number is still available that's okay. */ 539 if (alloc_unr_specific(drv->unrhdr, unit) == -1) 540 return (EEXIST); 541 } else { 542 unit = alloc_unr(drv->unrhdr); 543 } 544 545 snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit); 546 547 /* find any existing device, or allocate new unit number */ 548 dev = NULL; 549 i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0); 550 /* No preexisting struct cdev *, create one */ 551 if (i != 0) 552 i = tun_create_device(drv, unit, NULL, &dev, name); 553 if (i == 0) 554 tuncreate(dev); 555 556 return (i); 557 } 558 559 static void 560 tunclone(void *arg, struct ucred *cred, char *name, int namelen, 561 struct cdev **dev) 562 { 563 char devname[SPECNAMELEN + 1]; 564 struct tuntap_driver *drv; 565 int append_unit, i, u, tunflags; 566 bool mayclone; 567 568 if (*dev != NULL) 569 return; 570 571 tunflags = 0; 572 CURVNET_SET(CRED_TO_VNET(cred)); 573 if (tuntap_name2info(name, &u, &tunflags) != 0) 574 goto out; /* Not recognized */ 575 576 if (u != -1 && u > IF_MAXUNIT) 577 goto out; /* Unit number too high */ 578 579 mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0; 580 if ((tunflags & TUN_L2) != 0) { 581 /* tap/vmnet allow user open with a sysctl */ 582 mayclone = (mayclone || tap_allow_uopen) && tapdclone; 583 } else { 584 mayclone = mayclone && tundclone; 585 } 586 587 /* 588 * If tun cloning is enabled, only the superuser can create an 589 * interface. 590 */ 591 if (!mayclone) 592 goto out; 593 594 if (u == -1) 595 append_unit = 1; 596 else 597 append_unit = 0; 598 599 drv = tuntap_driver_from_flags(tunflags); 600 if (drv == NULL) 601 goto out; 602 603 /* find any existing device, or allocate new unit number */ 604 i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0); 605 if (i) { 606 if (append_unit) { 607 namelen = snprintf(devname, sizeof(devname), "%s%d", 608 name, u); 609 name = devname; 610 } 611 612 i = tun_create_device(drv, u, cred, dev, name); 613 } 614 if (i == 0) 615 if_clone_create(name, namelen, NULL); 616 out: 617 CURVNET_RESTORE(); 618 } 619 620 static void 621 tun_destroy(struct tuntap_softc *tp) 622 { 623 624 TUN_LOCK(tp); 625 tp->tun_flags |= TUN_DYING; 626 if (tp->tun_busy != 0) 627 cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); 628 else 629 TUN_UNLOCK(tp); 630 631 CURVNET_SET(TUN2IFP(tp)->if_vnet); 632 633 /* destroy_dev will take care of any alias. */ 634 destroy_dev(tp->tun_dev); 635 seldrain(&tp->tun_rsel); 636 knlist_clear(&tp->tun_rsel.si_note, 0); 637 knlist_destroy(&tp->tun_rsel.si_note); 638 if ((tp->tun_flags & TUN_L2) != 0) { 639 ether_ifdetach(TUN2IFP(tp)); 640 } else { 641 bpfdetach(TUN2IFP(tp)); 642 if_detach(TUN2IFP(tp)); 643 } 644 sx_xlock(&tun_ioctl_sx); 645 TUN2IFP(tp)->if_softc = NULL; 646 sx_xunlock(&tun_ioctl_sx); 647 free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit); 648 if_free(TUN2IFP(tp)); 649 mtx_destroy(&tp->tun_mtx); 650 cv_destroy(&tp->tun_cv); 651 free(tp, M_TUN); 652 CURVNET_RESTORE(); 653 } 654 655 static int 656 tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp) 657 { 658 struct tuntap_softc *tp = ifp->if_softc; 659 660 mtx_lock(&tunmtx); 661 TAILQ_REMOVE(&tunhead, tp, tun_list); 662 mtx_unlock(&tunmtx); 663 tun_destroy(tp); 664 665 return (0); 666 } 667 668 static void 669 vnet_tun_init(const void *unused __unused) 670 { 671 struct tuntap_driver *drv; 672 struct tuntap_driver_cloner *drvc; 673 int i; 674 675 for (i = 0; i < nitems(tuntap_drivers); ++i) { 676 drv = &tuntap_drivers[i]; 677 drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO); 678 679 drvc->drv = drv; 680 drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0, 681 drv->clone_match_fn, drv->clone_create_fn, 682 drv->clone_destroy_fn); 683 SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link); 684 }; 685 } 686 VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, 687 vnet_tun_init, NULL); 688 689 static void 690 vnet_tun_uninit(const void *unused __unused) 691 { 692 struct tuntap_driver_cloner *drvc; 693 694 while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) { 695 drvc = SLIST_FIRST(&V_tuntap_driver_cloners); 696 SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link); 697 698 if_clone_detach(drvc->cloner); 699 free(drvc, M_TUN); 700 } 701 } 702 VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, 703 vnet_tun_uninit, NULL); 704 705 static void 706 tun_uninit(const void *unused __unused) 707 { 708 struct tuntap_driver *drv; 709 struct tuntap_softc *tp; 710 int i; 711 712 EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag); 713 EVENTHANDLER_DEREGISTER(dev_clone, clone_tag); 714 drain_dev_clone_events(); 715 716 mtx_lock(&tunmtx); 717 while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { 718 TAILQ_REMOVE(&tunhead, tp, tun_list); 719 mtx_unlock(&tunmtx); 720 tun_destroy(tp); 721 mtx_lock(&tunmtx); 722 } 723 mtx_unlock(&tunmtx); 724 for (i = 0; i < nitems(tuntap_drivers); ++i) { 725 drv = &tuntap_drivers[i]; 726 delete_unrhdr(drv->unrhdr); 727 clone_cleanup(&drv->clones); 728 } 729 mtx_destroy(&tunmtx); 730 } 731 SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL); 732 733 static struct tuntap_driver * 734 tuntap_driver_from_ifnet(const struct ifnet *ifp) 735 { 736 struct tuntap_driver *drv; 737 int i; 738 739 if (ifp == NULL) 740 return (NULL); 741 742 for (i = 0; i < nitems(tuntap_drivers); ++i) { 743 drv = &tuntap_drivers[i]; 744 if (strcmp(ifp->if_dname, drv->cdevsw.d_name) == 0) 745 return (drv); 746 } 747 748 return (NULL); 749 } 750 751 static int 752 tuntapmodevent(module_t mod, int type, void *data) 753 { 754 struct tuntap_driver *drv; 755 int i; 756 757 switch (type) { 758 case MOD_LOAD: 759 mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF); 760 for (i = 0; i < nitems(tuntap_drivers); ++i) { 761 drv = &tuntap_drivers[i]; 762 clone_setup(&drv->clones); 763 drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx); 764 } 765 arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event, 766 tunrename, 0, 1000); 767 if (arrival_tag == NULL) 768 return (ENOMEM); 769 clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); 770 if (clone_tag == NULL) 771 return (ENOMEM); 772 break; 773 case MOD_UNLOAD: 774 /* See tun_uninit, so it's done after the vnet_sysuninit() */ 775 break; 776 default: 777 return EOPNOTSUPP; 778 } 779 return 0; 780 } 781 782 static moduledata_t tuntap_mod = { 783 "if_tuntap", 784 tuntapmodevent, 785 0 786 }; 787 788 /* We'll only ever have these two, so no need for a macro. */ 789 static moduledata_t tun_mod = { "if_tun", NULL, 0 }; 790 static moduledata_t tap_mod = { "if_tap", NULL, 0 }; 791 792 DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 793 MODULE_VERSION(if_tuntap, 1); 794 DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 795 MODULE_VERSION(if_tun, 1); 796 DECLARE_MODULE(if_tap, tap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 797 MODULE_VERSION(if_tap, 1); 798 799 static int 800 tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr, 801 struct cdev **dev, const char *name) 802 { 803 struct make_dev_args args; 804 struct tuntap_softc *tp; 805 int error; 806 807 tp = malloc(sizeof(*tp), M_TUN, M_WAITOK | M_ZERO); 808 mtx_init(&tp->tun_mtx, "tun_mtx", NULL, MTX_DEF); 809 cv_init(&tp->tun_cv, "tun_condvar"); 810 tp->tun_flags = drv->ident_flags; 811 tp->tun_drv = drv; 812 813 make_dev_args_init(&args); 814 if (cr != NULL) 815 args.mda_flags = MAKEDEV_REF; 816 args.mda_devsw = &drv->cdevsw; 817 args.mda_cr = cr; 818 args.mda_uid = UID_UUCP; 819 args.mda_gid = GID_DIALER; 820 args.mda_mode = 0600; 821 args.mda_unit = unit; 822 args.mda_si_drv1 = tp; 823 error = make_dev_s(&args, dev, "%s", name); 824 if (error != 0) { 825 free(tp, M_TUN); 826 return (error); 827 } 828 829 KASSERT((*dev)->si_drv1 != NULL, 830 ("Failed to set si_drv1 at %s creation", name)); 831 tp->tun_dev = *dev; 832 knlist_init_mtx(&tp->tun_rsel.si_note, &tp->tun_mtx); 833 mtx_lock(&tunmtx); 834 TAILQ_INSERT_TAIL(&tunhead, tp, tun_list); 835 mtx_unlock(&tunmtx); 836 return (0); 837 } 838 839 static void 840 tunstart(struct ifnet *ifp) 841 { 842 struct tuntap_softc *tp = ifp->if_softc; 843 struct mbuf *m; 844 845 TUNDEBUG(ifp, "starting\n"); 846 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 847 IFQ_LOCK(&ifp->if_snd); 848 IFQ_POLL_NOLOCK(&ifp->if_snd, m); 849 if (m == NULL) { 850 IFQ_UNLOCK(&ifp->if_snd); 851 return; 852 } 853 IFQ_UNLOCK(&ifp->if_snd); 854 } 855 856 TUN_LOCK(tp); 857 if (tp->tun_flags & TUN_RWAIT) { 858 tp->tun_flags &= ~TUN_RWAIT; 859 wakeup(tp); 860 } 861 selwakeuppri(&tp->tun_rsel, PZERO + 1); 862 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 863 if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) { 864 TUN_UNLOCK(tp); 865 pgsigio(&tp->tun_sigio, SIGIO, 0); 866 } else 867 TUN_UNLOCK(tp); 868 } 869 870 /* 871 * tunstart_l2 872 * 873 * queue packets from higher level ready to put out 874 */ 875 static void 876 tunstart_l2(struct ifnet *ifp) 877 { 878 struct tuntap_softc *tp = ifp->if_softc; 879 880 TUNDEBUG(ifp, "starting\n"); 881 882 /* 883 * do not junk pending output if we are in VMnet mode. 884 * XXX: can this do any harm because of queue overflow? 885 */ 886 887 TUN_LOCK(tp); 888 if (((tp->tun_flags & TUN_VMNET) == 0) && 889 ((tp->tun_flags & TUN_READY) != TUN_READY)) { 890 struct mbuf *m; 891 892 /* Unlocked read. */ 893 TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags); 894 895 for (;;) { 896 IF_DEQUEUE(&ifp->if_snd, m); 897 if (m != NULL) { 898 m_freem(m); 899 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 900 } else 901 break; 902 } 903 TUN_UNLOCK(tp); 904 905 return; 906 } 907 908 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 909 910 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 911 if (tp->tun_flags & TUN_RWAIT) { 912 tp->tun_flags &= ~TUN_RWAIT; 913 wakeup(tp); 914 } 915 916 if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) { 917 TUN_UNLOCK(tp); 918 pgsigio(&tp->tun_sigio, SIGIO, 0); 919 TUN_LOCK(tp); 920 } 921 922 selwakeuppri(&tp->tun_rsel, PZERO+1); 923 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 924 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ 925 } 926 927 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 928 TUN_UNLOCK(tp); 929 } /* tunstart_l2 */ 930 931 /* XXX: should return an error code so it can fail. */ 932 static void 933 tuncreate(struct cdev *dev) 934 { 935 struct tuntap_driver *drv; 936 struct tuntap_softc *tp; 937 struct ifnet *ifp; 938 struct ether_addr eaddr; 939 int iflags; 940 u_char type; 941 942 tp = dev->si_drv1; 943 KASSERT(tp != NULL, 944 ("si_drv1 should have been initialized at creation")); 945 946 drv = tp->tun_drv; 947 iflags = IFF_MULTICAST; 948 if ((tp->tun_flags & TUN_L2) != 0) { 949 type = IFT_ETHER; 950 iflags |= IFF_BROADCAST | IFF_SIMPLEX; 951 } else { 952 type = IFT_PPP; 953 iflags |= IFF_POINTOPOINT; 954 } 955 ifp = tp->tun_ifp = if_alloc(type); 956 if (ifp == NULL) 957 panic("%s%d: failed to if_alloc() interface.\n", 958 drv->cdevsw.d_name, dev2unit(dev)); 959 ifp->if_softc = tp; 960 if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev)); 961 ifp->if_ioctl = tunifioctl; 962 ifp->if_flags = iflags; 963 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 964 ifp->if_capabilities |= IFCAP_LINKSTATE; 965 ifp->if_capenable |= IFCAP_LINKSTATE; 966 967 if ((tp->tun_flags & TUN_L2) != 0) { 968 ifp->if_mtu = ETHERMTU; 969 ifp->if_init = tunifinit; 970 ifp->if_start = tunstart_l2; 971 972 ether_gen_addr(ifp, &eaddr); 973 ether_ifattach(ifp, eaddr.octet); 974 } else { 975 ifp->if_mtu = TUNMTU; 976 ifp->if_start = tunstart; 977 ifp->if_output = tunoutput; 978 979 ifp->if_snd.ifq_drv_maxlen = 0; 980 IFQ_SET_READY(&ifp->if_snd); 981 982 if_attach(ifp); 983 bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); 984 } 985 986 TUN_LOCK(tp); 987 tp->tun_flags |= TUN_INITED; 988 TUN_UNLOCK(tp); 989 990 TUNDEBUG(ifp, "interface %s is created, minor = %#x\n", 991 ifp->if_xname, dev2unit(dev)); 992 } 993 994 static void 995 tunrename(void *arg __unused, struct ifnet *ifp) 996 { 997 struct tuntap_softc *tp; 998 int error; 999 1000 if ((ifp->if_flags & IFF_RENAMING) == 0) 1001 return; 1002 1003 if (tuntap_driver_from_ifnet(ifp) == NULL) 1004 return; 1005 1006 /* 1007 * We need to grab the ioctl sx long enough to make sure the softc is 1008 * still there. If it is, we can safely try to busy the tun device. 1009 * The busy may fail if the device is currently dying, in which case 1010 * we do nothing. If it doesn't fail, the busy count stops the device 1011 * from dying until we've created the alias (that will then be 1012 * subsequently destroyed). 1013 */ 1014 sx_xlock(&tun_ioctl_sx); 1015 tp = ifp->if_softc; 1016 if (tp == NULL) { 1017 sx_xunlock(&tun_ioctl_sx); 1018 return; 1019 } 1020 error = tun_busy(tp); 1021 sx_xunlock(&tun_ioctl_sx); 1022 if (error != 0) 1023 return; 1024 if (tp->tun_alias != NULL) { 1025 destroy_dev(tp->tun_alias); 1026 tp->tun_alias = NULL; 1027 } 1028 1029 if (strcmp(ifp->if_xname, tp->tun_dev->si_name) == 0) 1030 goto out; 1031 1032 /* 1033 * Failure's ok, aliases are created on a best effort basis. If a 1034 * tun user/consumer decides to rename the interface to conflict with 1035 * another device (non-ifnet) on the system, we will assume they know 1036 * what they are doing. make_dev_alias_p won't touch tun_alias on 1037 * failure, so we use it but ignore the return value. 1038 */ 1039 make_dev_alias_p(MAKEDEV_CHECKNAME, &tp->tun_alias, tp->tun_dev, "%s", 1040 ifp->if_xname); 1041 out: 1042 tun_unbusy(tp); 1043 } 1044 1045 static int 1046 tunopen(struct cdev *dev, int flag, int mode, struct thread *td) 1047 { 1048 struct ifnet *ifp; 1049 struct tuntap_softc *tp; 1050 int error, tunflags; 1051 1052 tunflags = 0; 1053 CURVNET_SET(TD_TO_VNET(td)); 1054 error = tuntap_name2info(dev->si_name, NULL, &tunflags); 1055 if (error != 0) { 1056 CURVNET_RESTORE(); 1057 return (error); /* Shouldn't happen */ 1058 } 1059 1060 tp = dev->si_drv1; 1061 KASSERT(tp != NULL, 1062 ("si_drv1 should have been initialized at creation")); 1063 1064 TUN_LOCK(tp); 1065 if ((tp->tun_flags & TUN_INITED) == 0) { 1066 TUN_UNLOCK(tp); 1067 CURVNET_RESTORE(); 1068 return (ENXIO); 1069 } 1070 if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) { 1071 TUN_UNLOCK(tp); 1072 CURVNET_RESTORE(); 1073 return (EBUSY); 1074 } 1075 1076 error = tun_busy_locked(tp); 1077 KASSERT(error == 0, ("Must be able to busy an unopen tunnel")); 1078 ifp = TUN2IFP(tp); 1079 1080 if ((tp->tun_flags & TUN_L2) != 0) { 1081 bcopy(IF_LLADDR(ifp), tp->tun_ether.octet, 1082 sizeof(tp->tun_ether.octet)); 1083 1084 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1085 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1086 1087 if (tapuponopen) 1088 ifp->if_flags |= IFF_UP; 1089 } 1090 1091 tp->tun_pid = td->td_proc->p_pid; 1092 tp->tun_flags |= TUN_OPEN; 1093 1094 if_link_state_change(ifp, LINK_STATE_UP); 1095 TUNDEBUG(ifp, "open\n"); 1096 TUN_UNLOCK(tp); 1097 1098 /* 1099 * This can fail with either ENOENT or EBUSY. This is in the middle of 1100 * d_open, so ENOENT should not be possible. EBUSY is possible, but 1101 * the only cdevpriv dtor being set will be tundtor and the softc being 1102 * passed is constant for a given cdev. We ignore the possible error 1103 * because of this as either "unlikely" or "not actually a problem." 1104 */ 1105 (void)devfs_set_cdevpriv(tp, tundtor); 1106 CURVNET_RESTORE(); 1107 return (0); 1108 } 1109 1110 /* 1111 * tundtor - tear down the device - mark i/f down & delete 1112 * routing info 1113 */ 1114 static void 1115 tundtor(void *data) 1116 { 1117 struct proc *p; 1118 struct tuntap_softc *tp; 1119 struct ifnet *ifp; 1120 bool l2tun; 1121 1122 tp = data; 1123 p = curproc; 1124 ifp = TUN2IFP(tp); 1125 1126 TUN_LOCK(tp); 1127 1128 /* 1129 * Realistically, we can't be obstinate here. This only means that the 1130 * tuntap device was closed out of order, and the last closer wasn't the 1131 * controller. These are still good to know about, though, as software 1132 * should avoid multiple processes with a tuntap device open and 1133 * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in 1134 * parent). 1135 */ 1136 if (p->p_pid != tp->tun_pid) { 1137 log(LOG_INFO, 1138 "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n", 1139 p->p_pid, p->p_comm, tp->tun_dev->si_name); 1140 } 1141 1142 /* 1143 * junk all pending output 1144 */ 1145 CURVNET_SET(ifp->if_vnet); 1146 1147 l2tun = false; 1148 if ((tp->tun_flags & TUN_L2) != 0) { 1149 l2tun = true; 1150 IF_DRAIN(&ifp->if_snd); 1151 } else { 1152 IFQ_PURGE(&ifp->if_snd); 1153 } 1154 1155 /* For vmnet, we won't do most of the address/route bits */ 1156 if ((tp->tun_flags & TUN_VMNET) != 0 || 1157 (l2tun && (ifp->if_flags & IFF_LINK0) != 0)) 1158 goto out; 1159 1160 if (ifp->if_flags & IFF_UP) { 1161 TUN_UNLOCK(tp); 1162 if_down(ifp); 1163 TUN_LOCK(tp); 1164 } 1165 1166 /* Delete all addresses and routes which reference this interface. */ 1167 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1168 struct ifaddr *ifa; 1169 1170 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1171 TUN_UNLOCK(tp); 1172 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1173 /* deal w/IPv4 PtP destination; unlocked read */ 1174 if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) { 1175 rtinit(ifa, (int)RTM_DELETE, 1176 tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0); 1177 } else { 1178 rtinit(ifa, (int)RTM_DELETE, 0); 1179 } 1180 } 1181 if_purgeaddrs(ifp); 1182 TUN_LOCK(tp); 1183 } 1184 1185 out: 1186 if_link_state_change(ifp, LINK_STATE_DOWN); 1187 CURVNET_RESTORE(); 1188 1189 funsetown(&tp->tun_sigio); 1190 selwakeuppri(&tp->tun_rsel, PZERO + 1); 1191 KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); 1192 TUNDEBUG (ifp, "closed\n"); 1193 tp->tun_flags &= ~TUN_OPEN; 1194 tp->tun_pid = 0; 1195 tun_vnethdr_set(ifp, 0); 1196 1197 tun_unbusy_locked(tp); 1198 TUN_UNLOCK(tp); 1199 } 1200 1201 static void 1202 tuninit(struct ifnet *ifp) 1203 { 1204 struct tuntap_softc *tp = ifp->if_softc; 1205 #ifdef INET 1206 struct epoch_tracker et; 1207 struct ifaddr *ifa; 1208 #endif 1209 1210 TUNDEBUG(ifp, "tuninit\n"); 1211 1212 TUN_LOCK(tp); 1213 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1214 if ((tp->tun_flags & TUN_L2) == 0) { 1215 ifp->if_flags |= IFF_UP; 1216 getmicrotime(&ifp->if_lastchange); 1217 #ifdef INET 1218 NET_EPOCH_ENTER(et); 1219 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1220 if (ifa->ifa_addr->sa_family == AF_INET) { 1221 struct sockaddr_in *si; 1222 1223 si = (struct sockaddr_in *)ifa->ifa_dstaddr; 1224 if (si && si->sin_addr.s_addr) { 1225 tp->tun_flags |= TUN_DSTADDR; 1226 break; 1227 } 1228 } 1229 } 1230 NET_EPOCH_EXIT(et); 1231 #endif 1232 TUN_UNLOCK(tp); 1233 } else { 1234 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1235 TUN_UNLOCK(tp); 1236 /* attempt to start output */ 1237 tunstart_l2(ifp); 1238 } 1239 1240 } 1241 1242 /* 1243 * Used only for l2 tunnel. 1244 */ 1245 static void 1246 tunifinit(void *xtp) 1247 { 1248 struct tuntap_softc *tp; 1249 1250 tp = (struct tuntap_softc *)xtp; 1251 tuninit(tp->tun_ifp); 1252 } 1253 1254 /* 1255 * To be called under TUN_LOCK. Update ifp->if_hwassist according to the 1256 * current value of ifp->if_capenable. 1257 */ 1258 static void 1259 tun_caps_changed(struct ifnet *ifp) 1260 { 1261 uint64_t hwassist = 0; 1262 1263 TUN_LOCK_ASSERT((struct tuntap_softc *)ifp->if_softc); 1264 if (ifp->if_capenable & IFCAP_TXCSUM) 1265 hwassist |= CSUM_TCP | CSUM_UDP; 1266 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 1267 hwassist |= CSUM_TCP_IPV6 1268 | CSUM_UDP_IPV6; 1269 if (ifp->if_capenable & IFCAP_TSO4) 1270 hwassist |= CSUM_IP_TSO; 1271 if (ifp->if_capenable & IFCAP_TSO6) 1272 hwassist |= CSUM_IP6_TSO; 1273 ifp->if_hwassist = hwassist; 1274 } 1275 1276 /* 1277 * To be called under TUN_LOCK. Update tp->tun_vhdrlen and adjust 1278 * if_capabilities and if_capenable as needed. 1279 */ 1280 static void 1281 tun_vnethdr_set(struct ifnet *ifp, int vhdrlen) 1282 { 1283 struct tuntap_softc *tp = ifp->if_softc; 1284 1285 TUN_LOCK_ASSERT(tp); 1286 1287 if (tp->tun_vhdrlen == vhdrlen) 1288 return; 1289 1290 /* 1291 * Update if_capabilities to reflect the 1292 * functionalities offered by the virtio-net 1293 * header. 1294 */ 1295 if (vhdrlen != 0) 1296 ifp->if_capabilities |= 1297 TAP_VNET_HDR_CAPS; 1298 else 1299 ifp->if_capabilities &= 1300 ~TAP_VNET_HDR_CAPS; 1301 /* 1302 * Disable any capabilities that we don't 1303 * support anymore. 1304 */ 1305 ifp->if_capenable &= ifp->if_capabilities; 1306 tun_caps_changed(ifp); 1307 tp->tun_vhdrlen = vhdrlen; 1308 1309 TUNDEBUG(ifp, "vnet_hdr_len=%d, if_capabilities=%x\n", 1310 vhdrlen, ifp->if_capabilities); 1311 } 1312 1313 /* 1314 * Process an ioctl request. 1315 */ 1316 static int 1317 tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1318 { 1319 struct ifreq *ifr = (struct ifreq *)data; 1320 struct tuntap_softc *tp; 1321 struct ifstat *ifs; 1322 struct ifmediareq *ifmr; 1323 int dummy, error = 0; 1324 bool l2tun; 1325 1326 ifmr = NULL; 1327 sx_xlock(&tun_ioctl_sx); 1328 tp = ifp->if_softc; 1329 if (tp == NULL) { 1330 error = ENXIO; 1331 goto bad; 1332 } 1333 l2tun = (tp->tun_flags & TUN_L2) != 0; 1334 switch(cmd) { 1335 case SIOCGIFSTATUS: 1336 ifs = (struct ifstat *)data; 1337 TUN_LOCK(tp); 1338 if (tp->tun_pid) 1339 snprintf(ifs->ascii, sizeof(ifs->ascii), 1340 "\tOpened by PID %d\n", tp->tun_pid); 1341 else 1342 ifs->ascii[0] = '\0'; 1343 TUN_UNLOCK(tp); 1344 break; 1345 case SIOCSIFADDR: 1346 if (l2tun) 1347 error = ether_ioctl(ifp, cmd, data); 1348 else 1349 tuninit(ifp); 1350 if (error == 0) 1351 TUNDEBUG(ifp, "address set\n"); 1352 break; 1353 case SIOCSIFMTU: 1354 ifp->if_mtu = ifr->ifr_mtu; 1355 TUNDEBUG(ifp, "mtu set\n"); 1356 break; 1357 case SIOCSIFFLAGS: 1358 case SIOCADDMULTI: 1359 case SIOCDELMULTI: 1360 break; 1361 case SIOCGIFMEDIA: 1362 if (!l2tun) { 1363 error = EINVAL; 1364 break; 1365 } 1366 1367 ifmr = (struct ifmediareq *)data; 1368 dummy = ifmr->ifm_count; 1369 ifmr->ifm_count = 1; 1370 ifmr->ifm_status = IFM_AVALID; 1371 ifmr->ifm_active = IFM_ETHER; 1372 if (tp->tun_flags & TUN_OPEN) 1373 ifmr->ifm_status |= IFM_ACTIVE; 1374 ifmr->ifm_current = ifmr->ifm_active; 1375 if (dummy >= 1) { 1376 int media = IFM_ETHER; 1377 error = copyout(&media, ifmr->ifm_ulist, sizeof(int)); 1378 } 1379 break; 1380 case SIOCSIFCAP: 1381 TUN_LOCK(tp); 1382 ifp->if_capenable = ifr->ifr_reqcap; 1383 tun_caps_changed(ifp); 1384 TUN_UNLOCK(tp); 1385 VLAN_CAPABILITIES(ifp); 1386 break; 1387 default: 1388 if (l2tun) { 1389 error = ether_ioctl(ifp, cmd, data); 1390 } else { 1391 error = EINVAL; 1392 } 1393 } 1394 bad: 1395 sx_xunlock(&tun_ioctl_sx); 1396 return (error); 1397 } 1398 1399 /* 1400 * tunoutput - queue packets from higher level ready to put out. 1401 */ 1402 static int 1403 tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, 1404 struct route *ro) 1405 { 1406 struct tuntap_softc *tp = ifp->if_softc; 1407 u_short cached_tun_flags; 1408 int error; 1409 u_int32_t af; 1410 1411 TUNDEBUG (ifp, "tunoutput\n"); 1412 1413 #ifdef MAC 1414 error = mac_ifnet_check_transmit(ifp, m0); 1415 if (error) { 1416 m_freem(m0); 1417 return (error); 1418 } 1419 #endif 1420 1421 /* Could be unlocked read? */ 1422 TUN_LOCK(tp); 1423 cached_tun_flags = tp->tun_flags; 1424 TUN_UNLOCK(tp); 1425 if ((cached_tun_flags & TUN_READY) != TUN_READY) { 1426 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); 1427 m_freem (m0); 1428 return (EHOSTDOWN); 1429 } 1430 1431 if ((ifp->if_flags & IFF_UP) != IFF_UP) { 1432 m_freem (m0); 1433 return (EHOSTDOWN); 1434 } 1435 1436 /* BPF writes need to be handled specially. */ 1437 if (dst->sa_family == AF_UNSPEC) 1438 bcopy(dst->sa_data, &af, sizeof(af)); 1439 else 1440 af = dst->sa_family; 1441 1442 if (bpf_peers_present(ifp->if_bpf)) 1443 bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0); 1444 1445 /* prepend sockaddr? this may abort if the mbuf allocation fails */ 1446 if (cached_tun_flags & TUN_LMODE) { 1447 /* allocate space for sockaddr */ 1448 M_PREPEND(m0, dst->sa_len, M_NOWAIT); 1449 1450 /* if allocation failed drop packet */ 1451 if (m0 == NULL) { 1452 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1453 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1454 return (ENOBUFS); 1455 } else { 1456 bcopy(dst, m0->m_data, dst->sa_len); 1457 } 1458 } 1459 1460 if (cached_tun_flags & TUN_IFHEAD) { 1461 /* Prepend the address family */ 1462 M_PREPEND(m0, 4, M_NOWAIT); 1463 1464 /* if allocation failed drop packet */ 1465 if (m0 == NULL) { 1466 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1467 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); 1468 return (ENOBUFS); 1469 } else 1470 *(u_int32_t *)m0->m_data = htonl(af); 1471 } else { 1472 #ifdef INET 1473 if (af != AF_INET) 1474 #endif 1475 { 1476 m_freem(m0); 1477 return (EAFNOSUPPORT); 1478 } 1479 } 1480 1481 error = (ifp->if_transmit)(ifp, m0); 1482 if (error) 1483 return (ENOBUFS); 1484 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1485 return (0); 1486 } 1487 1488 /* 1489 * the cdevsw interface is now pretty minimal. 1490 */ 1491 static int 1492 tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 1493 struct thread *td) 1494 { 1495 struct ifreq ifr, *ifrp; 1496 struct tuntap_softc *tp = dev->si_drv1; 1497 struct ifnet *ifp = TUN2IFP(tp); 1498 struct tuninfo *tunp; 1499 int error, iflags, ival; 1500 bool l2tun; 1501 1502 l2tun = (tp->tun_flags & TUN_L2) != 0; 1503 if (l2tun) { 1504 /* tap specific ioctls */ 1505 switch(cmd) { 1506 /* VMware/VMnet port ioctl's */ 1507 #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ 1508 defined(COMPAT_FREEBSD4) 1509 case _IO('V', 0): 1510 ival = IOCPARM_IVAL(data); 1511 data = (caddr_t)&ival; 1512 /* FALLTHROUGH */ 1513 #endif 1514 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ 1515 iflags = *(int *)data; 1516 iflags &= TUN_VMIO_FLAG_MASK; 1517 iflags &= ~IFF_CANTCHANGE; 1518 iflags |= IFF_UP; 1519 1520 TUN_LOCK(tp); 1521 ifp->if_flags = iflags | 1522 (ifp->if_flags & IFF_CANTCHANGE); 1523 TUN_UNLOCK(tp); 1524 1525 return (0); 1526 case SIOCGIFADDR: /* get MAC address of the remote side */ 1527 TUN_LOCK(tp); 1528 bcopy(&tp->tun_ether.octet, data, 1529 sizeof(tp->tun_ether.octet)); 1530 TUN_UNLOCK(tp); 1531 1532 return (0); 1533 case SIOCSIFADDR: /* set MAC address of the remote side */ 1534 TUN_LOCK(tp); 1535 bcopy(data, &tp->tun_ether.octet, 1536 sizeof(tp->tun_ether.octet)); 1537 TUN_UNLOCK(tp); 1538 1539 return (0); 1540 case TAPSVNETHDR: 1541 ival = *(int *)data; 1542 if (ival != 0 && 1543 ival != sizeof(struct virtio_net_hdr) && 1544 ival != sizeof(struct virtio_net_hdr_mrg_rxbuf)) { 1545 return (EINVAL); 1546 } 1547 TUN_LOCK(tp); 1548 tun_vnethdr_set(ifp, ival); 1549 TUN_UNLOCK(tp); 1550 1551 return (0); 1552 case TAPGVNETHDR: 1553 TUN_LOCK(tp); 1554 *(int *)data = tp->tun_vhdrlen; 1555 TUN_UNLOCK(tp); 1556 1557 return (0); 1558 } 1559 1560 /* Fall through to the common ioctls if unhandled */ 1561 } else { 1562 switch (cmd) { 1563 case TUNSLMODE: 1564 TUN_LOCK(tp); 1565 if (*(int *)data) { 1566 tp->tun_flags |= TUN_LMODE; 1567 tp->tun_flags &= ~TUN_IFHEAD; 1568 } else 1569 tp->tun_flags &= ~TUN_LMODE; 1570 TUN_UNLOCK(tp); 1571 1572 return (0); 1573 case TUNSIFHEAD: 1574 TUN_LOCK(tp); 1575 if (*(int *)data) { 1576 tp->tun_flags |= TUN_IFHEAD; 1577 tp->tun_flags &= ~TUN_LMODE; 1578 } else 1579 tp->tun_flags &= ~TUN_IFHEAD; 1580 TUN_UNLOCK(tp); 1581 1582 return (0); 1583 case TUNGIFHEAD: 1584 TUN_LOCK(tp); 1585 *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0; 1586 TUN_UNLOCK(tp); 1587 1588 return (0); 1589 case TUNSIFMODE: 1590 /* deny this if UP */ 1591 if (TUN2IFP(tp)->if_flags & IFF_UP) 1592 return (EBUSY); 1593 1594 switch (*(int *)data & ~IFF_MULTICAST) { 1595 case IFF_POINTOPOINT: 1596 case IFF_BROADCAST: 1597 TUN_LOCK(tp); 1598 TUN2IFP(tp)->if_flags &= 1599 ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST); 1600 TUN2IFP(tp)->if_flags |= *(int *)data; 1601 TUN_UNLOCK(tp); 1602 1603 break; 1604 default: 1605 return (EINVAL); 1606 } 1607 1608 return (0); 1609 case TUNSIFPID: 1610 TUN_LOCK(tp); 1611 tp->tun_pid = curthread->td_proc->p_pid; 1612 TUN_UNLOCK(tp); 1613 1614 return (0); 1615 } 1616 /* Fall through to the common ioctls if unhandled */ 1617 } 1618 1619 switch (cmd) { 1620 case TUNGIFNAME: 1621 ifrp = (struct ifreq *)data; 1622 strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ); 1623 1624 return (0); 1625 case TUNSIFINFO: 1626 tunp = (struct tuninfo *)data; 1627 if (TUN2IFP(tp)->if_type != tunp->type) 1628 return (EPROTOTYPE); 1629 TUN_LOCK(tp); 1630 if (TUN2IFP(tp)->if_mtu != tunp->mtu) { 1631 strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ); 1632 ifr.ifr_mtu = tunp->mtu; 1633 CURVNET_SET(TUN2IFP(tp)->if_vnet); 1634 error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp), 1635 (caddr_t)&ifr, td); 1636 CURVNET_RESTORE(); 1637 if (error) { 1638 TUN_UNLOCK(tp); 1639 return (error); 1640 } 1641 } 1642 TUN2IFP(tp)->if_baudrate = tunp->baudrate; 1643 TUN_UNLOCK(tp); 1644 break; 1645 case TUNGIFINFO: 1646 tunp = (struct tuninfo *)data; 1647 TUN_LOCK(tp); 1648 tunp->mtu = TUN2IFP(tp)->if_mtu; 1649 tunp->type = TUN2IFP(tp)->if_type; 1650 tunp->baudrate = TUN2IFP(tp)->if_baudrate; 1651 TUN_UNLOCK(tp); 1652 break; 1653 case TUNSDEBUG: 1654 tundebug = *(int *)data; 1655 break; 1656 case TUNGDEBUG: 1657 *(int *)data = tundebug; 1658 break; 1659 case FIONBIO: 1660 break; 1661 case FIOASYNC: 1662 TUN_LOCK(tp); 1663 if (*(int *)data) 1664 tp->tun_flags |= TUN_ASYNC; 1665 else 1666 tp->tun_flags &= ~TUN_ASYNC; 1667 TUN_UNLOCK(tp); 1668 break; 1669 case FIONREAD: 1670 if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) { 1671 struct mbuf *mb; 1672 IFQ_LOCK(&TUN2IFP(tp)->if_snd); 1673 IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb); 1674 for (*(int *)data = 0; mb != NULL; mb = mb->m_next) 1675 *(int *)data += mb->m_len; 1676 IFQ_UNLOCK(&TUN2IFP(tp)->if_snd); 1677 } else 1678 *(int *)data = 0; 1679 break; 1680 case FIOSETOWN: 1681 return (fsetown(*(int *)data, &tp->tun_sigio)); 1682 1683 case FIOGETOWN: 1684 *(int *)data = fgetown(&tp->tun_sigio); 1685 return (0); 1686 1687 /* This is deprecated, FIOSETOWN should be used instead. */ 1688 case TIOCSPGRP: 1689 return (fsetown(-(*(int *)data), &tp->tun_sigio)); 1690 1691 /* This is deprecated, FIOGETOWN should be used instead. */ 1692 case TIOCGPGRP: 1693 *(int *)data = -fgetown(&tp->tun_sigio); 1694 return (0); 1695 1696 default: 1697 return (ENOTTY); 1698 } 1699 return (0); 1700 } 1701 1702 /* 1703 * The cdevsw read interface - reads a packet at a time, or at 1704 * least as much of a packet as can be read. 1705 */ 1706 static int 1707 tunread(struct cdev *dev, struct uio *uio, int flag) 1708 { 1709 struct tuntap_softc *tp = dev->si_drv1; 1710 struct ifnet *ifp = TUN2IFP(tp); 1711 struct mbuf *m; 1712 size_t len; 1713 int error = 0; 1714 1715 TUNDEBUG (ifp, "read\n"); 1716 TUN_LOCK(tp); 1717 if ((tp->tun_flags & TUN_READY) != TUN_READY) { 1718 TUN_UNLOCK(tp); 1719 TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); 1720 return (EHOSTDOWN); 1721 } 1722 1723 tp->tun_flags &= ~TUN_RWAIT; 1724 1725 for (;;) { 1726 IFQ_DEQUEUE(&ifp->if_snd, m); 1727 if (m != NULL) 1728 break; 1729 if (flag & O_NONBLOCK) { 1730 TUN_UNLOCK(tp); 1731 return (EWOULDBLOCK); 1732 } 1733 tp->tun_flags |= TUN_RWAIT; 1734 error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1), 1735 "tunread", 0); 1736 if (error != 0) { 1737 TUN_UNLOCK(tp); 1738 return (error); 1739 } 1740 } 1741 TUN_UNLOCK(tp); 1742 1743 if ((tp->tun_flags & TUN_L2) != 0) 1744 BPF_MTAP(ifp, m); 1745 1746 len = min(tp->tun_vhdrlen, uio->uio_resid); 1747 if (len > 0) { 1748 struct virtio_net_hdr_mrg_rxbuf vhdr; 1749 1750 bzero(&vhdr, sizeof(vhdr)); 1751 if (m->m_pkthdr.csum_flags & TAP_ALL_OFFLOAD) { 1752 m = virtio_net_tx_offload(ifp, m, false, &vhdr.hdr); 1753 } 1754 1755 TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, " 1756 "gs %u, cs %u, co %u\n", vhdr.hdr.flags, 1757 vhdr.hdr.gso_type, vhdr.hdr.hdr_len, 1758 vhdr.hdr.gso_size, vhdr.hdr.csum_start, 1759 vhdr.hdr.csum_offset); 1760 error = uiomove(&vhdr, len, uio); 1761 } 1762 1763 while (m && uio->uio_resid > 0 && error == 0) { 1764 len = min(uio->uio_resid, m->m_len); 1765 if (len != 0) 1766 error = uiomove(mtod(m, void *), len, uio); 1767 m = m_free(m); 1768 } 1769 1770 if (m) { 1771 TUNDEBUG(ifp, "Dropping mbuf\n"); 1772 m_freem(m); 1773 } 1774 return (error); 1775 } 1776 1777 static int 1778 tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m, 1779 struct virtio_net_hdr_mrg_rxbuf *vhdr) 1780 { 1781 struct epoch_tracker et; 1782 struct ether_header *eh; 1783 struct ifnet *ifp; 1784 1785 ifp = TUN2IFP(tp); 1786 1787 /* 1788 * Only pass a unicast frame to ether_input(), if it would 1789 * actually have been received by non-virtual hardware. 1790 */ 1791 if (m->m_len < sizeof(struct ether_header)) { 1792 m_freem(m); 1793 return (0); 1794 } 1795 1796 eh = mtod(m, struct ether_header *); 1797 1798 if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && 1799 !ETHER_IS_MULTICAST(eh->ether_dhost) && 1800 bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { 1801 m_freem(m); 1802 return (0); 1803 } 1804 1805 if (vhdr != NULL && virtio_net_rx_csum(m, &vhdr->hdr)) { 1806 m_freem(m); 1807 return (0); 1808 } 1809 1810 /* Pass packet up to parent. */ 1811 CURVNET_SET(ifp->if_vnet); 1812 NET_EPOCH_ENTER(et); 1813 (*ifp->if_input)(ifp, m); 1814 NET_EPOCH_EXIT(et); 1815 CURVNET_RESTORE(); 1816 /* ibytes are counted in parent */ 1817 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1818 return (0); 1819 } 1820 1821 static int 1822 tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m) 1823 { 1824 struct epoch_tracker et; 1825 struct ifnet *ifp; 1826 int family, isr; 1827 1828 ifp = TUN2IFP(tp); 1829 /* Could be unlocked read? */ 1830 TUN_LOCK(tp); 1831 if (tp->tun_flags & TUN_IFHEAD) { 1832 TUN_UNLOCK(tp); 1833 if (m->m_len < sizeof(family) && 1834 (m = m_pullup(m, sizeof(family))) == NULL) 1835 return (ENOBUFS); 1836 family = ntohl(*mtod(m, u_int32_t *)); 1837 m_adj(m, sizeof(family)); 1838 } else { 1839 TUN_UNLOCK(tp); 1840 family = AF_INET; 1841 } 1842 1843 BPF_MTAP2(ifp, &family, sizeof(family), m); 1844 1845 switch (family) { 1846 #ifdef INET 1847 case AF_INET: 1848 isr = NETISR_IP; 1849 break; 1850 #endif 1851 #ifdef INET6 1852 case AF_INET6: 1853 isr = NETISR_IPV6; 1854 break; 1855 #endif 1856 default: 1857 m_freem(m); 1858 return (EAFNOSUPPORT); 1859 } 1860 random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN); 1861 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 1862 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); 1863 CURVNET_SET(ifp->if_vnet); 1864 M_SETFIB(m, ifp->if_fib); 1865 NET_EPOCH_ENTER(et); 1866 netisr_dispatch(isr, m); 1867 NET_EPOCH_EXIT(et); 1868 CURVNET_RESTORE(); 1869 return (0); 1870 } 1871 1872 /* 1873 * the cdevsw write interface - an atomic write is a packet - or else! 1874 */ 1875 static int 1876 tunwrite(struct cdev *dev, struct uio *uio, int flag) 1877 { 1878 struct virtio_net_hdr_mrg_rxbuf vhdr; 1879 struct tuntap_softc *tp; 1880 struct ifnet *ifp; 1881 struct mbuf *m; 1882 uint32_t mru; 1883 int align, vhdrlen, error; 1884 bool l2tun; 1885 1886 tp = dev->si_drv1; 1887 ifp = TUN2IFP(tp); 1888 TUNDEBUG(ifp, "tunwrite\n"); 1889 if ((ifp->if_flags & IFF_UP) != IFF_UP) 1890 /* ignore silently */ 1891 return (0); 1892 1893 if (uio->uio_resid == 0) 1894 return (0); 1895 1896 l2tun = (tp->tun_flags & TUN_L2) != 0; 1897 mru = l2tun ? TAPMRU : TUNMRU; 1898 vhdrlen = tp->tun_vhdrlen; 1899 align = 0; 1900 if (l2tun) { 1901 align = ETHER_ALIGN; 1902 mru += vhdrlen; 1903 } else if ((tp->tun_flags & TUN_IFHEAD) != 0) 1904 mru += sizeof(uint32_t); /* family */ 1905 if (uio->uio_resid < 0 || uio->uio_resid > mru) { 1906 TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid); 1907 return (EIO); 1908 } 1909 1910 if (vhdrlen > 0) { 1911 error = uiomove(&vhdr, vhdrlen, uio); 1912 if (error != 0) 1913 return (error); 1914 TUNDEBUG(ifp, "txvhdr: f %u, gt %u, hl %u, " 1915 "gs %u, cs %u, co %u\n", vhdr.hdr.flags, 1916 vhdr.hdr.gso_type, vhdr.hdr.hdr_len, 1917 vhdr.hdr.gso_size, vhdr.hdr.csum_start, 1918 vhdr.hdr.csum_offset); 1919 } 1920 1921 if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) { 1922 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 1923 return (ENOBUFS); 1924 } 1925 1926 m->m_pkthdr.rcvif = ifp; 1927 #ifdef MAC 1928 mac_ifnet_create_mbuf(ifp, m); 1929 #endif 1930 1931 if (l2tun) 1932 return (tunwrite_l2(tp, m, vhdrlen > 0 ? &vhdr : NULL)); 1933 1934 return (tunwrite_l3(tp, m)); 1935 } 1936 1937 /* 1938 * tunpoll - the poll interface, this is only useful on reads 1939 * really. The write detect always returns true, write never blocks 1940 * anyway, it either accepts the packet or drops it. 1941 */ 1942 static int 1943 tunpoll(struct cdev *dev, int events, struct thread *td) 1944 { 1945 struct tuntap_softc *tp = dev->si_drv1; 1946 struct ifnet *ifp = TUN2IFP(tp); 1947 int revents = 0; 1948 1949 TUNDEBUG(ifp, "tunpoll\n"); 1950 1951 if (events & (POLLIN | POLLRDNORM)) { 1952 IFQ_LOCK(&ifp->if_snd); 1953 if (!IFQ_IS_EMPTY(&ifp->if_snd)) { 1954 TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); 1955 revents |= events & (POLLIN | POLLRDNORM); 1956 } else { 1957 TUNDEBUG(ifp, "tunpoll waiting\n"); 1958 selrecord(td, &tp->tun_rsel); 1959 } 1960 IFQ_UNLOCK(&ifp->if_snd); 1961 } 1962 revents |= events & (POLLOUT | POLLWRNORM); 1963 1964 return (revents); 1965 } 1966 1967 /* 1968 * tunkqfilter - support for the kevent() system call. 1969 */ 1970 static int 1971 tunkqfilter(struct cdev *dev, struct knote *kn) 1972 { 1973 struct tuntap_softc *tp = dev->si_drv1; 1974 struct ifnet *ifp = TUN2IFP(tp); 1975 1976 switch(kn->kn_filter) { 1977 case EVFILT_READ: 1978 TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n", 1979 ifp->if_xname, dev2unit(dev)); 1980 kn->kn_fop = &tun_read_filterops; 1981 break; 1982 1983 case EVFILT_WRITE: 1984 TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n", 1985 ifp->if_xname, dev2unit(dev)); 1986 kn->kn_fop = &tun_write_filterops; 1987 break; 1988 1989 default: 1990 TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n", 1991 ifp->if_xname, dev2unit(dev)); 1992 return(EINVAL); 1993 } 1994 1995 kn->kn_hook = tp; 1996 knlist_add(&tp->tun_rsel.si_note, kn, 0); 1997 1998 return (0); 1999 } 2000 2001 /* 2002 * Return true of there is data in the interface queue. 2003 */ 2004 static int 2005 tunkqread(struct knote *kn, long hint) 2006 { 2007 int ret; 2008 struct tuntap_softc *tp = kn->kn_hook; 2009 struct cdev *dev = tp->tun_dev; 2010 struct ifnet *ifp = TUN2IFP(tp); 2011 2012 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 2013 TUNDEBUG(ifp, 2014 "%s have data in the queue. Len = %d, minor = %#x\n", 2015 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 2016 ret = 1; 2017 } else { 2018 TUNDEBUG(ifp, 2019 "%s waiting for data, minor = %#x\n", ifp->if_xname, 2020 dev2unit(dev)); 2021 ret = 0; 2022 } 2023 2024 return (ret); 2025 } 2026 2027 /* 2028 * Always can write, always return MTU in kn->data. 2029 */ 2030 static int 2031 tunkqwrite(struct knote *kn, long hint) 2032 { 2033 struct tuntap_softc *tp = kn->kn_hook; 2034 struct ifnet *ifp = TUN2IFP(tp); 2035 2036 kn->kn_data = ifp->if_mtu; 2037 2038 return (1); 2039 } 2040 2041 static void 2042 tunkqdetach(struct knote *kn) 2043 { 2044 struct tuntap_softc *tp = kn->kn_hook; 2045 2046 knlist_remove(&tp->tun_rsel.si_note, kn, 0); 2047 } 2048