/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET	26			/* interruptible */

#define	M_SKIP_BPF	M_SKIP_FIREWALL

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, ethernet frames, etc).
 */
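/*
 * For example (an illustrative sketch, not code in this file): a wireless
 * driver might register two BPF interface structures for a single ifnet,
 * one per DLT, so listeners can choose between raw 802.11 framing and
 * ethernet framing.  The sc_bpf80211 cookie name here is hypothetical;
 * drivers store the pointer wherever is convenient:
 *
 *	bpfattach2(ifp, DLT_IEEE802_11, hdrlen80211, &sc->sc_bpf80211);
 *	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 */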
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct timeval *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
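/*
 * A sketch of what such a switch might look like (hypothetical; nothing
 * below uses it -- the names are invented for illustration):
 *
 *	struct bpf_bufops {
 *		void	(*bop_append_bytes)(struct bpf_d *, caddr_t, u_int,
 *			    void *, u_int);
 *		void	(*bop_append_mbuf)(struct bpf_d *, caddr_t, u_int,
 *			    void *, u_int);
 *		int	(*bop_canfreebuf)(struct bpf_d *);
 *		void	(*bop_free)(struct bpf_d *);
 *	};
 *
 * Each buffer mode would then supply one table, instead of adding a case
 * to every wrapper function below.
 */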
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}
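/*
 * Illustrative zero-copy setup from userland (a sketch under assumptions:
 * the buffer names and length are invented, error handling is omitted, and
 * net.bpf.zerocopy_enable must be nonzero).  The process selects the mode,
 * registers two page-aligned shared buffers, and only then binds an
 * interface:
 *
 *	u_int mode = BPF_BUFMODE_ZBUF;
 *	struct bpf_zbuf bz;
 *
 *	ioctl(fd, BIOCSETBUFMODE, &mode);
 *	bz.bz_bufa = bufa;		(page-aligned shared memory)
 *	bz.bz_bufb = bufb;
 *	bz.bz_buflen = buflen;
 *	ioctl(fd, BIOCSETZBUF, &bz);
 *	ioctl(fd, BIOCSETIF, &ifr);	(EINVAL if no zbufs are set yet)
 */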
/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN)
		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
	else
		MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}
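/*
 * Illustrative write path from userland (a sketch; the frame layout and
 * payload length are assumptions).  A process bound with BIOCSETIF writes
 * a complete link level frame; bpf_movein() above copies the ethernet
 * header into the sockaddr before the packet reaches the interface output
 * routine:
 *
 *	char frame[ETHER_HDR_LEN + payloadlen];
 *
 *	(fill in destination/source MAC, ether_type, payload)
 *	write(fd, frame, sizeof(frame));
 */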
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
			    "bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}
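/*
 * Illustrative userland capture setup (a sketch; the device and interface
 * names are assumptions and error handling is omitted).  Readers must
 * supply a buffer exactly bd_bufsize bytes long (enforced in bpfread()
 * below), and each packet in the returned buffer is prefixed by a
 * BPF_WORDALIGN()-padded struct bpf_hdr:
 *
 *	struct ifreq ifr;
 *	struct bpf_hdr *bh;
 *	u_int bufsize;
 *	char *buf, *p;
 *	int fd;
 *	ssize_t n;
 *
 *	fd = open("/dev/bpf0", O_RDWR);
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCGBLEN, &bufsize);
 *	buf = malloc(bufsize);
 *	n = read(fd, buf, bufsize);
 *	for (p = buf; p < buf + n; ) {
 *		bh = (struct bpf_hdr *)p;
 *		(packet data is at p + bh->bh_hdrlen, bh->bh_caplen bytes)
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */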
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static int
bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}

/*
 * bpfread - read next chunk of packets from buffers
 */
static int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		    "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * XXXRW: More synchronization needed here: what if a second thread
	 * issues a read on the same fd at the same time?  Don't want this
	 * getting invalidated.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	d->bd_pid = curthread->td_proc->p_pid;
	d->bd_wcount++;
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_DONTWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* XXX Do not return the same packet twice. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_SKIP_BPF;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	if (error)
		d->bd_wdcount++;

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}

	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
	d->bd_wcount = 0;
	d->bd_wfcount = 0;
	d->bd_wdcount = 0;
	d->bd_zcopy = 0;
}
/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCSETWF		Set ethernet write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGDIRECTION	Get packet direction flag
 *  BIOCSDIRECTION	Set packet direction flag
 *  BIOCLOCK		Set "locked" flag
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 */
/* ARGSUSED */
static int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;
	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get packet direction flag
	 */
	case BIOCGDIRECTION:
		*(u_int *)addr = d->bd_direction;
		break;

	/*
	 * Set packet direction flag
	 */
	case BIOCSDIRECTION:
		{
			u_int	direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				d->bd_direction = direction;
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	case BIOCFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;
	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;

	case BIOCGETBUFMODE:
		*(u_int *)addr = d->bd_bufmode;
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLTHROUGH */

		default:
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		return (bpf_ioctl_getzmax(td, d, (size_t *)addr));

	case BIOCSETZBUF:
		return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));

	case BIOCROTZBUF:
		return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr));
	}
	return (error);
}
/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/*
	 * Behavior here depends on the buffering model.  If we're using
	 * kernel memory buffers, then we can allocate them here.  If we're
	 * using zero-copy, then the user process must have registered
	 * buffers by the time we get here.  If not, return an error.
	 *
	 * XXXRW: There are locking issues here with multi-threaded use: what
	 * if two threads try to set the interface at once?
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		if (d->bd_sbuf == NULL)
			bpf_buffer_alloc(d);
		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
		break;

	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}
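/*
 * Illustrative filter installation from userland (a sketch; real programs
 * are usually generated by libpcap).  This one-instruction program accepts
 * every packet in full; BIOCSETWF takes the same structure for the write
 * filter:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *
 *	ioctl(fd, BIOCSETF, &prog);
 *
 * Passing bf_insns == NULL with bf_len == 0 removes the installed filter,
 * after which every packet is accepted.
 */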
/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;
	int gottime;
	struct timeval tv;

	gottime = 0;
	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
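/*
 * Illustrative driver-side usage (a sketch; the macro names come from
 * <net/bpf.h> of this era, so consult that header before relying on them).
 * A driver hands each received frame to its listeners with:
 *
 *	BPF_TAP(ifp, pkt, pktlen);	(contiguous buffer)
 *	BPF_MTAP(ifp, m);		(mbuf chain)
 *
 * The macros test ifp->if_bpf for the presence of listeners before calling
 * bpf_tap()/bpf_mtap(), so the common no-listener case stays cheap.
 */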
#define	BPF_CHECK_DIRECTION(d, m) \
	if (((d)->bd_direction == BPF_D_IN && (m)->m_pkthdr.rcvif == NULL) || \
	    ((d)->bd_direction == BPF_D_OUT && (m)->m_pkthdr.rcvif != NULL))

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	if (m->m_flags & M_SKIP_BPF) {
		m->m_flags &= ~M_SKIP_BPF;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPF_CHECK_DIRECTION(d, m)
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		/* XXX We cannot handle multiple mbufs. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
		    m->m_next == NULL)
			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
			    pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	if (m->m_flags & M_SKIP_BPF) {
		m->m_flags &= ~M_SKIP_BPF;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);
	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPF_CHECK_DIRECTION(d, m)
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#undef	BPF_CHECK_DIRECTION
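/*
 * A worked example of the buffer arithmetic in catchpacket() below (the
 * numbers are illustrative): with bd_bufsize = 4096, bd_slen = 1000,
 * bif_hdrlen = 18, pktlen = 1514 and snaplen = 96,
 *
 *	curlen = BPF_WORDALIGN(1000) = 1000
 *	totlen = 18 + min(96, 1514) = 114
 *
 * 1000 + 114 <= 4096, so the header and 96 captured bytes are appended at
 * offset 1000 and bd_slen becomes 1114.  Had curlen + totlen exceeded the
 * buffer size, the store buffer would first be rotated to the hold
 * position, or the packet dropped if the hold buffer were still in use.
 */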
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bpf_append_bytes is passed in to copy contiguous chunks,
 * while bpf_append_mbuf is passed in to copy mbuf chains.  In the latter
 * case, pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct timeval *tv)
{
	struct bpf_hdr hdr;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	hdr.bh_tstamp = *tv;
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = totlen - hdrlen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}
/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}
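/*
 * A worked example of the header-length computation above (assuming
 * SIZEOF_BPF_HDR is 18, as on ILP32 platforms; it is larger where struct
 * timeval is bigger): for DLT_EN10MB, hdrlen = ETHER_HDR_LEN = 14, so
 *
 *	bif_hdrlen = BPF_WORDALIGN(14 + 18) - 14 = 32 - 14 = 18
 *
 * Each captured record then carries an 18-byte (header plus padding)
 * prefix, the link header occupies bytes 18..31, and the network layer
 * header lands at offset 32, a longword boundary.
 */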
/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp;
	struct bpf_d *d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of available data link types of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
				    "bpf_setdlt: ifpromisc failed (%d)\n",
				    error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	int u;

	if (*dev != NULL)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	dev_ref(*dev);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}

static void
bpf_drvinit(void *unused)
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_structsize = sizeof(*d);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}
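/*
 * Illustrative userland consumer of the net.bpf.stats sysctl below (a
 * sketch; error handling is omitted).  The handler returns an array of
 * struct xbpf_d, one per open descriptor; this is the interface that
 * netstat -B uses for per-descriptor BPF statistics:
 *
 *	struct xbpf_d *xbd;
 *	size_t len;
 *
 *	sysctlbyname("net.bpf.stats", NULL, &len, NULL, 0);
 *	xbd = malloc(len);
 *	sysctlbyname("net.bpf.stats", xbd, &len, NULL, 0);
 *	(len / sizeof(struct xbpf_d) entries are now valid)
 */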
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * non-privileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */

/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;	/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */