/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_mac.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (i.e., 802.11
 * frames, ethernet frames, etc).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct timeval *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_TRACKCLOSE,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops =
	{ 1, NULL, filt_bpfdetach, filt_bpfread };

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, et al.
 */
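/*
 * Editor's sketch (hypothetical, not used anywhere in this file) of the
 * kind of switch structure the comment above contemplates; the type and
 * member names are invented for illustration only.
 */
#if 0
struct bpf_bufops {
	void	(*bo_append_bytes)(struct bpf_d *, caddr_t, u_int, void *,
		    u_int);
	void	(*bo_append_mbuf)(struct bpf_d *, caddr_t, u_int, void *,
		    u_int);
	int	(*bo_canfreebuf)(struct bpf_d *);
	int	(*bo_canwritebuf)(struct bpf_d *);
	void	(*bo_free)(struct bpf_d *);
};
#endif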
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

/*
 * Allow the buffer model to indicate that the current store buffer is
 * immutable, regardless of the appearance of space.  Return (1) if the
 * buffer is writable, and (0) if not.
 */
static int
bpf_canwritebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canwritebuf(d));
	}
	return (1);
}

/*
 * Notify buffer model that an attempt to write to the store buffer has
 * resulted in a dropped packet, in which case the buffer may be considered
 * full.
 */
static void
bpf_buffull(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buffull(d);
		break;
	}
}
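/*
 * Editor's note on the buffer terminology used by these hooks and by
 * catchpacket()/bpfread() below: each descriptor cycles three buffers.
 * catchpacket() appends packets to the store buffer (bd_sbuf); when it
 * fills, ROTATE_BUFFERS() moves it to the hold position (bd_hbuf) and
 * promotes the free buffer (bd_fbuf) to be the new store buffer.
 * bpfread() drains bd_hbuf and then returns it to bd_fbuf.
 */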
/*
 * Notify the buffer model that a buffer has moved into the hold position.
 */
void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}
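/*
 * Editor's note: combining the mode checks above with the BIOCSETBUFMODE
 * handling in bpfioctl() and the bd_sbuf check in bpf_setif(), the setup
 * order a zero-copy consumer must follow is: select the mode with
 * BIOCSETBUFMODE while no buffer or interface exists yet, register
 * buffers with BIOCSETZBUF, and only then bind an interface with
 * BIOCSETIF.
 */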
/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The en(4) ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, the VPI:VCI needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MCLBYTES)
		return (EIO);

	if (len > MHLEN)
		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
	else
		MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}
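/*
 * Editor's illustration (not compiled into this file): a minimal userland
 * sketch of the write path that bpf_movein() services above.  The
 * interface name "em0" and the frame contents are placeholders.
 */
#if 0
#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <net/bpf.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
send_frame(const unsigned char *frame, size_t len)
{
	struct ifreq ifr;
	int fd;

	if ((fd = open("/dev/bpf0", O_RDWR)) < 0)
		return (-1);
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
	/* BIOCSETIF binds the descriptor via bpf_setif(). */
	if (ioctl(fd, BIOCSETIF, &ifr) < 0)
		return (-1);
	/* write(2) enters bpfwrite(), which calls bpf_movein(). */
	return (write(fd, frame, len) == (ssize_t)len ? 0 : -1);
}
#endif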
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
			    "bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
/* ARGSUSED */
static int
bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}
/*
 * bpfread - read next chunk of packets from buffers
 */
static int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		    "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bd_bufsize bytes.
	 *
	 * XXXRW: More synchronization needed here: what if a second thread
	 * issues a read on the same fd at the same time?  Don't want this
	 * getting invalidated.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	BPFD_UNLOCK(d);

	return (error);
}
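/*
 * Editor's illustration (not compiled into this file): walking the
 * packets returned by a single read(2), using the bpf_hdr that
 * catchpacket() prepends below.  Per the check at the top of bpfread(),
 * the buffer must be exactly BIOCGBLEN bytes; "handle" is a placeholder
 * callback.
 */
#if 0
	u_int buflen;
	char *buf, *p;
	ssize_t n;

	if (ioctl(fd, BIOCGBLEN, &buflen) < 0)
		err(1, "BIOCGBLEN");
	buf = malloc(buflen);
	n = read(fd, buf, buflen);	/* blocks until rotate or timeout */
	for (p = buf; p < buf + n;) {
		struct bpf_hdr *bh = (struct bpf_hdr *)p;

		handle(p + bh->bh_hdrlen, bh->bh_caplen);
		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
	}
#endif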
/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	d->bd_pid = curthread->td_proc->p_pid;
	d->bd_wcount++;
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_DONTWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	if (error)
		d->bd_wdcount++;

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}

	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.
 */
static void
reset_d(struct bpf_d *d)
{

	mtx_assert(&d->bd_mtx, MA_OWNED);
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
	d->bd_wcount = 0;
	d->bd_wfcount = 0;
	d->bd_wdcount = 0;
	d->bd_zcopy = 0;
}
/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set ethernet read filter.
 *  BIOCSETWF		Set ethernet write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag.
 *  BIOCSHDRCMPLT	Set "header already complete" flag.
 *  BIOCGDIRECTION	Get packet direction flag.
 *  BIOCSDIRECTION	Set packet direction flag.
 *  BIOCLOCK		Set "locked" flag.
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer.
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 */
/* ARGSUSED */
static int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;
	int error = 0;

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;
	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag.
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag.
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get packet direction flag.
	 */
	case BIOCGDIRECTION:
		*(u_int *)addr = d->bd_direction;
		break;

	/*
	 * Set packet direction flag.
	 */
	case BIOCSDIRECTION:
		{
			u_int	direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				d->bd_direction = direction;
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	case BIOCFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;

	case BIOCGETBUFMODE:
		*(u_int *)addr = d->bd_bufmode;
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLTHROUGH */

		default:
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		return (bpf_ioctl_getzmax(td, d, (size_t *)addr));

	case BIOCSETZBUF:
		return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));

	case BIOCROTZBUF:
		return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr));
	}
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
		}
		reset_d(d);
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
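/*
 * Editor's illustration (not compiled into this file): installing a
 * one-instruction program through BIOCSETF, which lands in bpf_setf()
 * above.  This filter accepts every packet, capped at 96 bytes.
 */
#if 0
	struct bpf_insn insns[] = {
		BPF_STMT(BPF_RET | BPF_K, 96),
	};
	struct bpf_program prog = {
		.bf_len = sizeof(insns) / sizeof(insns[0]),
		.bf_insns = insns,
	};

	if (ioctl(fd, BIOCSETF, &prog) < 0)
		err(1, "BIOCSETF");
#endif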
/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/*
	 * Behavior here depends on the buffering model.  If we're using
	 * kernel memory buffers, then we can allocate them here.  If we're
	 * using zero-copy, then the user process must have registered
	 * buffers by the time we get here.  If not, return an error.
	 *
	 * XXXRW: There are locking issues here with multi-threaded use: what
	 * if two threads try to set the interface at once?
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		if (d->bd_sbuf == NULL)
			bpf_buffer_alloc(d);
		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
		break;

	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	d = dev->si_drv1;
	if (d->bd_bif == NULL)
		return (ENXIO);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}
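/*
 * Editor's illustration (not compiled into this file): registering for
 * read events through bpfkqfilter() below.  When filt_bpfread() reports
 * readiness, ev.data carries the pending byte count.
 */
#if 0
	struct kevent kev, ev;
	int kq;

	if ((kq = kqueue()) < 0)
		err(1, "kqueue");
	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, &ev, 1, NULL) > 0)
		printf("%jd bytes ready\n", (intmax_t)ev.data);
#endif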
/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;
	int gottime;
	struct timeval tv;

	gottime = 0;
	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
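/*
 * Editor's note on reading the macro above: it evaluates true when the
 * packet should be skipped.  A descriptor set to BPF_D_IN wants only
 * packets received on the interface (rcvif == bif_ifp), so anything
 * else is skipped; BPF_D_OUT wants only locally generated packets
 * (rcvif unset or different), so received ones are skipped.
 */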
/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		/* XXX We cannot handle multiple mbufs. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
		    m->m_next == NULL)
			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
			    pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#undef	BPF_CHECK_DIRECTION
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct timeval *tv)
{
	struct bpf_hdr hdr;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room.
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wakeup pending processes.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	hdr.bh_tstamp = *tv;
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = totlen - hdrlen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it has not yet been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp;
	struct bpf_d *d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types for the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}
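/*
 * Editor's illustration (not compiled into this file): the two-call
 * pattern bpf_getdltlist() supports -- probe the count with a NULL
 * bfl_list, then fetch the values.
 */
#if 0
	struct bpf_dltlist dlts;

	memset(&dlts, 0, sizeof(dlts));
	if (ioctl(fd, BIOCGDLTLIST, &dlts) < 0)	/* sets bfl_len */
		err(1, "BIOCGDLTLIST");
	dlts.bfl_list = malloc(dlts.bfl_len * sizeof(u_int));
	if (ioctl(fd, BIOCGDLTLIST, &dlts) < 0)	/* fills bfl_list */
		err(1, "BIOCGDLTLIST");
#endif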
/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
				    "bpf_setdlt: ifpromisc failed (%d)\n",
				    error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	int u;

	if (*dev != NULL)
		return;
	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
		return;
	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
	    "bpf%d", u);
	dev_ref(*dev);
	(*dev)->si_flags |= SI_CHEAPCLONE;
	return;
}

static void
bpf_drvinit(void *unused)
{

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);
	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_structsize = sizeof(*d);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}
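/*
 * Editor's illustration (not compiled into this file): reading the
 * per-descriptor records exported by bpf_stats_sysctl() below, sizing
 * the buffer with the customary NULL-oldp probe.
 */
#if 0
	struct xbpf_d *xbds;
	size_t len;

	if (sysctlbyname("net.bpf.stats", NULL, &len, NULL, 0) < 0)
		err(1, "net.bpf.stats");
	xbds = malloc(len);
	if (sysctlbyname("net.bpf.stats", xbds, &len, NULL, 0) < 0)
		err(1, "net.bpf.stats");
	/* len / sizeof(*xbds) entries, one per open descriptor. */
#endif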
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for non
	 * privileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;	/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */