1 /*- 2 * Copyright (c) 1990, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from the Stanford/CMU enet packet filter, 6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 8 * Berkeley Laboratory. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_bpf.h" 41 #include "opt_mac.h" 42 #include "opt_netgraph.h" 43 44 #include <sys/types.h> 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/conf.h> 48 #include <sys/fcntl.h> 49 #include <sys/malloc.h> 50 #include <sys/mbuf.h> 51 #include <sys/time.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/signalvar.h> 55 #include <sys/filio.h> 56 #include <sys/sockio.h> 57 #include <sys/ttycom.h> 58 #include <sys/uio.h> 59 60 #include <sys/event.h> 61 #include <sys/file.h> 62 #include <sys/poll.h> 63 #include <sys/proc.h> 64 65 #include <sys/socket.h> 66 67 #include <net/if.h> 68 #include <net/bpf.h> 69 #include <net/bpf_buffer.h> 70 #ifdef BPF_JITTER 71 #include <net/bpf_jitter.h> 72 #endif 73 #include <net/bpf_zerocopy.h> 74 #include <net/bpfdesc.h> 75 76 #include <netinet/in.h> 77 #include <netinet/if_ether.h> 78 #include <sys/kernel.h> 79 #include <sys/sysctl.h> 80 81 #include <net80211/ieee80211_freebsd.h> 82 83 #include <security/mac/mac_framework.h> 84 85 MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); 86 87 #if defined(DEV_BPF) || defined(NETGRAPH_BPF) 88 89 #define PRINET 26 /* interruptible */ 90 91 /* 92 * bpf_iflist is a list of BPF interface structures, each corresponding to a 93 * specific DLT. The same network interface might have several BPF interface 94 * structures registered by different layers in the stack (i.e., 802.11 95 * frames, ethernet frames, etc). 96 */ 97 static LIST_HEAD(, bpf_if) bpf_iflist; 98 static struct mtx bpf_mtx; /* bpf global lock */ 99 static int bpf_bpfd_cnt; 100 101 static void bpf_attachd(struct bpf_d *, struct bpf_if *); 102 static void bpf_detachd(struct bpf_d *); 103 static void bpf_freed(struct bpf_d *); 104 static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, 105 struct sockaddr *, int *, struct bpf_insn *); 106 static int bpf_setif(struct bpf_d *, struct ifreq *); 107 static void bpf_timed_out(void *); 108 static __inline void 109 bpf_wakeup(struct bpf_d *); 110 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 111 void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int), 112 struct timeval *); 113 static void reset_d(struct bpf_d *); 114 static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd); 115 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 116 static int bpf_setdlt(struct bpf_d *, u_int); 117 static void filt_bpfdetach(struct knote *); 118 static int filt_bpfread(struct knote *, long); 119 static void bpf_drvinit(void *); 120 static void bpf_clone(void *, struct ucred *, char *, int, struct cdev **); 121 static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS); 122 123 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl"); 124 static int bpf_maxinsns = BPF_MAXINSNS; 125 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW, 126 &bpf_maxinsns, 0, "Maximum bpf program instructions"); 127 static int bpf_zerocopy_enable = 0; 128 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, 129 &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions"); 130 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW, 131 bpf_stats_sysctl, "bpf statistics portal"); 132 133 static d_open_t bpfopen; 134 static d_close_t bpfclose; 135 static d_read_t bpfread; 136 static d_write_t bpfwrite; 137 static d_ioctl_t bpfioctl; 138 static d_poll_t bpfpoll; 139 static d_kqfilter_t bpfkqfilter; 140 141 static struct cdevsw bpf_cdevsw = { 142 .d_version = D_VERSION, 143 .d_open = bpfopen, 144 .d_close = bpfclose, 145 .d_read = bpfread, 146 .d_write = bpfwrite, 147 .d_ioctl = bpfioctl, 148 .d_poll = bpfpoll, 149 .d_name = "bpf", 150 .d_kqfilter = bpfkqfilter, 151 }; 152 153 static struct filterops bpfread_filtops = 154 { 1, NULL, filt_bpfdetach, filt_bpfread }; 155 156 /* 157 * Wrapper functions for various buffering methods. If the set of buffer 158 * modes expands, we will probably want to introduce a switch data structure 159 * similar to protosw, et. 160 */ 161 static void 162 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src, 163 u_int len) 164 { 165 166 BPFD_LOCK_ASSERT(d); 167 168 switch (d->bd_bufmode) { 169 case BPF_BUFMODE_BUFFER: 170 return (bpf_buffer_append_bytes(d, buf, offset, src, len)); 171 172 case BPF_BUFMODE_ZBUF: 173 d->bd_zcopy++; 174 return (bpf_zerocopy_append_bytes(d, buf, offset, src, len)); 175 176 default: 177 panic("bpf_buf_append_bytes"); 178 } 179 } 180 181 static void 182 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src, 183 u_int len) 184 { 185 186 BPFD_LOCK_ASSERT(d); 187 188 switch (d->bd_bufmode) { 189 case BPF_BUFMODE_BUFFER: 190 return (bpf_buffer_append_mbuf(d, buf, offset, src, len)); 191 192 case BPF_BUFMODE_ZBUF: 193 d->bd_zcopy++; 194 return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len)); 195 196 default: 197 panic("bpf_buf_append_mbuf"); 198 } 199 } 200 201 /* 202 * If the buffer mechanism has a way to decide that a held buffer can be made 203 * free, then it is exposed via the bpf_canfreebuf() interface. (1) is 204 * returned if the buffer can be discarded, (0) is returned if it cannot. 205 */ 206 static int 207 bpf_canfreebuf(struct bpf_d *d) 208 { 209 210 BPFD_LOCK_ASSERT(d); 211 212 switch (d->bd_bufmode) { 213 case BPF_BUFMODE_ZBUF: 214 return (bpf_zerocopy_canfreebuf(d)); 215 } 216 return (0); 217 } 218 219 /* 220 * Allow the buffer model to indicate that the current store buffer is 221 * immutable, regardless of the appearance of space. Return (1) if the 222 * buffer is writable, and (0) if not. 223 */ 224 static int 225 bpf_canwritebuf(struct bpf_d *d) 226 { 227 228 BPFD_LOCK_ASSERT(d); 229 230 switch (d->bd_bufmode) { 231 case BPF_BUFMODE_ZBUF: 232 return (bpf_zerocopy_canwritebuf(d)); 233 } 234 return (1); 235 } 236 237 /* 238 * Notify buffer model that an attempt to write to the store buffer has 239 * resulted in a dropped packet, in which case the buffer may be considered 240 * full. 241 */ 242 static void 243 bpf_buffull(struct bpf_d *d) 244 { 245 246 BPFD_LOCK_ASSERT(d); 247 248 switch (d->bd_bufmode) { 249 case BPF_BUFMODE_ZBUF: 250 bpf_zerocopy_buffull(d); 251 break; 252 } 253 } 254 255 /* 256 * Notify the buffer model that a buffer has moved into the hold position. 257 */ 258 void 259 bpf_bufheld(struct bpf_d *d) 260 { 261 262 BPFD_LOCK_ASSERT(d); 263 264 switch (d->bd_bufmode) { 265 case BPF_BUFMODE_ZBUF: 266 bpf_zerocopy_bufheld(d); 267 break; 268 } 269 } 270 271 static void 272 bpf_free(struct bpf_d *d) 273 { 274 275 switch (d->bd_bufmode) { 276 case BPF_BUFMODE_BUFFER: 277 return (bpf_buffer_free(d)); 278 279 case BPF_BUFMODE_ZBUF: 280 return (bpf_zerocopy_free(d)); 281 282 default: 283 panic("bpf_buf_free"); 284 } 285 } 286 287 static int 288 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) 289 { 290 291 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 292 return (EOPNOTSUPP); 293 return (bpf_buffer_uiomove(d, buf, len, uio)); 294 } 295 296 static int 297 bpf_ioctl_sblen(struct bpf_d *d, u_int *i) 298 { 299 300 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 301 return (EOPNOTSUPP); 302 return (bpf_buffer_ioctl_sblen(d, i)); 303 } 304 305 static int 306 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i) 307 { 308 309 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 310 return (EOPNOTSUPP); 311 return (bpf_zerocopy_ioctl_getzmax(td, d, i)); 312 } 313 314 static int 315 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 316 { 317 318 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 319 return (EOPNOTSUPP); 320 return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz)); 321 } 322 323 static int 324 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 325 { 326 327 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 328 return (EOPNOTSUPP); 329 return (bpf_zerocopy_ioctl_setzbuf(td, d, bz)); 330 } 331 332 /* 333 * General BPF functions. 334 */ 335 static int 336 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, 337 struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter) 338 { 339 const struct ieee80211_bpf_params *p; 340 struct ether_header *eh; 341 struct mbuf *m; 342 int error; 343 int len; 344 int hlen; 345 int slen; 346 347 /* 348 * Build a sockaddr based on the data link layer type. 349 * We do this at this level because the ethernet header 350 * is copied directly into the data field of the sockaddr. 351 * In the case of SLIP, there is no header and the packet 352 * is forwarded as is. 353 * Also, we are careful to leave room at the front of the mbuf 354 * for the link level header. 355 */ 356 switch (linktype) { 357 358 case DLT_SLIP: 359 sockp->sa_family = AF_INET; 360 hlen = 0; 361 break; 362 363 case DLT_EN10MB: 364 sockp->sa_family = AF_UNSPEC; 365 /* XXX Would MAXLINKHDR be better? */ 366 hlen = ETHER_HDR_LEN; 367 break; 368 369 case DLT_FDDI: 370 sockp->sa_family = AF_IMPLINK; 371 hlen = 0; 372 break; 373 374 case DLT_RAW: 375 sockp->sa_family = AF_UNSPEC; 376 hlen = 0; 377 break; 378 379 case DLT_NULL: 380 /* 381 * null interface types require a 4 byte pseudo header which 382 * corresponds to the address family of the packet. 383 */ 384 sockp->sa_family = AF_UNSPEC; 385 hlen = 4; 386 break; 387 388 case DLT_ATM_RFC1483: 389 /* 390 * en atm driver requires 4-byte atm pseudo header. 391 * though it isn't standard, vpi:vci needs to be 392 * specified anyway. 393 */ 394 sockp->sa_family = AF_UNSPEC; 395 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ 396 break; 397 398 case DLT_PPP: 399 sockp->sa_family = AF_UNSPEC; 400 hlen = 4; /* This should match PPP_HDRLEN */ 401 break; 402 403 case DLT_IEEE802_11: /* IEEE 802.11 wireless */ 404 sockp->sa_family = AF_IEEE80211; 405 hlen = 0; 406 break; 407 408 case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */ 409 sockp->sa_family = AF_IEEE80211; 410 sockp->sa_len = 12; /* XXX != 0 */ 411 hlen = sizeof(struct ieee80211_bpf_params); 412 break; 413 414 default: 415 return (EIO); 416 } 417 418 len = uio->uio_resid; 419 420 if (len - hlen > ifp->if_mtu) 421 return (EMSGSIZE); 422 423 if ((unsigned)len > MCLBYTES) 424 return (EIO); 425 426 if (len > MHLEN) 427 m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR); 428 else 429 MGETHDR(m, M_WAIT, MT_DATA); 430 m->m_pkthdr.len = m->m_len = len; 431 m->m_pkthdr.rcvif = NULL; 432 *mp = m; 433 434 if (m->m_len < hlen) { 435 error = EPERM; 436 goto bad; 437 } 438 439 error = uiomove(mtod(m, u_char *), len, uio); 440 if (error) 441 goto bad; 442 443 slen = bpf_filter(wfilter, mtod(m, u_char *), len, len); 444 if (slen == 0) { 445 error = EPERM; 446 goto bad; 447 } 448 449 /* Check for multicast destination */ 450 switch (linktype) { 451 case DLT_EN10MB: 452 eh = mtod(m, struct ether_header *); 453 if (ETHER_IS_MULTICAST(eh->ether_dhost)) { 454 if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost, 455 ETHER_ADDR_LEN) == 0) 456 m->m_flags |= M_BCAST; 457 else 458 m->m_flags |= M_MCAST; 459 } 460 break; 461 } 462 463 /* 464 * Make room for link header, and copy it to sockaddr 465 */ 466 if (hlen != 0) { 467 if (sockp->sa_family == AF_IEEE80211) { 468 /* 469 * Collect true length from the parameter header 470 * NB: sockp is known to be zero'd so if we do a 471 * short copy unspecified parameters will be 472 * zero. 473 * NB: packet may not be aligned after stripping 474 * bpf params 475 * XXX check ibp_vers 476 */ 477 p = mtod(m, const struct ieee80211_bpf_params *); 478 hlen = p->ibp_len; 479 if (hlen > sizeof(sockp->sa_data)) { 480 error = EINVAL; 481 goto bad; 482 } 483 } 484 bcopy(m->m_data, sockp->sa_data, hlen); 485 } 486 *hdrlen = hlen; 487 488 return (0); 489 bad: 490 m_freem(m); 491 return (error); 492 } 493 494 /* 495 * Attach file to the bpf interface, i.e. make d listen on bp. 496 */ 497 static void 498 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 499 { 500 /* 501 * Point d at bp, and add d to the interface's list of listeners. 502 * Finally, point the driver's bpf cookie at the interface so 503 * it will divert packets to bpf. 504 */ 505 BPFIF_LOCK(bp); 506 d->bd_bif = bp; 507 LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); 508 509 bpf_bpfd_cnt++; 510 BPFIF_UNLOCK(bp); 511 } 512 513 /* 514 * Detach a file from its interface. 515 */ 516 static void 517 bpf_detachd(struct bpf_d *d) 518 { 519 int error; 520 struct bpf_if *bp; 521 struct ifnet *ifp; 522 523 bp = d->bd_bif; 524 BPFIF_LOCK(bp); 525 BPFD_LOCK(d); 526 ifp = d->bd_bif->bif_ifp; 527 528 /* 529 * Remove d from the interface's descriptor list. 530 */ 531 LIST_REMOVE(d, bd_next); 532 533 bpf_bpfd_cnt--; 534 d->bd_bif = NULL; 535 BPFD_UNLOCK(d); 536 BPFIF_UNLOCK(bp); 537 538 /* 539 * Check if this descriptor had requested promiscuous mode. 540 * If so, turn it off. 541 */ 542 if (d->bd_promisc) { 543 d->bd_promisc = 0; 544 error = ifpromisc(ifp, 0); 545 if (error != 0 && error != ENXIO) { 546 /* 547 * ENXIO can happen if a pccard is unplugged 548 * Something is really wrong if we were able to put 549 * the driver into promiscuous mode, but can't 550 * take it out. 551 */ 552 if_printf(bp->bif_ifp, 553 "bpf_detach: ifpromisc failed (%d)\n", error); 554 } 555 } 556 } 557 558 /* 559 * Open ethernet device. Returns ENXIO for illegal minor device number, 560 * EBUSY if file is open by another process. 561 */ 562 /* ARGSUSED */ 563 static int 564 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) 565 { 566 struct bpf_d *d; 567 568 mtx_lock(&bpf_mtx); 569 d = dev->si_drv1; 570 /* 571 * Each minor can be opened by only one process. If the requested 572 * minor is in use, return EBUSY. 573 */ 574 if (d != NULL) { 575 mtx_unlock(&bpf_mtx); 576 return (EBUSY); 577 } 578 dev->si_drv1 = (struct bpf_d *)~0; /* mark device in use */ 579 mtx_unlock(&bpf_mtx); 580 581 if ((dev->si_flags & SI_NAMED) == 0) 582 make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600, 583 "bpf%d", dev2unit(dev)); 584 MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO); 585 dev->si_drv1 = d; 586 587 /* 588 * For historical reasons, perform a one-time initialization call to 589 * the buffer routines, even though we're not yet committed to a 590 * particular buffer method. 591 */ 592 bpf_buffer_init(d); 593 d->bd_bufmode = BPF_BUFMODE_BUFFER; 594 d->bd_sig = SIGIO; 595 d->bd_direction = BPF_D_INOUT; 596 d->bd_pid = td->td_proc->p_pid; 597 #ifdef MAC 598 mac_bpfdesc_init(d); 599 mac_bpfdesc_create(td->td_ucred, d); 600 #endif 601 mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF); 602 callout_init(&d->bd_callout, CALLOUT_MPSAFE); 603 knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL); 604 605 return (0); 606 } 607 608 /* 609 * Close the descriptor by detaching it from its interface, 610 * deallocating its buffers, and marking it free. 611 */ 612 /* ARGSUSED */ 613 static int 614 bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td) 615 { 616 struct bpf_d *d = dev->si_drv1; 617 618 BPFD_LOCK(d); 619 if (d->bd_state == BPF_WAITING) 620 callout_stop(&d->bd_callout); 621 d->bd_state = BPF_IDLE; 622 BPFD_UNLOCK(d); 623 funsetown(&d->bd_sigio); 624 mtx_lock(&bpf_mtx); 625 if (d->bd_bif) 626 bpf_detachd(d); 627 mtx_unlock(&bpf_mtx); 628 selwakeuppri(&d->bd_sel, PRINET); 629 #ifdef MAC 630 mac_bpfdesc_destroy(d); 631 #endif /* MAC */ 632 knlist_destroy(&d->bd_sel.si_note); 633 bpf_freed(d); 634 dev->si_drv1 = NULL; 635 free(d, M_BPF); 636 637 return (0); 638 } 639 640 /* 641 * bpfread - read next chunk of packets from buffers 642 */ 643 static int 644 bpfread(struct cdev *dev, struct uio *uio, int ioflag) 645 { 646 struct bpf_d *d = dev->si_drv1; 647 int timed_out; 648 int error; 649 650 /* 651 * Restrict application to use a buffer the same size as 652 * as kernel buffers. 653 */ 654 if (uio->uio_resid != d->bd_bufsize) 655 return (EINVAL); 656 657 BPFD_LOCK(d); 658 d->bd_pid = curthread->td_proc->p_pid; 659 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) { 660 BPFD_UNLOCK(d); 661 return (EOPNOTSUPP); 662 } 663 if (d->bd_state == BPF_WAITING) 664 callout_stop(&d->bd_callout); 665 timed_out = (d->bd_state == BPF_TIMED_OUT); 666 d->bd_state = BPF_IDLE; 667 /* 668 * If the hold buffer is empty, then do a timed sleep, which 669 * ends when the timeout expires or when enough packets 670 * have arrived to fill the store buffer. 671 */ 672 while (d->bd_hbuf == NULL) { 673 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 674 /* 675 * A packet(s) either arrived since the previous 676 * read or arrived while we were asleep. 677 * Rotate the buffers and return what's here. 678 */ 679 ROTATE_BUFFERS(d); 680 break; 681 } 682 683 /* 684 * No data is available, check to see if the bpf device 685 * is still pointed at a real interface. If not, return 686 * ENXIO so that the userland process knows to rebind 687 * it before using it again. 688 */ 689 if (d->bd_bif == NULL) { 690 BPFD_UNLOCK(d); 691 return (ENXIO); 692 } 693 694 if (ioflag & O_NONBLOCK) { 695 BPFD_UNLOCK(d); 696 return (EWOULDBLOCK); 697 } 698 error = msleep(d, &d->bd_mtx, PRINET|PCATCH, 699 "bpf", d->bd_rtout); 700 if (error == EINTR || error == ERESTART) { 701 BPFD_UNLOCK(d); 702 return (error); 703 } 704 if (error == EWOULDBLOCK) { 705 /* 706 * On a timeout, return what's in the buffer, 707 * which may be nothing. If there is something 708 * in the store buffer, we can rotate the buffers. 709 */ 710 if (d->bd_hbuf) 711 /* 712 * We filled up the buffer in between 713 * getting the timeout and arriving 714 * here, so we don't need to rotate. 715 */ 716 break; 717 718 if (d->bd_slen == 0) { 719 BPFD_UNLOCK(d); 720 return (0); 721 } 722 ROTATE_BUFFERS(d); 723 break; 724 } 725 } 726 /* 727 * At this point, we know we have something in the hold slot. 728 */ 729 BPFD_UNLOCK(d); 730 731 /* 732 * Move data from hold buffer into user space. 733 * We know the entire buffer is transferred since 734 * we checked above that the read buffer is bpf_bufsize bytes. 735 * 736 * XXXRW: More synchronization needed here: what if a second thread 737 * issues a read on the same fd at the same time? Don't want this 738 * getting invalidated. 739 */ 740 error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); 741 742 BPFD_LOCK(d); 743 d->bd_fbuf = d->bd_hbuf; 744 d->bd_hbuf = NULL; 745 d->bd_hlen = 0; 746 BPFD_UNLOCK(d); 747 748 return (error); 749 } 750 751 /* 752 * If there are processes sleeping on this descriptor, wake them up. 753 */ 754 static __inline void 755 bpf_wakeup(struct bpf_d *d) 756 { 757 758 BPFD_LOCK_ASSERT(d); 759 if (d->bd_state == BPF_WAITING) { 760 callout_stop(&d->bd_callout); 761 d->bd_state = BPF_IDLE; 762 } 763 wakeup(d); 764 if (d->bd_async && d->bd_sig && d->bd_sigio) 765 pgsigio(&d->bd_sigio, d->bd_sig, 0); 766 767 selwakeuppri(&d->bd_sel, PRINET); 768 KNOTE_LOCKED(&d->bd_sel.si_note, 0); 769 } 770 771 static void 772 bpf_timed_out(void *arg) 773 { 774 struct bpf_d *d = (struct bpf_d *)arg; 775 776 BPFD_LOCK(d); 777 if (d->bd_state == BPF_WAITING) { 778 d->bd_state = BPF_TIMED_OUT; 779 if (d->bd_slen != 0) 780 bpf_wakeup(d); 781 } 782 BPFD_UNLOCK(d); 783 } 784 785 static int 786 bpf_ready(struct bpf_d *d) 787 { 788 789 BPFD_LOCK_ASSERT(d); 790 791 if (!bpf_canfreebuf(d) && d->bd_hlen != 0) 792 return (1); 793 if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 794 d->bd_slen != 0) 795 return (1); 796 return (0); 797 } 798 799 static int 800 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) 801 { 802 struct bpf_d *d = dev->si_drv1; 803 struct ifnet *ifp; 804 struct mbuf *m, *mc; 805 struct sockaddr dst; 806 int error, hlen; 807 808 d->bd_pid = curthread->td_proc->p_pid; 809 d->bd_wcount++; 810 if (d->bd_bif == NULL) { 811 d->bd_wdcount++; 812 return (ENXIO); 813 } 814 815 ifp = d->bd_bif->bif_ifp; 816 817 if ((ifp->if_flags & IFF_UP) == 0) { 818 d->bd_wdcount++; 819 return (ENETDOWN); 820 } 821 822 if (uio->uio_resid == 0) { 823 d->bd_wdcount++; 824 return (0); 825 } 826 827 bzero(&dst, sizeof(dst)); 828 m = NULL; 829 hlen = 0; 830 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp, 831 &m, &dst, &hlen, d->bd_wfilter); 832 if (error) { 833 d->bd_wdcount++; 834 return (error); 835 } 836 d->bd_wfcount++; 837 if (d->bd_hdrcmplt) 838 dst.sa_family = pseudo_AF_HDRCMPLT; 839 840 if (d->bd_feedback) { 841 mc = m_dup(m, M_DONTWAIT); 842 if (mc != NULL) 843 mc->m_pkthdr.rcvif = ifp; 844 /* Set M_PROMISC for outgoing packets to be discarded. */ 845 if (d->bd_direction == BPF_D_INOUT) 846 m->m_flags |= M_PROMISC; 847 } else 848 mc = NULL; 849 850 m->m_pkthdr.len -= hlen; 851 m->m_len -= hlen; 852 m->m_data += hlen; /* XXX */ 853 854 #ifdef MAC 855 BPFD_LOCK(d); 856 mac_bpfdesc_create_mbuf(d, m); 857 if (mc != NULL) 858 mac_bpfdesc_create_mbuf(d, mc); 859 BPFD_UNLOCK(d); 860 #endif 861 862 error = (*ifp->if_output)(ifp, m, &dst, NULL); 863 if (error) 864 d->bd_wdcount++; 865 866 if (mc != NULL) { 867 if (error == 0) 868 (*ifp->if_input)(ifp, mc); 869 else 870 m_freem(mc); 871 } 872 873 return (error); 874 } 875 876 /* 877 * Reset a descriptor by flushing its packet buffer and clearing the 878 * receive and drop counts. 879 */ 880 static void 881 reset_d(struct bpf_d *d) 882 { 883 884 mtx_assert(&d->bd_mtx, MA_OWNED); 885 if (d->bd_hbuf) { 886 /* Free the hold buffer. */ 887 d->bd_fbuf = d->bd_hbuf; 888 d->bd_hbuf = NULL; 889 } 890 d->bd_slen = 0; 891 d->bd_hlen = 0; 892 d->bd_rcount = 0; 893 d->bd_dcount = 0; 894 d->bd_fcount = 0; 895 d->bd_wcount = 0; 896 d->bd_wfcount = 0; 897 d->bd_wdcount = 0; 898 d->bd_zcopy = 0; 899 } 900 901 /* 902 * FIONREAD Check for read packet available. 903 * SIOCGIFADDR Get interface address - convenient hook to driver. 904 * BIOCGBLEN Get buffer len [for read()]. 905 * BIOCSETF Set ethernet read filter. 906 * BIOCSETWF Set ethernet write filter. 907 * BIOCFLUSH Flush read packet buffer. 908 * BIOCPROMISC Put interface into promiscuous mode. 909 * BIOCGDLT Get link layer type. 910 * BIOCGETIF Get interface name. 911 * BIOCSETIF Set interface. 912 * BIOCSRTIMEOUT Set read timeout. 913 * BIOCGRTIMEOUT Get read timeout. 914 * BIOCGSTATS Get packet stats. 915 * BIOCIMMEDIATE Set immediate mode. 916 * BIOCVERSION Get filter language version. 917 * BIOCGHDRCMPLT Get "header already complete" flag 918 * BIOCSHDRCMPLT Set "header already complete" flag 919 * BIOCGDIRECTION Get packet direction flag 920 * BIOCSDIRECTION Set packet direction flag 921 * BIOCLOCK Set "locked" flag 922 * BIOCFEEDBACK Set packet feedback mode. 923 * BIOCSETZBUF Set current zero-copy buffer locations. 924 * BIOCGETZMAX Get maximum zero-copy buffer size. 925 * BIOCROTZBUF Force rotation of zero-copy buffer 926 * BIOCSETBUFMODE Set buffer mode. 927 * BIOCGETBUFMODE Get current buffer mode. 928 */ 929 /* ARGSUSED */ 930 static int 931 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 932 struct thread *td) 933 { 934 struct bpf_d *d = dev->si_drv1; 935 int error = 0; 936 937 /* 938 * Refresh PID associated with this descriptor. 939 */ 940 BPFD_LOCK(d); 941 d->bd_pid = td->td_proc->p_pid; 942 if (d->bd_state == BPF_WAITING) 943 callout_stop(&d->bd_callout); 944 d->bd_state = BPF_IDLE; 945 BPFD_UNLOCK(d); 946 947 if (d->bd_locked == 1) { 948 switch (cmd) { 949 case BIOCGBLEN: 950 case BIOCFLUSH: 951 case BIOCGDLT: 952 case BIOCGDLTLIST: 953 case BIOCGETIF: 954 case BIOCGRTIMEOUT: 955 case BIOCGSTATS: 956 case BIOCVERSION: 957 case BIOCGRSIG: 958 case BIOCGHDRCMPLT: 959 case BIOCFEEDBACK: 960 case FIONREAD: 961 case BIOCLOCK: 962 case BIOCSRTIMEOUT: 963 case BIOCIMMEDIATE: 964 case TIOCGPGRP: 965 case BIOCROTZBUF: 966 break; 967 default: 968 return (EPERM); 969 } 970 } 971 switch (cmd) { 972 973 default: 974 error = EINVAL; 975 break; 976 977 /* 978 * Check for read packet available. 979 */ 980 case FIONREAD: 981 { 982 int n; 983 984 BPFD_LOCK(d); 985 n = d->bd_slen; 986 if (d->bd_hbuf) 987 n += d->bd_hlen; 988 BPFD_UNLOCK(d); 989 990 *(int *)addr = n; 991 break; 992 } 993 994 case SIOCGIFADDR: 995 { 996 struct ifnet *ifp; 997 998 if (d->bd_bif == NULL) 999 error = EINVAL; 1000 else { 1001 ifp = d->bd_bif->bif_ifp; 1002 error = (*ifp->if_ioctl)(ifp, cmd, addr); 1003 } 1004 break; 1005 } 1006 1007 /* 1008 * Get buffer len [for read()]. 1009 */ 1010 case BIOCGBLEN: 1011 *(u_int *)addr = d->bd_bufsize; 1012 break; 1013 1014 /* 1015 * Set buffer length. 1016 */ 1017 case BIOCSBLEN: 1018 error = bpf_ioctl_sblen(d, (u_int *)addr); 1019 break; 1020 1021 /* 1022 * Set link layer read filter. 1023 */ 1024 case BIOCSETF: 1025 case BIOCSETWF: 1026 error = bpf_setf(d, (struct bpf_program *)addr, cmd); 1027 break; 1028 1029 /* 1030 * Flush read packet buffer. 1031 */ 1032 case BIOCFLUSH: 1033 BPFD_LOCK(d); 1034 reset_d(d); 1035 BPFD_UNLOCK(d); 1036 break; 1037 1038 /* 1039 * Put interface into promiscuous mode. 1040 */ 1041 case BIOCPROMISC: 1042 if (d->bd_bif == NULL) { 1043 /* 1044 * No interface attached yet. 1045 */ 1046 error = EINVAL; 1047 break; 1048 } 1049 if (d->bd_promisc == 0) { 1050 error = ifpromisc(d->bd_bif->bif_ifp, 1); 1051 if (error == 0) 1052 d->bd_promisc = 1; 1053 } 1054 break; 1055 1056 /* 1057 * Get current data link type. 1058 */ 1059 case BIOCGDLT: 1060 if (d->bd_bif == NULL) 1061 error = EINVAL; 1062 else 1063 *(u_int *)addr = d->bd_bif->bif_dlt; 1064 break; 1065 1066 /* 1067 * Get a list of supported data link types. 1068 */ 1069 case BIOCGDLTLIST: 1070 if (d->bd_bif == NULL) 1071 error = EINVAL; 1072 else 1073 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); 1074 break; 1075 1076 /* 1077 * Set data link type. 1078 */ 1079 case BIOCSDLT: 1080 if (d->bd_bif == NULL) 1081 error = EINVAL; 1082 else 1083 error = bpf_setdlt(d, *(u_int *)addr); 1084 break; 1085 1086 /* 1087 * Get interface name. 1088 */ 1089 case BIOCGETIF: 1090 if (d->bd_bif == NULL) 1091 error = EINVAL; 1092 else { 1093 struct ifnet *const ifp = d->bd_bif->bif_ifp; 1094 struct ifreq *const ifr = (struct ifreq *)addr; 1095 1096 strlcpy(ifr->ifr_name, ifp->if_xname, 1097 sizeof(ifr->ifr_name)); 1098 } 1099 break; 1100 1101 /* 1102 * Set interface. 1103 */ 1104 case BIOCSETIF: 1105 error = bpf_setif(d, (struct ifreq *)addr); 1106 break; 1107 1108 /* 1109 * Set read timeout. 1110 */ 1111 case BIOCSRTIMEOUT: 1112 { 1113 struct timeval *tv = (struct timeval *)addr; 1114 1115 /* 1116 * Subtract 1 tick from tvtohz() since this isn't 1117 * a one-shot timer. 1118 */ 1119 if ((error = itimerfix(tv)) == 0) 1120 d->bd_rtout = tvtohz(tv) - 1; 1121 break; 1122 } 1123 1124 /* 1125 * Get read timeout. 1126 */ 1127 case BIOCGRTIMEOUT: 1128 { 1129 struct timeval *tv = (struct timeval *)addr; 1130 1131 tv->tv_sec = d->bd_rtout / hz; 1132 tv->tv_usec = (d->bd_rtout % hz) * tick; 1133 break; 1134 } 1135 1136 /* 1137 * Get packet stats. 1138 */ 1139 case BIOCGSTATS: 1140 { 1141 struct bpf_stat *bs = (struct bpf_stat *)addr; 1142 1143 /* XXXCSJP overflow */ 1144 bs->bs_recv = d->bd_rcount; 1145 bs->bs_drop = d->bd_dcount; 1146 break; 1147 } 1148 1149 /* 1150 * Set immediate mode. 1151 */ 1152 case BIOCIMMEDIATE: 1153 d->bd_immediate = *(u_int *)addr; 1154 break; 1155 1156 case BIOCVERSION: 1157 { 1158 struct bpf_version *bv = (struct bpf_version *)addr; 1159 1160 bv->bv_major = BPF_MAJOR_VERSION; 1161 bv->bv_minor = BPF_MINOR_VERSION; 1162 break; 1163 } 1164 1165 /* 1166 * Get "header already complete" flag 1167 */ 1168 case BIOCGHDRCMPLT: 1169 *(u_int *)addr = d->bd_hdrcmplt; 1170 break; 1171 1172 /* 1173 * Set "header already complete" flag 1174 */ 1175 case BIOCSHDRCMPLT: 1176 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0; 1177 break; 1178 1179 /* 1180 * Get packet direction flag 1181 */ 1182 case BIOCGDIRECTION: 1183 *(u_int *)addr = d->bd_direction; 1184 break; 1185 1186 /* 1187 * Set packet direction flag 1188 */ 1189 case BIOCSDIRECTION: 1190 { 1191 u_int direction; 1192 1193 direction = *(u_int *)addr; 1194 switch (direction) { 1195 case BPF_D_IN: 1196 case BPF_D_INOUT: 1197 case BPF_D_OUT: 1198 d->bd_direction = direction; 1199 break; 1200 default: 1201 error = EINVAL; 1202 } 1203 } 1204 break; 1205 1206 case BIOCFEEDBACK: 1207 d->bd_feedback = *(u_int *)addr; 1208 break; 1209 1210 case BIOCLOCK: 1211 d->bd_locked = 1; 1212 break; 1213 1214 case FIONBIO: /* Non-blocking I/O */ 1215 break; 1216 1217 case FIOASYNC: /* Send signal on receive packets */ 1218 d->bd_async = *(int *)addr; 1219 break; 1220 1221 case FIOSETOWN: 1222 error = fsetown(*(int *)addr, &d->bd_sigio); 1223 break; 1224 1225 case FIOGETOWN: 1226 *(int *)addr = fgetown(&d->bd_sigio); 1227 break; 1228 1229 /* This is deprecated, FIOSETOWN should be used instead. */ 1230 case TIOCSPGRP: 1231 error = fsetown(-(*(int *)addr), &d->bd_sigio); 1232 break; 1233 1234 /* This is deprecated, FIOGETOWN should be used instead. */ 1235 case TIOCGPGRP: 1236 *(int *)addr = -fgetown(&d->bd_sigio); 1237 break; 1238 1239 case BIOCSRSIG: /* Set receive signal */ 1240 { 1241 u_int sig; 1242 1243 sig = *(u_int *)addr; 1244 1245 if (sig >= NSIG) 1246 error = EINVAL; 1247 else 1248 d->bd_sig = sig; 1249 break; 1250 } 1251 case BIOCGRSIG: 1252 *(u_int *)addr = d->bd_sig; 1253 break; 1254 1255 case BIOCGETBUFMODE: 1256 *(u_int *)addr = d->bd_bufmode; 1257 break; 1258 1259 case BIOCSETBUFMODE: 1260 /* 1261 * Allow the buffering mode to be changed as long as we 1262 * haven't yet committed to a particular mode. Our 1263 * definition of commitment, for now, is whether or not a 1264 * buffer has been allocated or an interface attached, since 1265 * that's the point where things get tricky. 1266 */ 1267 switch (*(u_int *)addr) { 1268 case BPF_BUFMODE_BUFFER: 1269 break; 1270 1271 case BPF_BUFMODE_ZBUF: 1272 if (bpf_zerocopy_enable) 1273 break; 1274 /* FALLSTHROUGH */ 1275 1276 default: 1277 return (EINVAL); 1278 } 1279 1280 BPFD_LOCK(d); 1281 if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || 1282 d->bd_fbuf != NULL || d->bd_bif != NULL) { 1283 BPFD_UNLOCK(d); 1284 return (EBUSY); 1285 } 1286 d->bd_bufmode = *(u_int *)addr; 1287 BPFD_UNLOCK(d); 1288 break; 1289 1290 case BIOCGETZMAX: 1291 return (bpf_ioctl_getzmax(td, d, (size_t *)addr)); 1292 1293 case BIOCSETZBUF: 1294 return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr)); 1295 1296 case BIOCROTZBUF: 1297 return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr)); 1298 } 1299 return (error); 1300 } 1301 1302 /* 1303 * Set d's packet filter program to fp. If this file already has a filter, 1304 * free it and replace it. Returns EINVAL for bogus requests. 1305 */ 1306 static int 1307 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) 1308 { 1309 struct bpf_insn *fcode, *old; 1310 u_int wfilter, flen, size; 1311 #ifdef BPF_JITTER 1312 bpf_jit_filter *ofunc; 1313 #endif 1314 1315 if (cmd == BIOCSETWF) { 1316 old = d->bd_wfilter; 1317 wfilter = 1; 1318 #ifdef BPF_JITTER 1319 ofunc = NULL; 1320 #endif 1321 } else { 1322 wfilter = 0; 1323 old = d->bd_rfilter; 1324 #ifdef BPF_JITTER 1325 ofunc = d->bd_bfilter; 1326 #endif 1327 } 1328 if (fp->bf_insns == NULL) { 1329 if (fp->bf_len != 0) 1330 return (EINVAL); 1331 BPFD_LOCK(d); 1332 if (wfilter) 1333 d->bd_wfilter = NULL; 1334 else { 1335 d->bd_rfilter = NULL; 1336 #ifdef BPF_JITTER 1337 d->bd_bfilter = NULL; 1338 #endif 1339 } 1340 reset_d(d); 1341 BPFD_UNLOCK(d); 1342 if (old != NULL) 1343 free((caddr_t)old, M_BPF); 1344 #ifdef BPF_JITTER 1345 if (ofunc != NULL) 1346 bpf_destroy_jit_filter(ofunc); 1347 #endif 1348 return (0); 1349 } 1350 flen = fp->bf_len; 1351 if (flen > bpf_maxinsns) 1352 return (EINVAL); 1353 1354 size = flen * sizeof(*fp->bf_insns); 1355 fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK); 1356 if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 && 1357 bpf_validate(fcode, (int)flen)) { 1358 BPFD_LOCK(d); 1359 if (wfilter) 1360 d->bd_wfilter = fcode; 1361 else { 1362 d->bd_rfilter = fcode; 1363 #ifdef BPF_JITTER 1364 d->bd_bfilter = bpf_jitter(fcode, flen); 1365 #endif 1366 } 1367 reset_d(d); 1368 BPFD_UNLOCK(d); 1369 if (old != NULL) 1370 free((caddr_t)old, M_BPF); 1371 #ifdef BPF_JITTER 1372 if (ofunc != NULL) 1373 bpf_destroy_jit_filter(ofunc); 1374 #endif 1375 1376 return (0); 1377 } 1378 free((caddr_t)fcode, M_BPF); 1379 return (EINVAL); 1380 } 1381 1382 /* 1383 * Detach a file from its current interface (if attached at all) and attach 1384 * to the interface indicated by the name stored in ifr. 1385 * Return an errno or 0. 1386 */ 1387 static int 1388 bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1389 { 1390 struct bpf_if *bp; 1391 struct ifnet *theywant; 1392 1393 theywant = ifunit(ifr->ifr_name); 1394 if (theywant == NULL || theywant->if_bpf == NULL) 1395 return (ENXIO); 1396 1397 bp = theywant->if_bpf; 1398 1399 /* 1400 * Behavior here depends on the buffering model. If we're using 1401 * kernel memory buffers, then we can allocate them here. If we're 1402 * using zero-copy, then the user process must have registered 1403 * buffers by the time we get here. If not, return an error. 1404 * 1405 * XXXRW: There are locking issues here with multi-threaded use: what 1406 * if two threads try to set the interface at once? 1407 */ 1408 switch (d->bd_bufmode) { 1409 case BPF_BUFMODE_BUFFER: 1410 if (d->bd_sbuf == NULL) 1411 bpf_buffer_alloc(d); 1412 KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL")); 1413 break; 1414 1415 case BPF_BUFMODE_ZBUF: 1416 if (d->bd_sbuf == NULL) 1417 return (EINVAL); 1418 break; 1419 1420 default: 1421 panic("bpf_setif: bufmode %d", d->bd_bufmode); 1422 } 1423 if (bp != d->bd_bif) { 1424 if (d->bd_bif) 1425 /* 1426 * Detach if attached to something else. 1427 */ 1428 bpf_detachd(d); 1429 1430 bpf_attachd(d, bp); 1431 } 1432 BPFD_LOCK(d); 1433 reset_d(d); 1434 BPFD_UNLOCK(d); 1435 return (0); 1436 } 1437 1438 /* 1439 * Support for select() and poll() system calls 1440 * 1441 * Return true iff the specific operation will not block indefinitely. 1442 * Otherwise, return false but make a note that a selwakeup() must be done. 1443 */ 1444 static int 1445 bpfpoll(struct cdev *dev, int events, struct thread *td) 1446 { 1447 struct bpf_d *d; 1448 int revents; 1449 1450 d = dev->si_drv1; 1451 if (d->bd_bif == NULL) 1452 return (ENXIO); 1453 1454 /* 1455 * Refresh PID associated with this descriptor. 1456 */ 1457 revents = events & (POLLOUT | POLLWRNORM); 1458 BPFD_LOCK(d); 1459 d->bd_pid = td->td_proc->p_pid; 1460 if (events & (POLLIN | POLLRDNORM)) { 1461 if (bpf_ready(d)) 1462 revents |= events & (POLLIN | POLLRDNORM); 1463 else { 1464 selrecord(td, &d->bd_sel); 1465 /* Start the read timeout if necessary. */ 1466 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1467 callout_reset(&d->bd_callout, d->bd_rtout, 1468 bpf_timed_out, d); 1469 d->bd_state = BPF_WAITING; 1470 } 1471 } 1472 } 1473 BPFD_UNLOCK(d); 1474 return (revents); 1475 } 1476 1477 /* 1478 * Support for kevent() system call. Register EVFILT_READ filters and 1479 * reject all others. 1480 */ 1481 int 1482 bpfkqfilter(struct cdev *dev, struct knote *kn) 1483 { 1484 struct bpf_d *d = (struct bpf_d *)dev->si_drv1; 1485 1486 if (kn->kn_filter != EVFILT_READ) 1487 return (1); 1488 1489 /* 1490 * Refresh PID associated with this descriptor. 1491 */ 1492 BPFD_LOCK(d); 1493 d->bd_pid = curthread->td_proc->p_pid; 1494 kn->kn_fop = &bpfread_filtops; 1495 kn->kn_hook = d; 1496 knlist_add(&d->bd_sel.si_note, kn, 1); 1497 BPFD_UNLOCK(d); 1498 1499 return (0); 1500 } 1501 1502 static void 1503 filt_bpfdetach(struct knote *kn) 1504 { 1505 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 1506 1507 knlist_remove(&d->bd_sel.si_note, kn, 0); 1508 } 1509 1510 static int 1511 filt_bpfread(struct knote *kn, long hint) 1512 { 1513 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 1514 int ready; 1515 1516 BPFD_LOCK_ASSERT(d); 1517 ready = bpf_ready(d); 1518 if (ready) { 1519 kn->kn_data = d->bd_slen; 1520 if (d->bd_hbuf) 1521 kn->kn_data += d->bd_hlen; 1522 } 1523 else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1524 callout_reset(&d->bd_callout, d->bd_rtout, 1525 bpf_timed_out, d); 1526 d->bd_state = BPF_WAITING; 1527 } 1528 1529 return (ready); 1530 } 1531 1532 /* 1533 * Incoming linkage from device drivers. Process the packet pkt, of length 1534 * pktlen, which is stored in a contiguous buffer. The packet is parsed 1535 * by each process' filter, and if accepted, stashed into the corresponding 1536 * buffer. 1537 */ 1538 void 1539 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 1540 { 1541 struct bpf_d *d; 1542 u_int slen; 1543 int gottime; 1544 struct timeval tv; 1545 1546 gottime = 0; 1547 BPFIF_LOCK(bp); 1548 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1549 BPFD_LOCK(d); 1550 ++d->bd_rcount; 1551 #ifdef BPF_JITTER 1552 if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL) 1553 slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen); 1554 else 1555 #endif 1556 slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen); 1557 if (slen != 0) { 1558 d->bd_fcount++; 1559 if (!gottime) { 1560 microtime(&tv); 1561 gottime = 1; 1562 } 1563 #ifdef MAC 1564 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1565 #endif 1566 catchpacket(d, pkt, pktlen, slen, 1567 bpf_append_bytes, &tv); 1568 } 1569 BPFD_UNLOCK(d); 1570 } 1571 BPFIF_UNLOCK(bp); 1572 } 1573 1574 #define BPF_CHECK_DIRECTION(d, i) \ 1575 (((d)->bd_direction == BPF_D_IN && (i) == NULL) || \ 1576 ((d)->bd_direction == BPF_D_OUT && (i) != NULL)) 1577 1578 /* 1579 * Incoming linkage from device drivers, when packet is in an mbuf chain. 1580 */ 1581 void 1582 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 1583 { 1584 struct bpf_d *d; 1585 u_int pktlen, slen; 1586 int gottime; 1587 struct timeval tv; 1588 1589 /* Skip outgoing duplicate packets. */ 1590 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 1591 m->m_flags &= ~M_PROMISC; 1592 return; 1593 } 1594 1595 gottime = 0; 1596 1597 pktlen = m_length(m, NULL); 1598 1599 BPFIF_LOCK(bp); 1600 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1601 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif)) 1602 continue; 1603 BPFD_LOCK(d); 1604 ++d->bd_rcount; 1605 #ifdef BPF_JITTER 1606 /* XXX We cannot handle multiple mbufs. */ 1607 if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL && 1608 m->m_next == NULL) 1609 slen = (*(d->bd_bfilter->func))(mtod(m, u_char *), 1610 pktlen, pktlen); 1611 else 1612 #endif 1613 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0); 1614 if (slen != 0) { 1615 d->bd_fcount++; 1616 if (!gottime) { 1617 microtime(&tv); 1618 gottime = 1; 1619 } 1620 #ifdef MAC 1621 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1622 #endif 1623 catchpacket(d, (u_char *)m, pktlen, slen, 1624 bpf_append_mbuf, &tv); 1625 } 1626 BPFD_UNLOCK(d); 1627 } 1628 BPFIF_UNLOCK(bp); 1629 } 1630 1631 /* 1632 * Incoming linkage from device drivers, when packet is in 1633 * an mbuf chain and to be prepended by a contiguous header. 1634 */ 1635 void 1636 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 1637 { 1638 struct mbuf mb; 1639 struct bpf_d *d; 1640 u_int pktlen, slen; 1641 int gottime; 1642 struct timeval tv; 1643 1644 /* Skip outgoing duplicate packets. */ 1645 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 1646 m->m_flags &= ~M_PROMISC; 1647 return; 1648 } 1649 1650 gottime = 0; 1651 1652 pktlen = m_length(m, NULL); 1653 /* 1654 * Craft on-stack mbuf suitable for passing to bpf_filter. 1655 * Note that we cut corners here; we only setup what's 1656 * absolutely needed--this mbuf should never go anywhere else. 1657 */ 1658 mb.m_next = m; 1659 mb.m_data = data; 1660 mb.m_len = dlen; 1661 pktlen += dlen; 1662 1663 BPFIF_LOCK(bp); 1664 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1665 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif)) 1666 continue; 1667 BPFD_LOCK(d); 1668 ++d->bd_rcount; 1669 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0); 1670 if (slen != 0) { 1671 d->bd_fcount++; 1672 if (!gottime) { 1673 microtime(&tv); 1674 gottime = 1; 1675 } 1676 #ifdef MAC 1677 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 1678 #endif 1679 catchpacket(d, (u_char *)&mb, pktlen, slen, 1680 bpf_append_mbuf, &tv); 1681 } 1682 BPFD_UNLOCK(d); 1683 } 1684 BPFIF_UNLOCK(bp); 1685 } 1686 1687 #undef BPF_CHECK_DIRECTION 1688 1689 /* 1690 * Move the packet data from interface memory (pkt) into the 1691 * store buffer. "cpfn" is the routine called to do the actual data 1692 * transfer. bcopy is passed in to copy contiguous chunks, while 1693 * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case, 1694 * pkt is really an mbuf. 1695 */ 1696 static void 1697 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 1698 void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), 1699 struct timeval *tv) 1700 { 1701 struct bpf_hdr hdr; 1702 int totlen, curlen; 1703 int hdrlen = d->bd_bif->bif_hdrlen; 1704 int do_wakeup = 0; 1705 1706 BPFD_LOCK_ASSERT(d); 1707 1708 /* 1709 * Detect whether user space has released a buffer back to us, and if 1710 * so, move it from being a hold buffer to a free buffer. This may 1711 * not be the best place to do it (for example, we might only want to 1712 * run this check if we need the space), but for now it's a reliable 1713 * spot to do it. 1714 */ 1715 if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { 1716 d->bd_fbuf = d->bd_hbuf; 1717 d->bd_hbuf = NULL; 1718 d->bd_hlen = 0; 1719 } 1720 1721 /* 1722 * Figure out how many bytes to move. If the packet is 1723 * greater or equal to the snapshot length, transfer that 1724 * much. Otherwise, transfer the whole packet (unless 1725 * we hit the buffer size limit). 1726 */ 1727 totlen = hdrlen + min(snaplen, pktlen); 1728 if (totlen > d->bd_bufsize) 1729 totlen = d->bd_bufsize; 1730 1731 /* 1732 * Round up the end of the previous packet to the next longword. 1733 * 1734 * Drop the packet if there's no room and no hope of room 1735 * If the packet would overflow the storage buffer or the storage 1736 * buffer is considered immutable by the buffer model, try to rotate 1737 * the buffer and wakeup pending processes. 1738 */ 1739 curlen = BPF_WORDALIGN(d->bd_slen); 1740 if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { 1741 if (d->bd_fbuf == NULL) { 1742 /* 1743 * There's no room in the store buffer, and no 1744 * prospect of room, so drop the packet. Notify the 1745 * buffer model. 1746 */ 1747 bpf_buffull(d); 1748 ++d->bd_dcount; 1749 return; 1750 } 1751 ROTATE_BUFFERS(d); 1752 do_wakeup = 1; 1753 curlen = 0; 1754 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) 1755 /* 1756 * Immediate mode is set, or the read timeout has already 1757 * expired during a select call. A packet arrived, so the 1758 * reader should be woken up. 1759 */ 1760 do_wakeup = 1; 1761 1762 /* 1763 * Append the bpf header. Note we append the actual header size, but 1764 * move forward the length of the header plus padding. 1765 */ 1766 bzero(&hdr, sizeof(hdr)); 1767 hdr.bh_tstamp = *tv; 1768 hdr.bh_datalen = pktlen; 1769 hdr.bh_hdrlen = hdrlen; 1770 hdr.bh_caplen = totlen - hdrlen; 1771 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); 1772 1773 /* 1774 * Copy the packet data into the store buffer and update its length. 1775 */ 1776 (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen); 1777 d->bd_slen = curlen + totlen; 1778 1779 if (do_wakeup) 1780 bpf_wakeup(d); 1781 } 1782 1783 /* 1784 * Free buffers currently in use by a descriptor. 1785 * Called on close. 1786 */ 1787 static void 1788 bpf_freed(struct bpf_d *d) 1789 { 1790 1791 /* 1792 * We don't need to lock out interrupts since this descriptor has 1793 * been detached from its interface and it yet hasn't been marked 1794 * free. 1795 */ 1796 bpf_free(d); 1797 if (d->bd_rfilter) { 1798 free((caddr_t)d->bd_rfilter, M_BPF); 1799 #ifdef BPF_JITTER 1800 bpf_destroy_jit_filter(d->bd_bfilter); 1801 #endif 1802 } 1803 if (d->bd_wfilter) 1804 free((caddr_t)d->bd_wfilter, M_BPF); 1805 mtx_destroy(&d->bd_mtx); 1806 } 1807 1808 /* 1809 * Attach an interface to bpf. dlt is the link layer type; hdrlen is the 1810 * fixed size of the link header (variable length headers not yet supported). 1811 */ 1812 void 1813 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 1814 { 1815 1816 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 1817 } 1818 1819 /* 1820 * Attach an interface to bpf. ifp is a pointer to the structure 1821 * defining the interface to be attached, dlt is the link layer type, 1822 * and hdrlen is the fixed size of the link header (variable length 1823 * headers are not yet supporrted). 1824 */ 1825 void 1826 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 1827 { 1828 struct bpf_if *bp; 1829 1830 bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO); 1831 if (bp == NULL) 1832 panic("bpfattach"); 1833 1834 LIST_INIT(&bp->bif_dlist); 1835 bp->bif_ifp = ifp; 1836 bp->bif_dlt = dlt; 1837 mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF); 1838 KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized")); 1839 *driverp = bp; 1840 1841 mtx_lock(&bpf_mtx); 1842 LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); 1843 mtx_unlock(&bpf_mtx); 1844 1845 /* 1846 * Compute the length of the bpf header. This is not necessarily 1847 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1848 * that the network layer header begins on a longword boundary (for 1849 * performance reasons and to alleviate alignment restrictions). 1850 */ 1851 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1852 1853 if (bootverbose) 1854 if_printf(ifp, "bpf attached\n"); 1855 } 1856 1857 /* 1858 * Detach bpf from an interface. This involves detaching each descriptor 1859 * associated with the interface, and leaving bd_bif NULL. Notify each 1860 * descriptor as it's detached so that any sleepers wake up and get 1861 * ENXIO. 1862 */ 1863 void 1864 bpfdetach(struct ifnet *ifp) 1865 { 1866 struct bpf_if *bp; 1867 struct bpf_d *d; 1868 1869 /* Locate BPF interface information */ 1870 mtx_lock(&bpf_mtx); 1871 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1872 if (ifp == bp->bif_ifp) 1873 break; 1874 } 1875 1876 /* Interface wasn't attached */ 1877 if ((bp == NULL) || (bp->bif_ifp == NULL)) { 1878 mtx_unlock(&bpf_mtx); 1879 printf("bpfdetach: %s was not attached\n", ifp->if_xname); 1880 return; 1881 } 1882 1883 LIST_REMOVE(bp, bif_next); 1884 mtx_unlock(&bpf_mtx); 1885 1886 while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) { 1887 bpf_detachd(d); 1888 BPFD_LOCK(d); 1889 bpf_wakeup(d); 1890 BPFD_UNLOCK(d); 1891 } 1892 1893 mtx_destroy(&bp->bif_mtx); 1894 free(bp, M_BPF); 1895 } 1896 1897 /* 1898 * Get a list of available data link type of the interface. 1899 */ 1900 static int 1901 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 1902 { 1903 int n, error; 1904 struct ifnet *ifp; 1905 struct bpf_if *bp; 1906 1907 ifp = d->bd_bif->bif_ifp; 1908 n = 0; 1909 error = 0; 1910 mtx_lock(&bpf_mtx); 1911 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1912 if (bp->bif_ifp != ifp) 1913 continue; 1914 if (bfl->bfl_list != NULL) { 1915 if (n >= bfl->bfl_len) { 1916 mtx_unlock(&bpf_mtx); 1917 return (ENOMEM); 1918 } 1919 error = copyout(&bp->bif_dlt, 1920 bfl->bfl_list + n, sizeof(u_int)); 1921 } 1922 n++; 1923 } 1924 mtx_unlock(&bpf_mtx); 1925 bfl->bfl_len = n; 1926 return (error); 1927 } 1928 1929 /* 1930 * Set the data link type of a BPF instance. 1931 */ 1932 static int 1933 bpf_setdlt(struct bpf_d *d, u_int dlt) 1934 { 1935 int error, opromisc; 1936 struct ifnet *ifp; 1937 struct bpf_if *bp; 1938 1939 if (d->bd_bif->bif_dlt == dlt) 1940 return (0); 1941 ifp = d->bd_bif->bif_ifp; 1942 mtx_lock(&bpf_mtx); 1943 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1944 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 1945 break; 1946 } 1947 mtx_unlock(&bpf_mtx); 1948 if (bp != NULL) { 1949 opromisc = d->bd_promisc; 1950 bpf_detachd(d); 1951 bpf_attachd(d, bp); 1952 BPFD_LOCK(d); 1953 reset_d(d); 1954 BPFD_UNLOCK(d); 1955 if (opromisc) { 1956 error = ifpromisc(bp->bif_ifp, 1); 1957 if (error) 1958 if_printf(bp->bif_ifp, 1959 "bpf_setdlt: ifpromisc failed (%d)\n", 1960 error); 1961 else 1962 d->bd_promisc = 1; 1963 } 1964 } 1965 return (bp == NULL ? EINVAL : 0); 1966 } 1967 1968 static void 1969 bpf_clone(void *arg, struct ucred *cred, char *name, int namelen, 1970 struct cdev **dev) 1971 { 1972 int u; 1973 1974 if (*dev != NULL) 1975 return; 1976 if (dev_stdclone(name, NULL, "bpf", &u) != 1) 1977 return; 1978 *dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600, 1979 "bpf%d", u); 1980 dev_ref(*dev); 1981 (*dev)->si_flags |= SI_CHEAPCLONE; 1982 return; 1983 } 1984 1985 static void 1986 bpf_drvinit(void *unused) 1987 { 1988 1989 mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF); 1990 LIST_INIT(&bpf_iflist); 1991 EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000); 1992 } 1993 1994 static void 1995 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) 1996 { 1997 1998 bzero(d, sizeof(*d)); 1999 BPFD_LOCK_ASSERT(bd); 2000 d->bd_structsize = sizeof(*d); 2001 d->bd_immediate = bd->bd_immediate; 2002 d->bd_promisc = bd->bd_promisc; 2003 d->bd_hdrcmplt = bd->bd_hdrcmplt; 2004 d->bd_direction = bd->bd_direction; 2005 d->bd_feedback = bd->bd_feedback; 2006 d->bd_async = bd->bd_async; 2007 d->bd_rcount = bd->bd_rcount; 2008 d->bd_dcount = bd->bd_dcount; 2009 d->bd_fcount = bd->bd_fcount; 2010 d->bd_sig = bd->bd_sig; 2011 d->bd_slen = bd->bd_slen; 2012 d->bd_hlen = bd->bd_hlen; 2013 d->bd_bufsize = bd->bd_bufsize; 2014 d->bd_pid = bd->bd_pid; 2015 strlcpy(d->bd_ifname, 2016 bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ); 2017 d->bd_locked = bd->bd_locked; 2018 d->bd_wcount = bd->bd_wcount; 2019 d->bd_wdcount = bd->bd_wdcount; 2020 d->bd_wfcount = bd->bd_wfcount; 2021 d->bd_zcopy = bd->bd_zcopy; 2022 d->bd_bufmode = bd->bd_bufmode; 2023 } 2024 2025 static int 2026 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) 2027 { 2028 struct xbpf_d *xbdbuf, *xbd; 2029 int index, error; 2030 struct bpf_if *bp; 2031 struct bpf_d *bd; 2032 2033 /* 2034 * XXX This is not technically correct. It is possible for non 2035 * privileged users to open bpf devices. It would make sense 2036 * if the users who opened the devices were able to retrieve 2037 * the statistics for them, too. 2038 */ 2039 error = priv_check(req->td, PRIV_NET_BPF); 2040 if (error) 2041 return (error); 2042 if (req->oldptr == NULL) 2043 return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd))); 2044 if (bpf_bpfd_cnt == 0) 2045 return (SYSCTL_OUT(req, 0, 0)); 2046 xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK); 2047 mtx_lock(&bpf_mtx); 2048 if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) { 2049 mtx_unlock(&bpf_mtx); 2050 free(xbdbuf, M_BPF); 2051 return (ENOMEM); 2052 } 2053 index = 0; 2054 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2055 BPFIF_LOCK(bp); 2056 LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 2057 xbd = &xbdbuf[index++]; 2058 BPFD_LOCK(bd); 2059 bpfstats_fill_xbpf(xbd, bd); 2060 BPFD_UNLOCK(bd); 2061 } 2062 BPFIF_UNLOCK(bp); 2063 } 2064 mtx_unlock(&bpf_mtx); 2065 error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); 2066 free(xbdbuf, M_BPF); 2067 return (error); 2068 } 2069 2070 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL); 2071 2072 #else /* !DEV_BPF && !NETGRAPH_BPF */ 2073 /* 2074 * NOP stubs to allow bpf-using drivers to load and function. 2075 * 2076 * A 'better' implementation would allow the core bpf functionality 2077 * to be loaded at runtime. 2078 */ 2079 static struct bpf_if bp_null; 2080 2081 void 2082 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 2083 { 2084 } 2085 2086 void 2087 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2088 { 2089 } 2090 2091 void 2092 bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) 2093 { 2094 } 2095 2096 void 2097 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2098 { 2099 2100 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 2101 } 2102 2103 void 2104 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 2105 { 2106 2107 *driverp = &bp_null; 2108 } 2109 2110 void 2111 bpfdetach(struct ifnet *ifp) 2112 { 2113 } 2114 2115 u_int 2116 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) 2117 { 2118 return -1; /* "no filter" behaviour */ 2119 } 2120 2121 int 2122 bpf_validate(const struct bpf_insn *f, int len) 2123 { 2124 return 0; /* false */ 2125 } 2126 2127 #endif /* !DEV_BPF && !NETGRAPH_BPF */ 2128