1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from the Stanford/CMU enet packet filter, 8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 10 * Berkeley Laboratory. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_bpf.h" 43 #include "opt_ddb.h" 44 #include "opt_netgraph.h" 45 46 #include <sys/types.h> 47 #include <sys/param.h> 48 #include <sys/lock.h> 49 #include <sys/rwlock.h> 50 #include <sys/systm.h> 51 #include <sys/conf.h> 52 #include <sys/fcntl.h> 53 #include <sys/jail.h> 54 #include <sys/malloc.h> 55 #include <sys/mbuf.h> 56 #include <sys/time.h> 57 #include <sys/priv.h> 58 #include <sys/proc.h> 59 #include <sys/signalvar.h> 60 #include <sys/filio.h> 61 #include <sys/sockio.h> 62 #include <sys/ttycom.h> 63 #include <sys/uio.h> 64 #include <sys/sysent.h> 65 66 #include <sys/event.h> 67 #include <sys/file.h> 68 #include <sys/poll.h> 69 #include <sys/proc.h> 70 71 #include <sys/socket.h> 72 73 #ifdef DDB 74 #include <ddb/ddb.h> 75 #endif 76 77 #include <net/if.h> 78 #include <net/if_var.h> 79 #include <net/if_dl.h> 80 #include <net/bpf.h> 81 #include <net/bpf_buffer.h> 82 #ifdef BPF_JITTER 83 #include <net/bpf_jitter.h> 84 #endif 85 #include <net/bpf_zerocopy.h> 86 #include <net/bpfdesc.h> 87 #include <net/route.h> 88 #include <net/vnet.h> 89 90 #include <netinet/in.h> 91 #include <netinet/if_ether.h> 92 #include <sys/kernel.h> 93 #include <sys/sysctl.h> 94 95 #include <net80211/ieee80211_freebsd.h> 96 97 #include <security/mac/mac_framework.h> 98 99 MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); 100 101 static struct bpf_if_ext dead_bpf_if = { 102 .bif_dlist = LIST_HEAD_INITIALIZER() 103 }; 104 105 struct bpf_if { 106 #define bif_next bif_ext.bif_next 107 #define bif_dlist bif_ext.bif_dlist 108 struct bpf_if_ext bif_ext; /* public members */ 109 u_int bif_dlt; /* link layer type */ 110 u_int bif_hdrlen; /* length of link header */ 111 struct ifnet *bif_ifp; /* corresponding interface */ 112 struct rwlock bif_lock; /* interface lock */ 113 LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */ 114 int bif_flags; /* Interface flags */ 115 struct bpf_if **bif_bpf; /* Pointer to pointer to us */ 116 }; 117 118 CTASSERT(offsetof(struct bpf_if, bif_ext) == 0); 119 120 #define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock) 121 #define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock) 122 #define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock) 123 #define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock) 124 125 #if defined(DEV_BPF) || defined(NETGRAPH_BPF) 126 127 #define PRINET 26 /* interruptible */ 128 129 #define SIZEOF_BPF_HDR(type) \ 130 (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen)) 131 132 #ifdef COMPAT_FREEBSD32 133 #include <sys/mount.h> 134 #include <compat/freebsd32/freebsd32.h> 135 #define BPF_ALIGNMENT32 sizeof(int32_t) 136 #define BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32) 137 138 #ifndef BURN_BRIDGES 139 /* 140 * 32-bit version of structure prepended to each packet. We use this header 141 * instead of the standard one for 32-bit streams. We mark the a stream as 142 * 32-bit the first time we see a 32-bit compat ioctl request. 143 */ 144 struct bpf_hdr32 { 145 struct timeval32 bh_tstamp; /* time stamp */ 146 uint32_t bh_caplen; /* length of captured portion */ 147 uint32_t bh_datalen; /* original length of packet */ 148 uint16_t bh_hdrlen; /* length of bpf header (this struct 149 plus alignment padding) */ 150 }; 151 #endif 152 153 struct bpf_program32 { 154 u_int bf_len; 155 uint32_t bf_insns; 156 }; 157 158 struct bpf_dltlist32 { 159 u_int bfl_len; 160 u_int bfl_list; 161 }; 162 163 #define BIOCSETF32 _IOW('B', 103, struct bpf_program32) 164 #define BIOCSRTIMEOUT32 _IOW('B', 109, struct timeval32) 165 #define BIOCGRTIMEOUT32 _IOR('B', 110, struct timeval32) 166 #define BIOCGDLTLIST32 _IOWR('B', 121, struct bpf_dltlist32) 167 #define BIOCSETWF32 _IOW('B', 123, struct bpf_program32) 168 #define BIOCSETFNR32 _IOW('B', 130, struct bpf_program32) 169 #endif 170 171 #define BPF_LOCK() sx_xlock(&bpf_sx) 172 #define BPF_UNLOCK() sx_xunlock(&bpf_sx) 173 #define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED) 174 /* 175 * bpf_iflist is a list of BPF interface structures, each corresponding to a 176 * specific DLT. The same network interface might have several BPF interface 177 * structures registered by different layers in the stack (i.e., 802.11 178 * frames, ethernet frames, etc). 179 */ 180 static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist; 181 static struct sx bpf_sx; /* bpf global lock */ 182 static int bpf_bpfd_cnt; 183 184 static void bpf_attachd(struct bpf_d *, struct bpf_if *); 185 static void bpf_detachd(struct bpf_d *); 186 static void bpf_detachd_locked(struct bpf_d *); 187 static void bpf_freed(struct bpf_d *); 188 static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, 189 struct sockaddr *, int *, struct bpf_d *); 190 static int bpf_setif(struct bpf_d *, struct ifreq *); 191 static void bpf_timed_out(void *); 192 static __inline void 193 bpf_wakeup(struct bpf_d *); 194 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 195 void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int), 196 struct bintime *); 197 static void reset_d(struct bpf_d *); 198 static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd); 199 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 200 static int bpf_setdlt(struct bpf_d *, u_int); 201 static void filt_bpfdetach(struct knote *); 202 static int filt_bpfread(struct knote *, long); 203 static void bpf_drvinit(void *); 204 static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS); 205 206 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl"); 207 int bpf_maxinsns = BPF_MAXINSNS; 208 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW, 209 &bpf_maxinsns, 0, "Maximum bpf program instructions"); 210 static int bpf_zerocopy_enable = 0; 211 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, 212 &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions"); 213 static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW, 214 bpf_stats_sysctl, "bpf statistics portal"); 215 216 VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0; 217 #define V_bpf_optimize_writers VNET(bpf_optimize_writers) 218 SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW, 219 &VNET_NAME(bpf_optimize_writers), 0, 220 "Do not send packets until BPF program is set"); 221 222 static d_open_t bpfopen; 223 static d_read_t bpfread; 224 static d_write_t bpfwrite; 225 static d_ioctl_t bpfioctl; 226 static d_poll_t bpfpoll; 227 static d_kqfilter_t bpfkqfilter; 228 229 static struct cdevsw bpf_cdevsw = { 230 .d_version = D_VERSION, 231 .d_open = bpfopen, 232 .d_read = bpfread, 233 .d_write = bpfwrite, 234 .d_ioctl = bpfioctl, 235 .d_poll = bpfpoll, 236 .d_name = "bpf", 237 .d_kqfilter = bpfkqfilter, 238 }; 239 240 static struct filterops bpfread_filtops = { 241 .f_isfd = 1, 242 .f_detach = filt_bpfdetach, 243 .f_event = filt_bpfread, 244 }; 245 246 eventhandler_tag bpf_ifdetach_cookie = NULL; 247 248 /* 249 * LOCKING MODEL USED BY BPF: 250 * Locks: 251 * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal, 252 * some global counters and every bpf_if reference. 253 * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters. 254 * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields 255 * used by bpf_mtap code. 256 * 257 * Lock order: 258 * 259 * Global lock, interface lock, descriptor lock 260 * 261 * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2] 262 * working model. In many places (like bpf_detachd) we start with BPF descriptor 263 * (and we need to at least rlock it to get reliable interface pointer). This 264 * gives us potential LOR. As a result, we use global lock to protect from bpf_if 265 * change in every such place. 266 * 267 * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and 268 * 3) descriptor main wlock. 269 * Reading bd_bif can be protected by any of these locks, typically global lock. 270 * 271 * Changing read/write BPF filter is protected by the same three locks, 272 * the same applies for reading. 273 * 274 * Sleeping in global lock is not allowed due to bpfdetach() using it. 275 */ 276 277 /* 278 * Wrapper functions for various buffering methods. If the set of buffer 279 * modes expands, we will probably want to introduce a switch data structure 280 * similar to protosw, et. 281 */ 282 static void 283 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src, 284 u_int len) 285 { 286 287 BPFD_LOCK_ASSERT(d); 288 289 switch (d->bd_bufmode) { 290 case BPF_BUFMODE_BUFFER: 291 return (bpf_buffer_append_bytes(d, buf, offset, src, len)); 292 293 case BPF_BUFMODE_ZBUF: 294 counter_u64_add(d->bd_zcopy, 1); 295 return (bpf_zerocopy_append_bytes(d, buf, offset, src, len)); 296 297 default: 298 panic("bpf_buf_append_bytes"); 299 } 300 } 301 302 static void 303 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src, 304 u_int len) 305 { 306 307 BPFD_LOCK_ASSERT(d); 308 309 switch (d->bd_bufmode) { 310 case BPF_BUFMODE_BUFFER: 311 return (bpf_buffer_append_mbuf(d, buf, offset, src, len)); 312 313 case BPF_BUFMODE_ZBUF: 314 counter_u64_add(d->bd_zcopy, 1); 315 return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len)); 316 317 default: 318 panic("bpf_buf_append_mbuf"); 319 } 320 } 321 322 /* 323 * This function gets called when the free buffer is re-assigned. 324 */ 325 static void 326 bpf_buf_reclaimed(struct bpf_d *d) 327 { 328 329 BPFD_LOCK_ASSERT(d); 330 331 switch (d->bd_bufmode) { 332 case BPF_BUFMODE_BUFFER: 333 return; 334 335 case BPF_BUFMODE_ZBUF: 336 bpf_zerocopy_buf_reclaimed(d); 337 return; 338 339 default: 340 panic("bpf_buf_reclaimed"); 341 } 342 } 343 344 /* 345 * If the buffer mechanism has a way to decide that a held buffer can be made 346 * free, then it is exposed via the bpf_canfreebuf() interface. (1) is 347 * returned if the buffer can be discarded, (0) is returned if it cannot. 348 */ 349 static int 350 bpf_canfreebuf(struct bpf_d *d) 351 { 352 353 BPFD_LOCK_ASSERT(d); 354 355 switch (d->bd_bufmode) { 356 case BPF_BUFMODE_ZBUF: 357 return (bpf_zerocopy_canfreebuf(d)); 358 } 359 return (0); 360 } 361 362 /* 363 * Allow the buffer model to indicate that the current store buffer is 364 * immutable, regardless of the appearance of space. Return (1) if the 365 * buffer is writable, and (0) if not. 366 */ 367 static int 368 bpf_canwritebuf(struct bpf_d *d) 369 { 370 BPFD_LOCK_ASSERT(d); 371 372 switch (d->bd_bufmode) { 373 case BPF_BUFMODE_ZBUF: 374 return (bpf_zerocopy_canwritebuf(d)); 375 } 376 return (1); 377 } 378 379 /* 380 * Notify buffer model that an attempt to write to the store buffer has 381 * resulted in a dropped packet, in which case the buffer may be considered 382 * full. 383 */ 384 static void 385 bpf_buffull(struct bpf_d *d) 386 { 387 388 BPFD_LOCK_ASSERT(d); 389 390 switch (d->bd_bufmode) { 391 case BPF_BUFMODE_ZBUF: 392 bpf_zerocopy_buffull(d); 393 break; 394 } 395 } 396 397 /* 398 * Notify the buffer model that a buffer has moved into the hold position. 399 */ 400 void 401 bpf_bufheld(struct bpf_d *d) 402 { 403 404 BPFD_LOCK_ASSERT(d); 405 406 switch (d->bd_bufmode) { 407 case BPF_BUFMODE_ZBUF: 408 bpf_zerocopy_bufheld(d); 409 break; 410 } 411 } 412 413 static void 414 bpf_free(struct bpf_d *d) 415 { 416 417 switch (d->bd_bufmode) { 418 case BPF_BUFMODE_BUFFER: 419 return (bpf_buffer_free(d)); 420 421 case BPF_BUFMODE_ZBUF: 422 return (bpf_zerocopy_free(d)); 423 424 default: 425 panic("bpf_buf_free"); 426 } 427 } 428 429 static int 430 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) 431 { 432 433 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 434 return (EOPNOTSUPP); 435 return (bpf_buffer_uiomove(d, buf, len, uio)); 436 } 437 438 static int 439 bpf_ioctl_sblen(struct bpf_d *d, u_int *i) 440 { 441 442 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 443 return (EOPNOTSUPP); 444 return (bpf_buffer_ioctl_sblen(d, i)); 445 } 446 447 static int 448 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i) 449 { 450 451 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 452 return (EOPNOTSUPP); 453 return (bpf_zerocopy_ioctl_getzmax(td, d, i)); 454 } 455 456 static int 457 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 458 { 459 460 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 461 return (EOPNOTSUPP); 462 return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz)); 463 } 464 465 static int 466 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 467 { 468 469 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 470 return (EOPNOTSUPP); 471 return (bpf_zerocopy_ioctl_setzbuf(td, d, bz)); 472 } 473 474 /* 475 * General BPF functions. 476 */ 477 static int 478 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, 479 struct sockaddr *sockp, int *hdrlen, struct bpf_d *d) 480 { 481 const struct ieee80211_bpf_params *p; 482 struct ether_header *eh; 483 struct mbuf *m; 484 int error; 485 int len; 486 int hlen; 487 int slen; 488 489 /* 490 * Build a sockaddr based on the data link layer type. 491 * We do this at this level because the ethernet header 492 * is copied directly into the data field of the sockaddr. 493 * In the case of SLIP, there is no header and the packet 494 * is forwarded as is. 495 * Also, we are careful to leave room at the front of the mbuf 496 * for the link level header. 497 */ 498 switch (linktype) { 499 500 case DLT_SLIP: 501 sockp->sa_family = AF_INET; 502 hlen = 0; 503 break; 504 505 case DLT_EN10MB: 506 sockp->sa_family = AF_UNSPEC; 507 /* XXX Would MAXLINKHDR be better? */ 508 hlen = ETHER_HDR_LEN; 509 break; 510 511 case DLT_FDDI: 512 sockp->sa_family = AF_IMPLINK; 513 hlen = 0; 514 break; 515 516 case DLT_RAW: 517 sockp->sa_family = AF_UNSPEC; 518 hlen = 0; 519 break; 520 521 case DLT_NULL: 522 /* 523 * null interface types require a 4 byte pseudo header which 524 * corresponds to the address family of the packet. 525 */ 526 sockp->sa_family = AF_UNSPEC; 527 hlen = 4; 528 break; 529 530 case DLT_ATM_RFC1483: 531 /* 532 * en atm driver requires 4-byte atm pseudo header. 533 * though it isn't standard, vpi:vci needs to be 534 * specified anyway. 535 */ 536 sockp->sa_family = AF_UNSPEC; 537 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ 538 break; 539 540 case DLT_PPP: 541 sockp->sa_family = AF_UNSPEC; 542 hlen = 4; /* This should match PPP_HDRLEN */ 543 break; 544 545 case DLT_IEEE802_11: /* IEEE 802.11 wireless */ 546 sockp->sa_family = AF_IEEE80211; 547 hlen = 0; 548 break; 549 550 case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */ 551 sockp->sa_family = AF_IEEE80211; 552 sockp->sa_len = 12; /* XXX != 0 */ 553 hlen = sizeof(struct ieee80211_bpf_params); 554 break; 555 556 default: 557 return (EIO); 558 } 559 560 len = uio->uio_resid; 561 if (len < hlen || len - hlen > ifp->if_mtu) 562 return (EMSGSIZE); 563 564 m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR); 565 if (m == NULL) 566 return (EIO); 567 m->m_pkthdr.len = m->m_len = len; 568 *mp = m; 569 570 error = uiomove(mtod(m, u_char *), len, uio); 571 if (error) 572 goto bad; 573 574 slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len); 575 if (slen == 0) { 576 error = EPERM; 577 goto bad; 578 } 579 580 /* Check for multicast destination */ 581 switch (linktype) { 582 case DLT_EN10MB: 583 eh = mtod(m, struct ether_header *); 584 if (ETHER_IS_MULTICAST(eh->ether_dhost)) { 585 if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost, 586 ETHER_ADDR_LEN) == 0) 587 m->m_flags |= M_BCAST; 588 else 589 m->m_flags |= M_MCAST; 590 } 591 if (d->bd_hdrcmplt == 0) { 592 memcpy(eh->ether_shost, IF_LLADDR(ifp), 593 sizeof(eh->ether_shost)); 594 } 595 break; 596 } 597 598 /* 599 * Make room for link header, and copy it to sockaddr 600 */ 601 if (hlen != 0) { 602 if (sockp->sa_family == AF_IEEE80211) { 603 /* 604 * Collect true length from the parameter header 605 * NB: sockp is known to be zero'd so if we do a 606 * short copy unspecified parameters will be 607 * zero. 608 * NB: packet may not be aligned after stripping 609 * bpf params 610 * XXX check ibp_vers 611 */ 612 p = mtod(m, const struct ieee80211_bpf_params *); 613 hlen = p->ibp_len; 614 if (hlen > sizeof(sockp->sa_data)) { 615 error = EINVAL; 616 goto bad; 617 } 618 } 619 bcopy(mtod(m, const void *), sockp->sa_data, hlen); 620 } 621 *hdrlen = hlen; 622 623 return (0); 624 bad: 625 m_freem(m); 626 return (error); 627 } 628 629 /* 630 * Attach file to the bpf interface, i.e. make d listen on bp. 631 */ 632 static void 633 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 634 { 635 int op_w; 636 637 BPF_LOCK_ASSERT(); 638 639 /* 640 * Save sysctl value to protect from sysctl change 641 * between reads 642 */ 643 op_w = V_bpf_optimize_writers || d->bd_writer; 644 645 if (d->bd_bif != NULL) 646 bpf_detachd_locked(d); 647 /* 648 * Point d at bp, and add d to the interface's list. 649 * Since there are many applications using BPF for 650 * sending raw packets only (dhcpd, cdpd are good examples) 651 * we can delay adding d to the list of active listeners until 652 * some filter is configured. 653 */ 654 655 BPFIF_WLOCK(bp); 656 BPFD_LOCK(d); 657 658 d->bd_bif = bp; 659 660 if (op_w != 0) { 661 /* Add to writers-only list */ 662 LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next); 663 /* 664 * We decrement bd_writer on every filter set operation. 665 * First BIOCSETF is done by pcap_open_live() to set up 666 * snap length. After that appliation usually sets its own filter 667 */ 668 d->bd_writer = 2; 669 } else 670 LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); 671 672 BPFD_UNLOCK(d); 673 BPFIF_WUNLOCK(bp); 674 675 bpf_bpfd_cnt++; 676 677 CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list", 678 __func__, d->bd_pid, d->bd_writer ? "writer" : "active"); 679 680 if (op_w == 0) 681 EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1); 682 } 683 684 /* 685 * Check if we need to upgrade our descriptor @d from write-only mode. 686 */ 687 static int 688 bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen) 689 { 690 int is_snap, need_upgrade; 691 692 /* 693 * Check if we've already upgraded or new filter is empty. 694 */ 695 if (d->bd_writer == 0 || fcode == NULL) 696 return (0); 697 698 need_upgrade = 0; 699 700 /* 701 * Check if cmd looks like snaplen setting from 702 * pcap_bpf.c:pcap_open_live(). 703 * Note we're not checking .k value here: 704 * while pcap_open_live() definitely sets to non-zero value, 705 * we'd prefer to treat k=0 (deny ALL) case the same way: e.g. 706 * do not consider upgrading immediately 707 */ 708 if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K)) 709 is_snap = 1; 710 else 711 is_snap = 0; 712 713 if (is_snap == 0) { 714 /* 715 * We're setting first filter and it doesn't look like 716 * setting snaplen. We're probably using bpf directly. 717 * Upgrade immediately. 718 */ 719 need_upgrade = 1; 720 } else { 721 /* 722 * Do not require upgrade by first BIOCSETF 723 * (used to set snaplen) by pcap_open_live(). 724 */ 725 726 if (--d->bd_writer == 0) { 727 /* 728 * First snaplen filter has already 729 * been set. This is probably catch-all 730 * filter 731 */ 732 need_upgrade = 1; 733 } 734 } 735 736 CTR5(KTR_NET, 737 "%s: filter function set by pid %d, " 738 "bd_writer counter %d, snap %d upgrade %d", 739 __func__, d->bd_pid, d->bd_writer, 740 is_snap, need_upgrade); 741 742 return (need_upgrade); 743 } 744 745 /* 746 * Add d to the list of active bp filters. 747 * Requires bpf_attachd() to be called before. 748 */ 749 static void 750 bpf_upgraded(struct bpf_d *d) 751 { 752 struct bpf_if *bp; 753 754 BPF_LOCK_ASSERT(); 755 756 bp = d->bd_bif; 757 758 /* 759 * Filter can be set several times without specifying interface. 760 * Mark d as reader and exit. 761 */ 762 if (bp == NULL) { 763 BPFD_LOCK(d); 764 d->bd_writer = 0; 765 BPFD_UNLOCK(d); 766 return; 767 } 768 769 BPFIF_WLOCK(bp); 770 BPFD_LOCK(d); 771 772 /* Remove from writers-only list */ 773 LIST_REMOVE(d, bd_next); 774 LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); 775 /* Mark d as reader */ 776 d->bd_writer = 0; 777 778 BPFD_UNLOCK(d); 779 BPFIF_WUNLOCK(bp); 780 781 CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid); 782 783 EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1); 784 } 785 786 /* 787 * Detach a file from its interface. 788 */ 789 static void 790 bpf_detachd(struct bpf_d *d) 791 { 792 BPF_LOCK(); 793 bpf_detachd_locked(d); 794 BPF_UNLOCK(); 795 } 796 797 static void 798 bpf_detachd_locked(struct bpf_d *d) 799 { 800 int error; 801 struct bpf_if *bp; 802 struct ifnet *ifp; 803 804 CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid); 805 806 BPF_LOCK_ASSERT(); 807 808 /* Check if descriptor is attached */ 809 if ((bp = d->bd_bif) == NULL) 810 return; 811 812 BPFIF_WLOCK(bp); 813 BPFD_LOCK(d); 814 815 /* Save bd_writer value */ 816 error = d->bd_writer; 817 818 /* 819 * Remove d from the interface's descriptor list. 820 */ 821 LIST_REMOVE(d, bd_next); 822 823 ifp = bp->bif_ifp; 824 d->bd_bif = NULL; 825 BPFD_UNLOCK(d); 826 BPFIF_WUNLOCK(bp); 827 828 bpf_bpfd_cnt--; 829 830 /* Call event handler iff d is attached */ 831 if (error == 0) 832 EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0); 833 834 /* 835 * Check if this descriptor had requested promiscuous mode. 836 * If so, turn it off. 837 */ 838 if (d->bd_promisc) { 839 d->bd_promisc = 0; 840 CURVNET_SET(ifp->if_vnet); 841 error = ifpromisc(ifp, 0); 842 CURVNET_RESTORE(); 843 if (error != 0 && error != ENXIO) { 844 /* 845 * ENXIO can happen if a pccard is unplugged 846 * Something is really wrong if we were able to put 847 * the driver into promiscuous mode, but can't 848 * take it out. 849 */ 850 if_printf(bp->bif_ifp, 851 "bpf_detach: ifpromisc failed (%d)\n", error); 852 } 853 } 854 } 855 856 /* 857 * Close the descriptor by detaching it from its interface, 858 * deallocating its buffers, and marking it free. 859 */ 860 static void 861 bpf_dtor(void *data) 862 { 863 struct bpf_d *d = data; 864 865 BPFD_LOCK(d); 866 if (d->bd_state == BPF_WAITING) 867 callout_stop(&d->bd_callout); 868 d->bd_state = BPF_IDLE; 869 BPFD_UNLOCK(d); 870 funsetown(&d->bd_sigio); 871 bpf_detachd(d); 872 #ifdef MAC 873 mac_bpfdesc_destroy(d); 874 #endif /* MAC */ 875 seldrain(&d->bd_sel); 876 knlist_destroy(&d->bd_sel.si_note); 877 callout_drain(&d->bd_callout); 878 bpf_freed(d); 879 free(d, M_BPF); 880 } 881 882 /* 883 * Open ethernet device. Returns ENXIO for illegal minor device number, 884 * EBUSY if file is open by another process. 885 */ 886 /* ARGSUSED */ 887 static int 888 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) 889 { 890 struct bpf_d *d; 891 int error; 892 893 d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO); 894 error = devfs_set_cdevpriv(d, bpf_dtor); 895 if (error != 0) { 896 free(d, M_BPF); 897 return (error); 898 } 899 900 /* Setup counters */ 901 d->bd_rcount = counter_u64_alloc(M_WAITOK); 902 d->bd_dcount = counter_u64_alloc(M_WAITOK); 903 d->bd_fcount = counter_u64_alloc(M_WAITOK); 904 d->bd_wcount = counter_u64_alloc(M_WAITOK); 905 d->bd_wfcount = counter_u64_alloc(M_WAITOK); 906 d->bd_wdcount = counter_u64_alloc(M_WAITOK); 907 d->bd_zcopy = counter_u64_alloc(M_WAITOK); 908 909 /* 910 * For historical reasons, perform a one-time initialization call to 911 * the buffer routines, even though we're not yet committed to a 912 * particular buffer method. 913 */ 914 bpf_buffer_init(d); 915 if ((flags & FREAD) == 0) 916 d->bd_writer = 2; 917 d->bd_hbuf_in_use = 0; 918 d->bd_bufmode = BPF_BUFMODE_BUFFER; 919 d->bd_sig = SIGIO; 920 d->bd_direction = BPF_D_INOUT; 921 BPF_PID_REFRESH(d, td); 922 #ifdef MAC 923 mac_bpfdesc_init(d); 924 mac_bpfdesc_create(td->td_ucred, d); 925 #endif 926 mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF); 927 callout_init_mtx(&d->bd_callout, &d->bd_lock, 0); 928 knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock); 929 930 return (0); 931 } 932 933 /* 934 * bpfread - read next chunk of packets from buffers 935 */ 936 static int 937 bpfread(struct cdev *dev, struct uio *uio, int ioflag) 938 { 939 struct bpf_d *d; 940 int error; 941 int non_block; 942 int timed_out; 943 944 error = devfs_get_cdevpriv((void **)&d); 945 if (error != 0) 946 return (error); 947 948 /* 949 * Restrict application to use a buffer the same size as 950 * as kernel buffers. 951 */ 952 if (uio->uio_resid != d->bd_bufsize) 953 return (EINVAL); 954 955 non_block = ((ioflag & O_NONBLOCK) != 0); 956 957 BPFD_LOCK(d); 958 BPF_PID_REFRESH_CUR(d); 959 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) { 960 BPFD_UNLOCK(d); 961 return (EOPNOTSUPP); 962 } 963 if (d->bd_state == BPF_WAITING) 964 callout_stop(&d->bd_callout); 965 timed_out = (d->bd_state == BPF_TIMED_OUT); 966 d->bd_state = BPF_IDLE; 967 while (d->bd_hbuf_in_use) { 968 error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, 969 PRINET|PCATCH, "bd_hbuf", 0); 970 if (error != 0) { 971 BPFD_UNLOCK(d); 972 return (error); 973 } 974 } 975 /* 976 * If the hold buffer is empty, then do a timed sleep, which 977 * ends when the timeout expires or when enough packets 978 * have arrived to fill the store buffer. 979 */ 980 while (d->bd_hbuf == NULL) { 981 if (d->bd_slen != 0) { 982 /* 983 * A packet(s) either arrived since the previous 984 * read or arrived while we were asleep. 985 */ 986 if (d->bd_immediate || non_block || timed_out) { 987 /* 988 * Rotate the buffers and return what's here 989 * if we are in immediate mode, non-blocking 990 * flag is set, or this descriptor timed out. 991 */ 992 ROTATE_BUFFERS(d); 993 break; 994 } 995 } 996 997 /* 998 * No data is available, check to see if the bpf device 999 * is still pointed at a real interface. If not, return 1000 * ENXIO so that the userland process knows to rebind 1001 * it before using it again. 1002 */ 1003 if (d->bd_bif == NULL) { 1004 BPFD_UNLOCK(d); 1005 return (ENXIO); 1006 } 1007 1008 if (non_block) { 1009 BPFD_UNLOCK(d); 1010 return (EWOULDBLOCK); 1011 } 1012 error = msleep(d, &d->bd_lock, PRINET|PCATCH, 1013 "bpf", d->bd_rtout); 1014 if (error == EINTR || error == ERESTART) { 1015 BPFD_UNLOCK(d); 1016 return (error); 1017 } 1018 if (error == EWOULDBLOCK) { 1019 /* 1020 * On a timeout, return what's in the buffer, 1021 * which may be nothing. If there is something 1022 * in the store buffer, we can rotate the buffers. 1023 */ 1024 if (d->bd_hbuf) 1025 /* 1026 * We filled up the buffer in between 1027 * getting the timeout and arriving 1028 * here, so we don't need to rotate. 1029 */ 1030 break; 1031 1032 if (d->bd_slen == 0) { 1033 BPFD_UNLOCK(d); 1034 return (0); 1035 } 1036 ROTATE_BUFFERS(d); 1037 break; 1038 } 1039 } 1040 /* 1041 * At this point, we know we have something in the hold slot. 1042 */ 1043 d->bd_hbuf_in_use = 1; 1044 BPFD_UNLOCK(d); 1045 1046 /* 1047 * Move data from hold buffer into user space. 1048 * We know the entire buffer is transferred since 1049 * we checked above that the read buffer is bpf_bufsize bytes. 1050 * 1051 * We do not have to worry about simultaneous reads because 1052 * we waited for sole access to the hold buffer above. 1053 */ 1054 error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); 1055 1056 BPFD_LOCK(d); 1057 KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf")); 1058 d->bd_fbuf = d->bd_hbuf; 1059 d->bd_hbuf = NULL; 1060 d->bd_hlen = 0; 1061 bpf_buf_reclaimed(d); 1062 d->bd_hbuf_in_use = 0; 1063 wakeup(&d->bd_hbuf_in_use); 1064 BPFD_UNLOCK(d); 1065 1066 return (error); 1067 } 1068 1069 /* 1070 * If there are processes sleeping on this descriptor, wake them up. 1071 */ 1072 static __inline void 1073 bpf_wakeup(struct bpf_d *d) 1074 { 1075 1076 BPFD_LOCK_ASSERT(d); 1077 if (d->bd_state == BPF_WAITING) { 1078 callout_stop(&d->bd_callout); 1079 d->bd_state = BPF_IDLE; 1080 } 1081 wakeup(d); 1082 if (d->bd_async && d->bd_sig && d->bd_sigio) 1083 pgsigio(&d->bd_sigio, d->bd_sig, 0); 1084 1085 selwakeuppri(&d->bd_sel, PRINET); 1086 KNOTE_LOCKED(&d->bd_sel.si_note, 0); 1087 } 1088 1089 static void 1090 bpf_timed_out(void *arg) 1091 { 1092 struct bpf_d *d = (struct bpf_d *)arg; 1093 1094 BPFD_LOCK_ASSERT(d); 1095 1096 if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout)) 1097 return; 1098 if (d->bd_state == BPF_WAITING) { 1099 d->bd_state = BPF_TIMED_OUT; 1100 if (d->bd_slen != 0) 1101 bpf_wakeup(d); 1102 } 1103 } 1104 1105 static int 1106 bpf_ready(struct bpf_d *d) 1107 { 1108 1109 BPFD_LOCK_ASSERT(d); 1110 1111 if (!bpf_canfreebuf(d) && d->bd_hlen != 0) 1112 return (1); 1113 if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 1114 d->bd_slen != 0) 1115 return (1); 1116 return (0); 1117 } 1118 1119 static int 1120 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) 1121 { 1122 struct bpf_d *d; 1123 struct ifnet *ifp; 1124 struct mbuf *m, *mc; 1125 struct sockaddr dst; 1126 struct route ro; 1127 int error, hlen; 1128 1129 error = devfs_get_cdevpriv((void **)&d); 1130 if (error != 0) 1131 return (error); 1132 1133 BPF_PID_REFRESH_CUR(d); 1134 counter_u64_add(d->bd_wcount, 1); 1135 /* XXX: locking required */ 1136 if (d->bd_bif == NULL) { 1137 counter_u64_add(d->bd_wdcount, 1); 1138 return (ENXIO); 1139 } 1140 1141 ifp = d->bd_bif->bif_ifp; 1142 1143 if ((ifp->if_flags & IFF_UP) == 0) { 1144 counter_u64_add(d->bd_wdcount, 1); 1145 return (ENETDOWN); 1146 } 1147 1148 if (uio->uio_resid == 0) { 1149 counter_u64_add(d->bd_wdcount, 1); 1150 return (0); 1151 } 1152 1153 bzero(&dst, sizeof(dst)); 1154 m = NULL; 1155 hlen = 0; 1156 /* XXX: bpf_movein() can sleep */ 1157 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp, 1158 &m, &dst, &hlen, d); 1159 if (error) { 1160 counter_u64_add(d->bd_wdcount, 1); 1161 return (error); 1162 } 1163 counter_u64_add(d->bd_wfcount, 1); 1164 if (d->bd_hdrcmplt) 1165 dst.sa_family = pseudo_AF_HDRCMPLT; 1166 1167 if (d->bd_feedback) { 1168 mc = m_dup(m, M_NOWAIT); 1169 if (mc != NULL) 1170 mc->m_pkthdr.rcvif = ifp; 1171 /* Set M_PROMISC for outgoing packets to be discarded. */ 1172 if (d->bd_direction == BPF_D_INOUT) 1173 m->m_flags |= M_PROMISC; 1174 } else 1175 mc = NULL; 1176 1177 m->m_pkthdr.len -= hlen; 1178 m->m_len -= hlen; 1179 m->m_data += hlen; /* XXX */ 1180 1181 CURVNET_SET(ifp->if_vnet); 1182 #ifdef MAC 1183 BPFD_LOCK(d); 1184 mac_bpfdesc_create_mbuf(d, m); 1185 if (mc != NULL) 1186 mac_bpfdesc_create_mbuf(d, mc); 1187 BPFD_UNLOCK(d); 1188 #endif 1189 1190 bzero(&ro, sizeof(ro)); 1191 if (hlen != 0) { 1192 ro.ro_prepend = (u_char *)&dst.sa_data; 1193 ro.ro_plen = hlen; 1194 ro.ro_flags = RT_HAS_HEADER; 1195 } 1196 1197 error = (*ifp->if_output)(ifp, m, &dst, &ro); 1198 if (error) 1199 counter_u64_add(d->bd_wdcount, 1); 1200 1201 if (mc != NULL) { 1202 if (error == 0) 1203 (*ifp->if_input)(ifp, mc); 1204 else 1205 m_freem(mc); 1206 } 1207 CURVNET_RESTORE(); 1208 1209 return (error); 1210 } 1211 1212 /* 1213 * Reset a descriptor by flushing its packet buffer and clearing the receive 1214 * and drop counts. This is doable for kernel-only buffers, but with 1215 * zero-copy buffers, we can't write to (or rotate) buffers that are 1216 * currently owned by userspace. It would be nice if we could encapsulate 1217 * this logic in the buffer code rather than here. 1218 */ 1219 static void 1220 reset_d(struct bpf_d *d) 1221 { 1222 1223 BPFD_LOCK_ASSERT(d); 1224 1225 while (d->bd_hbuf_in_use) 1226 mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET, 1227 "bd_hbuf", 0); 1228 if ((d->bd_hbuf != NULL) && 1229 (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) { 1230 /* Free the hold buffer. */ 1231 d->bd_fbuf = d->bd_hbuf; 1232 d->bd_hbuf = NULL; 1233 d->bd_hlen = 0; 1234 bpf_buf_reclaimed(d); 1235 } 1236 if (bpf_canwritebuf(d)) 1237 d->bd_slen = 0; 1238 counter_u64_zero(d->bd_rcount); 1239 counter_u64_zero(d->bd_dcount); 1240 counter_u64_zero(d->bd_fcount); 1241 counter_u64_zero(d->bd_wcount); 1242 counter_u64_zero(d->bd_wfcount); 1243 counter_u64_zero(d->bd_wdcount); 1244 counter_u64_zero(d->bd_zcopy); 1245 } 1246 1247 /* 1248 * FIONREAD Check for read packet available. 1249 * BIOCGBLEN Get buffer len [for read()]. 1250 * BIOCSETF Set read filter. 1251 * BIOCSETFNR Set read filter without resetting descriptor. 1252 * BIOCSETWF Set write filter. 1253 * BIOCFLUSH Flush read packet buffer. 1254 * BIOCPROMISC Put interface into promiscuous mode. 1255 * BIOCGDLT Get link layer type. 1256 * BIOCGETIF Get interface name. 1257 * BIOCSETIF Set interface. 1258 * BIOCSRTIMEOUT Set read timeout. 1259 * BIOCGRTIMEOUT Get read timeout. 1260 * BIOCGSTATS Get packet stats. 1261 * BIOCIMMEDIATE Set immediate mode. 1262 * BIOCVERSION Get filter language version. 1263 * BIOCGHDRCMPLT Get "header already complete" flag 1264 * BIOCSHDRCMPLT Set "header already complete" flag 1265 * BIOCGDIRECTION Get packet direction flag 1266 * BIOCSDIRECTION Set packet direction flag 1267 * BIOCGTSTAMP Get time stamp format and resolution. 1268 * BIOCSTSTAMP Set time stamp format and resolution. 1269 * BIOCLOCK Set "locked" flag 1270 * BIOCFEEDBACK Set packet feedback mode. 1271 * BIOCSETZBUF Set current zero-copy buffer locations. 1272 * BIOCGETZMAX Get maximum zero-copy buffer size. 1273 * BIOCROTZBUF Force rotation of zero-copy buffer 1274 * BIOCSETBUFMODE Set buffer mode. 1275 * BIOCGETBUFMODE Get current buffer mode. 1276 */ 1277 /* ARGSUSED */ 1278 static int 1279 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 1280 struct thread *td) 1281 { 1282 struct bpf_d *d; 1283 int error; 1284 1285 error = devfs_get_cdevpriv((void **)&d); 1286 if (error != 0) 1287 return (error); 1288 1289 /* 1290 * Refresh PID associated with this descriptor. 1291 */ 1292 BPFD_LOCK(d); 1293 BPF_PID_REFRESH(d, td); 1294 if (d->bd_state == BPF_WAITING) 1295 callout_stop(&d->bd_callout); 1296 d->bd_state = BPF_IDLE; 1297 BPFD_UNLOCK(d); 1298 1299 if (d->bd_locked == 1) { 1300 switch (cmd) { 1301 case BIOCGBLEN: 1302 case BIOCFLUSH: 1303 case BIOCGDLT: 1304 case BIOCGDLTLIST: 1305 #ifdef COMPAT_FREEBSD32 1306 case BIOCGDLTLIST32: 1307 #endif 1308 case BIOCGETIF: 1309 case BIOCGRTIMEOUT: 1310 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1311 case BIOCGRTIMEOUT32: 1312 #endif 1313 case BIOCGSTATS: 1314 case BIOCVERSION: 1315 case BIOCGRSIG: 1316 case BIOCGHDRCMPLT: 1317 case BIOCSTSTAMP: 1318 case BIOCFEEDBACK: 1319 case FIONREAD: 1320 case BIOCLOCK: 1321 case BIOCSRTIMEOUT: 1322 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1323 case BIOCSRTIMEOUT32: 1324 #endif 1325 case BIOCIMMEDIATE: 1326 case TIOCGPGRP: 1327 case BIOCROTZBUF: 1328 break; 1329 default: 1330 return (EPERM); 1331 } 1332 } 1333 #ifdef COMPAT_FREEBSD32 1334 /* 1335 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so 1336 * that it will get 32-bit packet headers. 1337 */ 1338 switch (cmd) { 1339 case BIOCSETF32: 1340 case BIOCSETFNR32: 1341 case BIOCSETWF32: 1342 case BIOCGDLTLIST32: 1343 case BIOCGRTIMEOUT32: 1344 case BIOCSRTIMEOUT32: 1345 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 1346 BPFD_LOCK(d); 1347 d->bd_compat32 = 1; 1348 BPFD_UNLOCK(d); 1349 } 1350 } 1351 #endif 1352 1353 CURVNET_SET(TD_TO_VNET(td)); 1354 switch (cmd) { 1355 1356 default: 1357 error = EINVAL; 1358 break; 1359 1360 /* 1361 * Check for read packet available. 1362 */ 1363 case FIONREAD: 1364 { 1365 int n; 1366 1367 BPFD_LOCK(d); 1368 n = d->bd_slen; 1369 while (d->bd_hbuf_in_use) 1370 mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, 1371 PRINET, "bd_hbuf", 0); 1372 if (d->bd_hbuf) 1373 n += d->bd_hlen; 1374 BPFD_UNLOCK(d); 1375 1376 *(int *)addr = n; 1377 break; 1378 } 1379 1380 /* 1381 * Get buffer len [for read()]. 1382 */ 1383 case BIOCGBLEN: 1384 BPFD_LOCK(d); 1385 *(u_int *)addr = d->bd_bufsize; 1386 BPFD_UNLOCK(d); 1387 break; 1388 1389 /* 1390 * Set buffer length. 1391 */ 1392 case BIOCSBLEN: 1393 error = bpf_ioctl_sblen(d, (u_int *)addr); 1394 break; 1395 1396 /* 1397 * Set link layer read filter. 1398 */ 1399 case BIOCSETF: 1400 case BIOCSETFNR: 1401 case BIOCSETWF: 1402 #ifdef COMPAT_FREEBSD32 1403 case BIOCSETF32: 1404 case BIOCSETFNR32: 1405 case BIOCSETWF32: 1406 #endif 1407 error = bpf_setf(d, (struct bpf_program *)addr, cmd); 1408 break; 1409 1410 /* 1411 * Flush read packet buffer. 1412 */ 1413 case BIOCFLUSH: 1414 BPFD_LOCK(d); 1415 reset_d(d); 1416 BPFD_UNLOCK(d); 1417 break; 1418 1419 /* 1420 * Put interface into promiscuous mode. 1421 */ 1422 case BIOCPROMISC: 1423 if (d->bd_bif == NULL) { 1424 /* 1425 * No interface attached yet. 1426 */ 1427 error = EINVAL; 1428 break; 1429 } 1430 if (d->bd_promisc == 0) { 1431 error = ifpromisc(d->bd_bif->bif_ifp, 1); 1432 if (error == 0) 1433 d->bd_promisc = 1; 1434 } 1435 break; 1436 1437 /* 1438 * Get current data link type. 1439 */ 1440 case BIOCGDLT: 1441 BPF_LOCK(); 1442 if (d->bd_bif == NULL) 1443 error = EINVAL; 1444 else 1445 *(u_int *)addr = d->bd_bif->bif_dlt; 1446 BPF_UNLOCK(); 1447 break; 1448 1449 /* 1450 * Get a list of supported data link types. 1451 */ 1452 #ifdef COMPAT_FREEBSD32 1453 case BIOCGDLTLIST32: 1454 { 1455 struct bpf_dltlist32 *list32; 1456 struct bpf_dltlist dltlist; 1457 1458 list32 = (struct bpf_dltlist32 *)addr; 1459 dltlist.bfl_len = list32->bfl_len; 1460 dltlist.bfl_list = PTRIN(list32->bfl_list); 1461 BPF_LOCK(); 1462 if (d->bd_bif == NULL) 1463 error = EINVAL; 1464 else { 1465 error = bpf_getdltlist(d, &dltlist); 1466 if (error == 0) 1467 list32->bfl_len = dltlist.bfl_len; 1468 } 1469 BPF_UNLOCK(); 1470 break; 1471 } 1472 #endif 1473 1474 case BIOCGDLTLIST: 1475 BPF_LOCK(); 1476 if (d->bd_bif == NULL) 1477 error = EINVAL; 1478 else 1479 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); 1480 BPF_UNLOCK(); 1481 break; 1482 1483 /* 1484 * Set data link type. 1485 */ 1486 case BIOCSDLT: 1487 BPF_LOCK(); 1488 if (d->bd_bif == NULL) 1489 error = EINVAL; 1490 else 1491 error = bpf_setdlt(d, *(u_int *)addr); 1492 BPF_UNLOCK(); 1493 break; 1494 1495 /* 1496 * Get interface name. 1497 */ 1498 case BIOCGETIF: 1499 BPF_LOCK(); 1500 if (d->bd_bif == NULL) 1501 error = EINVAL; 1502 else { 1503 struct ifnet *const ifp = d->bd_bif->bif_ifp; 1504 struct ifreq *const ifr = (struct ifreq *)addr; 1505 1506 strlcpy(ifr->ifr_name, ifp->if_xname, 1507 sizeof(ifr->ifr_name)); 1508 } 1509 BPF_UNLOCK(); 1510 break; 1511 1512 /* 1513 * Set interface. 1514 */ 1515 case BIOCSETIF: 1516 { 1517 int alloc_buf, size; 1518 1519 /* 1520 * Behavior here depends on the buffering model. If 1521 * we're using kernel memory buffers, then we can 1522 * allocate them here. If we're using zero-copy, 1523 * then the user process must have registered buffers 1524 * by the time we get here. 1525 */ 1526 alloc_buf = 0; 1527 BPFD_LOCK(d); 1528 if (d->bd_bufmode == BPF_BUFMODE_BUFFER && 1529 d->bd_sbuf == NULL) 1530 alloc_buf = 1; 1531 BPFD_UNLOCK(d); 1532 if (alloc_buf) { 1533 size = d->bd_bufsize; 1534 error = bpf_buffer_ioctl_sblen(d, &size); 1535 if (error != 0) 1536 break; 1537 } 1538 BPF_LOCK(); 1539 error = bpf_setif(d, (struct ifreq *)addr); 1540 BPF_UNLOCK(); 1541 break; 1542 } 1543 1544 /* 1545 * Set read timeout. 1546 */ 1547 case BIOCSRTIMEOUT: 1548 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1549 case BIOCSRTIMEOUT32: 1550 #endif 1551 { 1552 struct timeval *tv = (struct timeval *)addr; 1553 #if defined(COMPAT_FREEBSD32) && !defined(__mips__) 1554 struct timeval32 *tv32; 1555 struct timeval tv64; 1556 1557 if (cmd == BIOCSRTIMEOUT32) { 1558 tv32 = (struct timeval32 *)addr; 1559 tv = &tv64; 1560 tv->tv_sec = tv32->tv_sec; 1561 tv->tv_usec = tv32->tv_usec; 1562 } else 1563 #endif 1564 tv = (struct timeval *)addr; 1565 1566 /* 1567 * Subtract 1 tick from tvtohz() since this isn't 1568 * a one-shot timer. 1569 */ 1570 if ((error = itimerfix(tv)) == 0) 1571 d->bd_rtout = tvtohz(tv) - 1; 1572 break; 1573 } 1574 1575 /* 1576 * Get read timeout. 1577 */ 1578 case BIOCGRTIMEOUT: 1579 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1580 case BIOCGRTIMEOUT32: 1581 #endif 1582 { 1583 struct timeval *tv; 1584 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1585 struct timeval32 *tv32; 1586 struct timeval tv64; 1587 1588 if (cmd == BIOCGRTIMEOUT32) 1589 tv = &tv64; 1590 else 1591 #endif 1592 tv = (struct timeval *)addr; 1593 1594 tv->tv_sec = d->bd_rtout / hz; 1595 tv->tv_usec = (d->bd_rtout % hz) * tick; 1596 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1597 if (cmd == BIOCGRTIMEOUT32) { 1598 tv32 = (struct timeval32 *)addr; 1599 tv32->tv_sec = tv->tv_sec; 1600 tv32->tv_usec = tv->tv_usec; 1601 } 1602 #endif 1603 1604 break; 1605 } 1606 1607 /* 1608 * Get packet stats. 1609 */ 1610 case BIOCGSTATS: 1611 { 1612 struct bpf_stat *bs = (struct bpf_stat *)addr; 1613 1614 /* XXXCSJP overflow */ 1615 bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount); 1616 bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount); 1617 break; 1618 } 1619 1620 /* 1621 * Set immediate mode. 1622 */ 1623 case BIOCIMMEDIATE: 1624 BPFD_LOCK(d); 1625 d->bd_immediate = *(u_int *)addr; 1626 BPFD_UNLOCK(d); 1627 break; 1628 1629 case BIOCVERSION: 1630 { 1631 struct bpf_version *bv = (struct bpf_version *)addr; 1632 1633 bv->bv_major = BPF_MAJOR_VERSION; 1634 bv->bv_minor = BPF_MINOR_VERSION; 1635 break; 1636 } 1637 1638 /* 1639 * Get "header already complete" flag 1640 */ 1641 case BIOCGHDRCMPLT: 1642 BPFD_LOCK(d); 1643 *(u_int *)addr = d->bd_hdrcmplt; 1644 BPFD_UNLOCK(d); 1645 break; 1646 1647 /* 1648 * Set "header already complete" flag 1649 */ 1650 case BIOCSHDRCMPLT: 1651 BPFD_LOCK(d); 1652 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0; 1653 BPFD_UNLOCK(d); 1654 break; 1655 1656 /* 1657 * Get packet direction flag 1658 */ 1659 case BIOCGDIRECTION: 1660 BPFD_LOCK(d); 1661 *(u_int *)addr = d->bd_direction; 1662 BPFD_UNLOCK(d); 1663 break; 1664 1665 /* 1666 * Set packet direction flag 1667 */ 1668 case BIOCSDIRECTION: 1669 { 1670 u_int direction; 1671 1672 direction = *(u_int *)addr; 1673 switch (direction) { 1674 case BPF_D_IN: 1675 case BPF_D_INOUT: 1676 case BPF_D_OUT: 1677 BPFD_LOCK(d); 1678 d->bd_direction = direction; 1679 BPFD_UNLOCK(d); 1680 break; 1681 default: 1682 error = EINVAL; 1683 } 1684 } 1685 break; 1686 1687 /* 1688 * Get packet timestamp format and resolution. 1689 */ 1690 case BIOCGTSTAMP: 1691 BPFD_LOCK(d); 1692 *(u_int *)addr = d->bd_tstamp; 1693 BPFD_UNLOCK(d); 1694 break; 1695 1696 /* 1697 * Set packet timestamp format and resolution. 1698 */ 1699 case BIOCSTSTAMP: 1700 { 1701 u_int func; 1702 1703 func = *(u_int *)addr; 1704 if (BPF_T_VALID(func)) 1705 d->bd_tstamp = func; 1706 else 1707 error = EINVAL; 1708 } 1709 break; 1710 1711 case BIOCFEEDBACK: 1712 BPFD_LOCK(d); 1713 d->bd_feedback = *(u_int *)addr; 1714 BPFD_UNLOCK(d); 1715 break; 1716 1717 case BIOCLOCK: 1718 BPFD_LOCK(d); 1719 d->bd_locked = 1; 1720 BPFD_UNLOCK(d); 1721 break; 1722 1723 case FIONBIO: /* Non-blocking I/O */ 1724 break; 1725 1726 case FIOASYNC: /* Send signal on receive packets */ 1727 BPFD_LOCK(d); 1728 d->bd_async = *(int *)addr; 1729 BPFD_UNLOCK(d); 1730 break; 1731 1732 case FIOSETOWN: 1733 /* 1734 * XXX: Add some sort of locking here? 1735 * fsetown() can sleep. 1736 */ 1737 error = fsetown(*(int *)addr, &d->bd_sigio); 1738 break; 1739 1740 case FIOGETOWN: 1741 BPFD_LOCK(d); 1742 *(int *)addr = fgetown(&d->bd_sigio); 1743 BPFD_UNLOCK(d); 1744 break; 1745 1746 /* This is deprecated, FIOSETOWN should be used instead. */ 1747 case TIOCSPGRP: 1748 error = fsetown(-(*(int *)addr), &d->bd_sigio); 1749 break; 1750 1751 /* This is deprecated, FIOGETOWN should be used instead. */ 1752 case TIOCGPGRP: 1753 *(int *)addr = -fgetown(&d->bd_sigio); 1754 break; 1755 1756 case BIOCSRSIG: /* Set receive signal */ 1757 { 1758 u_int sig; 1759 1760 sig = *(u_int *)addr; 1761 1762 if (sig >= NSIG) 1763 error = EINVAL; 1764 else { 1765 BPFD_LOCK(d); 1766 d->bd_sig = sig; 1767 BPFD_UNLOCK(d); 1768 } 1769 break; 1770 } 1771 case BIOCGRSIG: 1772 BPFD_LOCK(d); 1773 *(u_int *)addr = d->bd_sig; 1774 BPFD_UNLOCK(d); 1775 break; 1776 1777 case BIOCGETBUFMODE: 1778 BPFD_LOCK(d); 1779 *(u_int *)addr = d->bd_bufmode; 1780 BPFD_UNLOCK(d); 1781 break; 1782 1783 case BIOCSETBUFMODE: 1784 /* 1785 * Allow the buffering mode to be changed as long as we 1786 * haven't yet committed to a particular mode. Our 1787 * definition of commitment, for now, is whether or not a 1788 * buffer has been allocated or an interface attached, since 1789 * that's the point where things get tricky. 1790 */ 1791 switch (*(u_int *)addr) { 1792 case BPF_BUFMODE_BUFFER: 1793 break; 1794 1795 case BPF_BUFMODE_ZBUF: 1796 if (bpf_zerocopy_enable) 1797 break; 1798 /* FALLSTHROUGH */ 1799 1800 default: 1801 CURVNET_RESTORE(); 1802 return (EINVAL); 1803 } 1804 1805 BPFD_LOCK(d); 1806 if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || 1807 d->bd_fbuf != NULL || d->bd_bif != NULL) { 1808 BPFD_UNLOCK(d); 1809 CURVNET_RESTORE(); 1810 return (EBUSY); 1811 } 1812 d->bd_bufmode = *(u_int *)addr; 1813 BPFD_UNLOCK(d); 1814 break; 1815 1816 case BIOCGETZMAX: 1817 error = bpf_ioctl_getzmax(td, d, (size_t *)addr); 1818 break; 1819 1820 case BIOCSETZBUF: 1821 error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr); 1822 break; 1823 1824 case BIOCROTZBUF: 1825 error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr); 1826 break; 1827 } 1828 CURVNET_RESTORE(); 1829 return (error); 1830 } 1831 1832 /* 1833 * Set d's packet filter program to fp. If this file already has a filter, 1834 * free it and replace it. Returns EINVAL for bogus requests. 1835 * 1836 * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls 1837 * since reading d->bd_bif can't be protected by d or interface lock due to 1838 * lock order. 1839 * 1840 * Additionally, we have to acquire interface write lock due to bpf_mtap() uses 1841 * interface read lock to read all filers. 1842 * 1843 */ 1844 static int 1845 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) 1846 { 1847 #ifdef COMPAT_FREEBSD32 1848 struct bpf_program fp_swab; 1849 struct bpf_program32 *fp32; 1850 #endif 1851 struct bpf_insn *fcode, *old; 1852 #ifdef BPF_JITTER 1853 bpf_jit_filter *jfunc, *ofunc; 1854 #endif 1855 size_t size; 1856 u_int flen; 1857 int need_upgrade; 1858 1859 #ifdef COMPAT_FREEBSD32 1860 switch (cmd) { 1861 case BIOCSETF32: 1862 case BIOCSETWF32: 1863 case BIOCSETFNR32: 1864 fp32 = (struct bpf_program32 *)fp; 1865 fp_swab.bf_len = fp32->bf_len; 1866 fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns; 1867 fp = &fp_swab; 1868 switch (cmd) { 1869 case BIOCSETF32: 1870 cmd = BIOCSETF; 1871 break; 1872 case BIOCSETWF32: 1873 cmd = BIOCSETWF; 1874 break; 1875 } 1876 break; 1877 } 1878 #endif 1879 1880 fcode = NULL; 1881 #ifdef BPF_JITTER 1882 jfunc = ofunc = NULL; 1883 #endif 1884 need_upgrade = 0; 1885 1886 /* 1887 * Check new filter validness before acquiring any locks. 1888 * Allocate memory for new filter, if needed. 1889 */ 1890 flen = fp->bf_len; 1891 if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0)) 1892 return (EINVAL); 1893 size = flen * sizeof(*fp->bf_insns); 1894 if (size > 0) { 1895 /* We're setting up new filter. Copy and check actual data. */ 1896 fcode = malloc(size, M_BPF, M_WAITOK); 1897 if (copyin(fp->bf_insns, fcode, size) != 0 || 1898 !bpf_validate(fcode, flen)) { 1899 free(fcode, M_BPF); 1900 return (EINVAL); 1901 } 1902 #ifdef BPF_JITTER 1903 if (cmd != BIOCSETWF) { 1904 /* 1905 * Filter is copied inside fcode and is 1906 * perfectly valid. 1907 */ 1908 jfunc = bpf_jitter(fcode, flen); 1909 } 1910 #endif 1911 } 1912 1913 BPF_LOCK(); 1914 1915 /* 1916 * Set up new filter. 1917 * Protect filter change by interface lock. 1918 * Additionally, we are protected by global lock here. 1919 */ 1920 if (d->bd_bif != NULL) 1921 BPFIF_WLOCK(d->bd_bif); 1922 BPFD_LOCK(d); 1923 if (cmd == BIOCSETWF) { 1924 old = d->bd_wfilter; 1925 d->bd_wfilter = fcode; 1926 } else { 1927 old = d->bd_rfilter; 1928 d->bd_rfilter = fcode; 1929 #ifdef BPF_JITTER 1930 ofunc = d->bd_bfilter; 1931 d->bd_bfilter = jfunc; 1932 #endif 1933 if (cmd == BIOCSETF) 1934 reset_d(d); 1935 1936 need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen); 1937 } 1938 BPFD_UNLOCK(d); 1939 if (d->bd_bif != NULL) 1940 BPFIF_WUNLOCK(d->bd_bif); 1941 if (old != NULL) 1942 free(old, M_BPF); 1943 #ifdef BPF_JITTER 1944 if (ofunc != NULL) 1945 bpf_destroy_jit_filter(ofunc); 1946 #endif 1947 1948 /* Move d to active readers list. */ 1949 if (need_upgrade != 0) 1950 bpf_upgraded(d); 1951 1952 BPF_UNLOCK(); 1953 return (0); 1954 } 1955 1956 /* 1957 * Detach a file from its current interface (if attached at all) and attach 1958 * to the interface indicated by the name stored in ifr. 1959 * Return an errno or 0. 1960 */ 1961 static int 1962 bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1963 { 1964 struct bpf_if *bp; 1965 struct ifnet *theywant; 1966 1967 BPF_LOCK_ASSERT(); 1968 1969 theywant = ifunit(ifr->ifr_name); 1970 if (theywant == NULL || theywant->if_bpf == NULL) 1971 return (ENXIO); 1972 1973 bp = theywant->if_bpf; 1974 1975 /* Check if interface is not being detached from BPF */ 1976 BPFIF_RLOCK(bp); 1977 if (bp->bif_flags & BPFIF_FLAG_DYING) { 1978 BPFIF_RUNLOCK(bp); 1979 return (ENXIO); 1980 } 1981 BPFIF_RUNLOCK(bp); 1982 1983 /* 1984 * At this point, we expect the buffer is already allocated. If not, 1985 * return an error. 1986 */ 1987 switch (d->bd_bufmode) { 1988 case BPF_BUFMODE_BUFFER: 1989 case BPF_BUFMODE_ZBUF: 1990 if (d->bd_sbuf == NULL) 1991 return (EINVAL); 1992 break; 1993 1994 default: 1995 panic("bpf_setif: bufmode %d", d->bd_bufmode); 1996 } 1997 if (bp != d->bd_bif) 1998 bpf_attachd(d, bp); 1999 BPFD_LOCK(d); 2000 reset_d(d); 2001 BPFD_UNLOCK(d); 2002 return (0); 2003 } 2004 2005 /* 2006 * Support for select() and poll() system calls 2007 * 2008 * Return true iff the specific operation will not block indefinitely. 2009 * Otherwise, return false but make a note that a selwakeup() must be done. 2010 */ 2011 static int 2012 bpfpoll(struct cdev *dev, int events, struct thread *td) 2013 { 2014 struct bpf_d *d; 2015 int revents; 2016 2017 if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL) 2018 return (events & 2019 (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM)); 2020 2021 /* 2022 * Refresh PID associated with this descriptor. 2023 */ 2024 revents = events & (POLLOUT | POLLWRNORM); 2025 BPFD_LOCK(d); 2026 BPF_PID_REFRESH(d, td); 2027 if (events & (POLLIN | POLLRDNORM)) { 2028 if (bpf_ready(d)) 2029 revents |= events & (POLLIN | POLLRDNORM); 2030 else { 2031 selrecord(td, &d->bd_sel); 2032 /* Start the read timeout if necessary. */ 2033 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 2034 callout_reset(&d->bd_callout, d->bd_rtout, 2035 bpf_timed_out, d); 2036 d->bd_state = BPF_WAITING; 2037 } 2038 } 2039 } 2040 BPFD_UNLOCK(d); 2041 return (revents); 2042 } 2043 2044 /* 2045 * Support for kevent() system call. Register EVFILT_READ filters and 2046 * reject all others. 2047 */ 2048 int 2049 bpfkqfilter(struct cdev *dev, struct knote *kn) 2050 { 2051 struct bpf_d *d; 2052 2053 if (devfs_get_cdevpriv((void **)&d) != 0 || 2054 kn->kn_filter != EVFILT_READ) 2055 return (1); 2056 2057 /* 2058 * Refresh PID associated with this descriptor. 2059 */ 2060 BPFD_LOCK(d); 2061 BPF_PID_REFRESH_CUR(d); 2062 kn->kn_fop = &bpfread_filtops; 2063 kn->kn_hook = d; 2064 knlist_add(&d->bd_sel.si_note, kn, 1); 2065 BPFD_UNLOCK(d); 2066 2067 return (0); 2068 } 2069 2070 static void 2071 filt_bpfdetach(struct knote *kn) 2072 { 2073 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2074 2075 knlist_remove(&d->bd_sel.si_note, kn, 0); 2076 } 2077 2078 static int 2079 filt_bpfread(struct knote *kn, long hint) 2080 { 2081 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2082 int ready; 2083 2084 BPFD_LOCK_ASSERT(d); 2085 ready = bpf_ready(d); 2086 if (ready) { 2087 kn->kn_data = d->bd_slen; 2088 /* 2089 * Ignore the hold buffer if it is being copied to user space. 2090 */ 2091 if (!d->bd_hbuf_in_use && d->bd_hbuf) 2092 kn->kn_data += d->bd_hlen; 2093 } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 2094 callout_reset(&d->bd_callout, d->bd_rtout, 2095 bpf_timed_out, d); 2096 d->bd_state = BPF_WAITING; 2097 } 2098 2099 return (ready); 2100 } 2101 2102 #define BPF_TSTAMP_NONE 0 2103 #define BPF_TSTAMP_FAST 1 2104 #define BPF_TSTAMP_NORMAL 2 2105 #define BPF_TSTAMP_EXTERN 3 2106 2107 static int 2108 bpf_ts_quality(int tstype) 2109 { 2110 2111 if (tstype == BPF_T_NONE) 2112 return (BPF_TSTAMP_NONE); 2113 if ((tstype & BPF_T_FAST) != 0) 2114 return (BPF_TSTAMP_FAST); 2115 2116 return (BPF_TSTAMP_NORMAL); 2117 } 2118 2119 static int 2120 bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m) 2121 { 2122 struct m_tag *tag; 2123 int quality; 2124 2125 quality = bpf_ts_quality(tstype); 2126 if (quality == BPF_TSTAMP_NONE) 2127 return (quality); 2128 2129 if (m != NULL) { 2130 tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL); 2131 if (tag != NULL) { 2132 *bt = *(struct bintime *)(tag + 1); 2133 return (BPF_TSTAMP_EXTERN); 2134 } 2135 } 2136 if (quality == BPF_TSTAMP_NORMAL) 2137 binuptime(bt); 2138 else 2139 getbinuptime(bt); 2140 2141 return (quality); 2142 } 2143 2144 /* 2145 * Incoming linkage from device drivers. Process the packet pkt, of length 2146 * pktlen, which is stored in a contiguous buffer. The packet is parsed 2147 * by each process' filter, and if accepted, stashed into the corresponding 2148 * buffer. 2149 */ 2150 void 2151 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 2152 { 2153 struct bintime bt; 2154 struct bpf_d *d; 2155 #ifdef BPF_JITTER 2156 bpf_jit_filter *bf; 2157 #endif 2158 u_int slen; 2159 int gottime; 2160 2161 gottime = BPF_TSTAMP_NONE; 2162 2163 BPFIF_RLOCK(bp); 2164 2165 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2166 /* 2167 * We are not using any locks for d here because: 2168 * 1) any filter change is protected by interface 2169 * write lock 2170 * 2) destroying/detaching d is protected by interface 2171 * write lock, too 2172 */ 2173 2174 counter_u64_add(d->bd_rcount, 1); 2175 /* 2176 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no 2177 * way for the caller to indiciate to us whether this packet 2178 * is inbound or outbound. In the bpf_mtap() routines, we use 2179 * the interface pointers on the mbuf to figure it out. 2180 */ 2181 #ifdef BPF_JITTER 2182 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; 2183 if (bf != NULL) 2184 slen = (*(bf->func))(pkt, pktlen, pktlen); 2185 else 2186 #endif 2187 slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen); 2188 if (slen != 0) { 2189 /* 2190 * Filter matches. Let's to acquire write lock. 2191 */ 2192 BPFD_LOCK(d); 2193 2194 counter_u64_add(d->bd_fcount, 1); 2195 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2196 gottime = bpf_gettime(&bt, d->bd_tstamp, NULL); 2197 #ifdef MAC 2198 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2199 #endif 2200 catchpacket(d, pkt, pktlen, slen, 2201 bpf_append_bytes, &bt); 2202 BPFD_UNLOCK(d); 2203 } 2204 } 2205 BPFIF_RUNLOCK(bp); 2206 } 2207 2208 #define BPF_CHECK_DIRECTION(d, r, i) \ 2209 (((d)->bd_direction == BPF_D_IN && (r) != (i)) || \ 2210 ((d)->bd_direction == BPF_D_OUT && (r) == (i))) 2211 2212 /* 2213 * Incoming linkage from device drivers, when packet is in an mbuf chain. 2214 * Locking model is explained in bpf_tap(). 2215 */ 2216 void 2217 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2218 { 2219 struct bintime bt; 2220 struct bpf_d *d; 2221 #ifdef BPF_JITTER 2222 bpf_jit_filter *bf; 2223 #endif 2224 u_int pktlen, slen; 2225 int gottime; 2226 2227 /* Skip outgoing duplicate packets. */ 2228 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 2229 m->m_flags &= ~M_PROMISC; 2230 return; 2231 } 2232 2233 pktlen = m_length(m, NULL); 2234 gottime = BPF_TSTAMP_NONE; 2235 2236 BPFIF_RLOCK(bp); 2237 2238 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2239 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) 2240 continue; 2241 counter_u64_add(d->bd_rcount, 1); 2242 #ifdef BPF_JITTER 2243 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; 2244 /* XXX We cannot handle multiple mbufs. */ 2245 if (bf != NULL && m->m_next == NULL) 2246 slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen); 2247 else 2248 #endif 2249 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0); 2250 if (slen != 0) { 2251 BPFD_LOCK(d); 2252 2253 counter_u64_add(d->bd_fcount, 1); 2254 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2255 gottime = bpf_gettime(&bt, d->bd_tstamp, m); 2256 #ifdef MAC 2257 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2258 #endif 2259 catchpacket(d, (u_char *)m, pktlen, slen, 2260 bpf_append_mbuf, &bt); 2261 BPFD_UNLOCK(d); 2262 } 2263 } 2264 BPFIF_RUNLOCK(bp); 2265 } 2266 2267 /* 2268 * Incoming linkage from device drivers, when packet is in 2269 * an mbuf chain and to be prepended by a contiguous header. 2270 */ 2271 void 2272 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 2273 { 2274 struct bintime bt; 2275 struct mbuf mb; 2276 struct bpf_d *d; 2277 u_int pktlen, slen; 2278 int gottime; 2279 2280 /* Skip outgoing duplicate packets. */ 2281 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 2282 m->m_flags &= ~M_PROMISC; 2283 return; 2284 } 2285 2286 pktlen = m_length(m, NULL); 2287 /* 2288 * Craft on-stack mbuf suitable for passing to bpf_filter. 2289 * Note that we cut corners here; we only setup what's 2290 * absolutely needed--this mbuf should never go anywhere else. 2291 */ 2292 mb.m_next = m; 2293 mb.m_data = data; 2294 mb.m_len = dlen; 2295 pktlen += dlen; 2296 2297 gottime = BPF_TSTAMP_NONE; 2298 2299 BPFIF_RLOCK(bp); 2300 2301 LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2302 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) 2303 continue; 2304 counter_u64_add(d->bd_rcount, 1); 2305 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0); 2306 if (slen != 0) { 2307 BPFD_LOCK(d); 2308 2309 counter_u64_add(d->bd_fcount, 1); 2310 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2311 gottime = bpf_gettime(&bt, d->bd_tstamp, m); 2312 #ifdef MAC 2313 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2314 #endif 2315 catchpacket(d, (u_char *)&mb, pktlen, slen, 2316 bpf_append_mbuf, &bt); 2317 BPFD_UNLOCK(d); 2318 } 2319 } 2320 BPFIF_RUNLOCK(bp); 2321 } 2322 2323 #undef BPF_CHECK_DIRECTION 2324 2325 #undef BPF_TSTAMP_NONE 2326 #undef BPF_TSTAMP_FAST 2327 #undef BPF_TSTAMP_NORMAL 2328 #undef BPF_TSTAMP_EXTERN 2329 2330 static int 2331 bpf_hdrlen(struct bpf_d *d) 2332 { 2333 int hdrlen; 2334 2335 hdrlen = d->bd_bif->bif_hdrlen; 2336 #ifndef BURN_BRIDGES 2337 if (d->bd_tstamp == BPF_T_NONE || 2338 BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME) 2339 #ifdef COMPAT_FREEBSD32 2340 if (d->bd_compat32) 2341 hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32); 2342 else 2343 #endif 2344 hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr); 2345 else 2346 #endif 2347 hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr); 2348 #ifdef COMPAT_FREEBSD32 2349 if (d->bd_compat32) 2350 hdrlen = BPF_WORDALIGN32(hdrlen); 2351 else 2352 #endif 2353 hdrlen = BPF_WORDALIGN(hdrlen); 2354 2355 return (hdrlen - d->bd_bif->bif_hdrlen); 2356 } 2357 2358 static void 2359 bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype) 2360 { 2361 struct bintime bt2, boottimebin; 2362 struct timeval tsm; 2363 struct timespec tsn; 2364 2365 if ((tstype & BPF_T_MONOTONIC) == 0) { 2366 bt2 = *bt; 2367 getboottimebin(&boottimebin); 2368 bintime_add(&bt2, &boottimebin); 2369 bt = &bt2; 2370 } 2371 switch (BPF_T_FORMAT(tstype)) { 2372 case BPF_T_MICROTIME: 2373 bintime2timeval(bt, &tsm); 2374 ts->bt_sec = tsm.tv_sec; 2375 ts->bt_frac = tsm.tv_usec; 2376 break; 2377 case BPF_T_NANOTIME: 2378 bintime2timespec(bt, &tsn); 2379 ts->bt_sec = tsn.tv_sec; 2380 ts->bt_frac = tsn.tv_nsec; 2381 break; 2382 case BPF_T_BINTIME: 2383 ts->bt_sec = bt->sec; 2384 ts->bt_frac = bt->frac; 2385 break; 2386 } 2387 } 2388 2389 /* 2390 * Move the packet data from interface memory (pkt) into the 2391 * store buffer. "cpfn" is the routine called to do the actual data 2392 * transfer. bcopy is passed in to copy contiguous chunks, while 2393 * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case, 2394 * pkt is really an mbuf. 2395 */ 2396 static void 2397 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 2398 void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), 2399 struct bintime *bt) 2400 { 2401 struct bpf_xhdr hdr; 2402 #ifndef BURN_BRIDGES 2403 struct bpf_hdr hdr_old; 2404 #ifdef COMPAT_FREEBSD32 2405 struct bpf_hdr32 hdr32_old; 2406 #endif 2407 #endif 2408 int caplen, curlen, hdrlen, totlen; 2409 int do_wakeup = 0; 2410 int do_timestamp; 2411 int tstype; 2412 2413 BPFD_LOCK_ASSERT(d); 2414 2415 /* 2416 * Detect whether user space has released a buffer back to us, and if 2417 * so, move it from being a hold buffer to a free buffer. This may 2418 * not be the best place to do it (for example, we might only want to 2419 * run this check if we need the space), but for now it's a reliable 2420 * spot to do it. 2421 */ 2422 if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { 2423 d->bd_fbuf = d->bd_hbuf; 2424 d->bd_hbuf = NULL; 2425 d->bd_hlen = 0; 2426 bpf_buf_reclaimed(d); 2427 } 2428 2429 /* 2430 * Figure out how many bytes to move. If the packet is 2431 * greater or equal to the snapshot length, transfer that 2432 * much. Otherwise, transfer the whole packet (unless 2433 * we hit the buffer size limit). 2434 */ 2435 hdrlen = bpf_hdrlen(d); 2436 totlen = hdrlen + min(snaplen, pktlen); 2437 if (totlen > d->bd_bufsize) 2438 totlen = d->bd_bufsize; 2439 2440 /* 2441 * Round up the end of the previous packet to the next longword. 2442 * 2443 * Drop the packet if there's no room and no hope of room 2444 * If the packet would overflow the storage buffer or the storage 2445 * buffer is considered immutable by the buffer model, try to rotate 2446 * the buffer and wakeup pending processes. 2447 */ 2448 #ifdef COMPAT_FREEBSD32 2449 if (d->bd_compat32) 2450 curlen = BPF_WORDALIGN32(d->bd_slen); 2451 else 2452 #endif 2453 curlen = BPF_WORDALIGN(d->bd_slen); 2454 if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { 2455 if (d->bd_fbuf == NULL) { 2456 /* 2457 * There's no room in the store buffer, and no 2458 * prospect of room, so drop the packet. Notify the 2459 * buffer model. 2460 */ 2461 bpf_buffull(d); 2462 counter_u64_add(d->bd_dcount, 1); 2463 return; 2464 } 2465 KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use")); 2466 ROTATE_BUFFERS(d); 2467 do_wakeup = 1; 2468 curlen = 0; 2469 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) 2470 /* 2471 * Immediate mode is set, or the read timeout has already 2472 * expired during a select call. A packet arrived, so the 2473 * reader should be woken up. 2474 */ 2475 do_wakeup = 1; 2476 caplen = totlen - hdrlen; 2477 tstype = d->bd_tstamp; 2478 do_timestamp = tstype != BPF_T_NONE; 2479 #ifndef BURN_BRIDGES 2480 if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) { 2481 struct bpf_ts ts; 2482 if (do_timestamp) 2483 bpf_bintime2ts(bt, &ts, tstype); 2484 #ifdef COMPAT_FREEBSD32 2485 if (d->bd_compat32) { 2486 bzero(&hdr32_old, sizeof(hdr32_old)); 2487 if (do_timestamp) { 2488 hdr32_old.bh_tstamp.tv_sec = ts.bt_sec; 2489 hdr32_old.bh_tstamp.tv_usec = ts.bt_frac; 2490 } 2491 hdr32_old.bh_datalen = pktlen; 2492 hdr32_old.bh_hdrlen = hdrlen; 2493 hdr32_old.bh_caplen = caplen; 2494 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old, 2495 sizeof(hdr32_old)); 2496 goto copy; 2497 } 2498 #endif 2499 bzero(&hdr_old, sizeof(hdr_old)); 2500 if (do_timestamp) { 2501 hdr_old.bh_tstamp.tv_sec = ts.bt_sec; 2502 hdr_old.bh_tstamp.tv_usec = ts.bt_frac; 2503 } 2504 hdr_old.bh_datalen = pktlen; 2505 hdr_old.bh_hdrlen = hdrlen; 2506 hdr_old.bh_caplen = caplen; 2507 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old, 2508 sizeof(hdr_old)); 2509 goto copy; 2510 } 2511 #endif 2512 2513 /* 2514 * Append the bpf header. Note we append the actual header size, but 2515 * move forward the length of the header plus padding. 2516 */ 2517 bzero(&hdr, sizeof(hdr)); 2518 if (do_timestamp) 2519 bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype); 2520 hdr.bh_datalen = pktlen; 2521 hdr.bh_hdrlen = hdrlen; 2522 hdr.bh_caplen = caplen; 2523 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); 2524 2525 /* 2526 * Copy the packet data into the store buffer and update its length. 2527 */ 2528 #ifndef BURN_BRIDGES 2529 copy: 2530 #endif 2531 (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen); 2532 d->bd_slen = curlen + totlen; 2533 2534 if (do_wakeup) 2535 bpf_wakeup(d); 2536 } 2537 2538 /* 2539 * Free buffers currently in use by a descriptor. 2540 * Called on close. 2541 */ 2542 static void 2543 bpf_freed(struct bpf_d *d) 2544 { 2545 2546 /* 2547 * We don't need to lock out interrupts since this descriptor has 2548 * been detached from its interface and it yet hasn't been marked 2549 * free. 2550 */ 2551 bpf_free(d); 2552 if (d->bd_rfilter != NULL) { 2553 free((caddr_t)d->bd_rfilter, M_BPF); 2554 #ifdef BPF_JITTER 2555 if (d->bd_bfilter != NULL) 2556 bpf_destroy_jit_filter(d->bd_bfilter); 2557 #endif 2558 } 2559 if (d->bd_wfilter != NULL) 2560 free((caddr_t)d->bd_wfilter, M_BPF); 2561 mtx_destroy(&d->bd_lock); 2562 2563 counter_u64_free(d->bd_rcount); 2564 counter_u64_free(d->bd_dcount); 2565 counter_u64_free(d->bd_fcount); 2566 counter_u64_free(d->bd_wcount); 2567 counter_u64_free(d->bd_wfcount); 2568 counter_u64_free(d->bd_wdcount); 2569 counter_u64_free(d->bd_zcopy); 2570 2571 } 2572 2573 /* 2574 * Attach an interface to bpf. dlt is the link layer type; hdrlen is the 2575 * fixed size of the link header (variable length headers not yet supported). 2576 */ 2577 void 2578 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2579 { 2580 2581 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 2582 } 2583 2584 /* 2585 * Attach an interface to bpf. ifp is a pointer to the structure 2586 * defining the interface to be attached, dlt is the link layer type, 2587 * and hdrlen is the fixed size of the link header (variable length 2588 * headers are not yet supporrted). 2589 */ 2590 void 2591 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 2592 { 2593 struct bpf_if *bp; 2594 2595 bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO); 2596 if (bp == NULL) 2597 panic("bpfattach"); 2598 2599 LIST_INIT(&bp->bif_dlist); 2600 LIST_INIT(&bp->bif_wlist); 2601 bp->bif_ifp = ifp; 2602 bp->bif_dlt = dlt; 2603 rw_init(&bp->bif_lock, "bpf interface lock"); 2604 KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized")); 2605 bp->bif_bpf = driverp; 2606 *driverp = bp; 2607 2608 BPF_LOCK(); 2609 LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); 2610 BPF_UNLOCK(); 2611 2612 bp->bif_hdrlen = hdrlen; 2613 2614 if (bootverbose && IS_DEFAULT_VNET(curvnet)) 2615 if_printf(ifp, "bpf attached\n"); 2616 } 2617 2618 #ifdef VIMAGE 2619 /* 2620 * When moving interfaces between vnet instances we need a way to 2621 * query the dlt and hdrlen before detach so we can re-attch the if_bpf 2622 * after the vmove. We unfortunately have no device driver infrastructure 2623 * to query the interface for these values after creation/attach, thus 2624 * add this as a workaround. 2625 */ 2626 int 2627 bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen) 2628 { 2629 2630 if (bp == NULL) 2631 return (ENXIO); 2632 if (bif_dlt == NULL && bif_hdrlen == NULL) 2633 return (0); 2634 2635 if (bif_dlt != NULL) 2636 *bif_dlt = bp->bif_dlt; 2637 if (bif_hdrlen != NULL) 2638 *bif_hdrlen = bp->bif_hdrlen; 2639 2640 return (0); 2641 } 2642 #endif 2643 2644 /* 2645 * Detach bpf from an interface. This involves detaching each descriptor 2646 * associated with the interface. Notify each descriptor as it's detached 2647 * so that any sleepers wake up and get ENXIO. 2648 */ 2649 void 2650 bpfdetach(struct ifnet *ifp) 2651 { 2652 struct bpf_if *bp, *bp_temp; 2653 struct bpf_d *d; 2654 int ndetached; 2655 2656 ndetached = 0; 2657 2658 BPF_LOCK(); 2659 /* Find all bpf_if struct's which reference ifp and detach them. */ 2660 LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) { 2661 if (ifp != bp->bif_ifp) 2662 continue; 2663 2664 LIST_REMOVE(bp, bif_next); 2665 /* Add to to-be-freed list */ 2666 LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next); 2667 2668 ndetached++; 2669 /* 2670 * Delay freeing bp till interface is detached 2671 * and all routes through this interface are removed. 2672 * Mark bp as detached to restrict new consumers. 2673 */ 2674 BPFIF_WLOCK(bp); 2675 bp->bif_flags |= BPFIF_FLAG_DYING; 2676 *bp->bif_bpf = (struct bpf_if *)&dead_bpf_if; 2677 BPFIF_WUNLOCK(bp); 2678 2679 CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p", 2680 __func__, bp->bif_dlt, bp, ifp); 2681 2682 /* Free common descriptors */ 2683 while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) { 2684 bpf_detachd_locked(d); 2685 BPFD_LOCK(d); 2686 bpf_wakeup(d); 2687 BPFD_UNLOCK(d); 2688 } 2689 2690 /* Free writer-only descriptors */ 2691 while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) { 2692 bpf_detachd_locked(d); 2693 BPFD_LOCK(d); 2694 bpf_wakeup(d); 2695 BPFD_UNLOCK(d); 2696 } 2697 } 2698 BPF_UNLOCK(); 2699 2700 #ifdef INVARIANTS 2701 if (ndetached == 0) 2702 printf("bpfdetach: %s was not attached\n", ifp->if_xname); 2703 #endif 2704 } 2705 2706 /* 2707 * Interface departure handler. 2708 * Note departure event does not guarantee interface is going down. 2709 * Interface renaming is currently done via departure/arrival event set. 2710 * 2711 * Departure handled is called after all routes pointing to 2712 * given interface are removed and interface is in down state 2713 * restricting any packets to be sent/received. We assume it is now safe 2714 * to free data allocated by BPF. 2715 */ 2716 static void 2717 bpf_ifdetach(void *arg __unused, struct ifnet *ifp) 2718 { 2719 struct bpf_if *bp, *bp_temp; 2720 int nmatched = 0; 2721 2722 /* Ignore ifnet renaming. */ 2723 if (ifp->if_flags & IFF_RENAMING) 2724 return; 2725 2726 BPF_LOCK(); 2727 /* 2728 * Find matching entries in free list. 2729 * Nothing should be found if bpfdetach() was not called. 2730 */ 2731 LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) { 2732 if (ifp != bp->bif_ifp) 2733 continue; 2734 2735 CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p", 2736 __func__, bp, ifp); 2737 2738 LIST_REMOVE(bp, bif_next); 2739 2740 rw_destroy(&bp->bif_lock); 2741 free(bp, M_BPF); 2742 2743 nmatched++; 2744 } 2745 BPF_UNLOCK(); 2746 } 2747 2748 /* 2749 * Get a list of available data link type of the interface. 2750 */ 2751 static int 2752 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 2753 { 2754 struct ifnet *ifp; 2755 struct bpf_if *bp; 2756 u_int *lst; 2757 int error, n, n1; 2758 2759 BPF_LOCK_ASSERT(); 2760 2761 ifp = d->bd_bif->bif_ifp; 2762 again: 2763 n1 = 0; 2764 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2765 if (bp->bif_ifp == ifp) 2766 n1++; 2767 } 2768 if (bfl->bfl_list == NULL) { 2769 bfl->bfl_len = n1; 2770 return (0); 2771 } 2772 if (n1 > bfl->bfl_len) 2773 return (ENOMEM); 2774 BPF_UNLOCK(); 2775 lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK); 2776 n = 0; 2777 BPF_LOCK(); 2778 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2779 if (bp->bif_ifp != ifp) 2780 continue; 2781 if (n >= n1) { 2782 free(lst, M_TEMP); 2783 goto again; 2784 } 2785 lst[n] = bp->bif_dlt; 2786 n++; 2787 } 2788 BPF_UNLOCK(); 2789 error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n); 2790 free(lst, M_TEMP); 2791 BPF_LOCK(); 2792 bfl->bfl_len = n; 2793 return (error); 2794 } 2795 2796 /* 2797 * Set the data link type of a BPF instance. 2798 */ 2799 static int 2800 bpf_setdlt(struct bpf_d *d, u_int dlt) 2801 { 2802 int error, opromisc; 2803 struct ifnet *ifp; 2804 struct bpf_if *bp; 2805 2806 BPF_LOCK_ASSERT(); 2807 2808 if (d->bd_bif->bif_dlt == dlt) 2809 return (0); 2810 ifp = d->bd_bif->bif_ifp; 2811 2812 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2813 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 2814 break; 2815 } 2816 2817 if (bp != NULL) { 2818 opromisc = d->bd_promisc; 2819 bpf_attachd(d, bp); 2820 BPFD_LOCK(d); 2821 reset_d(d); 2822 BPFD_UNLOCK(d); 2823 if (opromisc) { 2824 error = ifpromisc(bp->bif_ifp, 1); 2825 if (error) 2826 if_printf(bp->bif_ifp, 2827 "bpf_setdlt: ifpromisc failed (%d)\n", 2828 error); 2829 else 2830 d->bd_promisc = 1; 2831 } 2832 } 2833 return (bp == NULL ? EINVAL : 0); 2834 } 2835 2836 static void 2837 bpf_drvinit(void *unused) 2838 { 2839 struct cdev *dev; 2840 2841 sx_init(&bpf_sx, "bpf global lock"); 2842 LIST_INIT(&bpf_iflist); 2843 LIST_INIT(&bpf_freelist); 2844 2845 dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf"); 2846 /* For compatibility */ 2847 make_dev_alias(dev, "bpf0"); 2848 2849 /* Register interface departure handler */ 2850 bpf_ifdetach_cookie = EVENTHANDLER_REGISTER( 2851 ifnet_departure_event, bpf_ifdetach, NULL, 2852 EVENTHANDLER_PRI_ANY); 2853 } 2854 2855 /* 2856 * Zero out the various packet counters associated with all of the bpf 2857 * descriptors. At some point, we will probably want to get a bit more 2858 * granular and allow the user to specify descriptors to be zeroed. 2859 */ 2860 static void 2861 bpf_zero_counters(void) 2862 { 2863 struct bpf_if *bp; 2864 struct bpf_d *bd; 2865 2866 BPF_LOCK(); 2867 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2868 BPFIF_RLOCK(bp); 2869 LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 2870 BPFD_LOCK(bd); 2871 counter_u64_zero(bd->bd_rcount); 2872 counter_u64_zero(bd->bd_dcount); 2873 counter_u64_zero(bd->bd_fcount); 2874 counter_u64_zero(bd->bd_wcount); 2875 counter_u64_zero(bd->bd_wfcount); 2876 counter_u64_zero(bd->bd_zcopy); 2877 BPFD_UNLOCK(bd); 2878 } 2879 BPFIF_RUNLOCK(bp); 2880 } 2881 BPF_UNLOCK(); 2882 } 2883 2884 /* 2885 * Fill filter statistics 2886 */ 2887 static void 2888 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) 2889 { 2890 2891 bzero(d, sizeof(*d)); 2892 BPFD_LOCK_ASSERT(bd); 2893 d->bd_structsize = sizeof(*d); 2894 /* XXX: reading should be protected by global lock */ 2895 d->bd_immediate = bd->bd_immediate; 2896 d->bd_promisc = bd->bd_promisc; 2897 d->bd_hdrcmplt = bd->bd_hdrcmplt; 2898 d->bd_direction = bd->bd_direction; 2899 d->bd_feedback = bd->bd_feedback; 2900 d->bd_async = bd->bd_async; 2901 d->bd_rcount = counter_u64_fetch(bd->bd_rcount); 2902 d->bd_dcount = counter_u64_fetch(bd->bd_dcount); 2903 d->bd_fcount = counter_u64_fetch(bd->bd_fcount); 2904 d->bd_sig = bd->bd_sig; 2905 d->bd_slen = bd->bd_slen; 2906 d->bd_hlen = bd->bd_hlen; 2907 d->bd_bufsize = bd->bd_bufsize; 2908 d->bd_pid = bd->bd_pid; 2909 strlcpy(d->bd_ifname, 2910 bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ); 2911 d->bd_locked = bd->bd_locked; 2912 d->bd_wcount = counter_u64_fetch(bd->bd_wcount); 2913 d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount); 2914 d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount); 2915 d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy); 2916 d->bd_bufmode = bd->bd_bufmode; 2917 } 2918 2919 /* 2920 * Handle `netstat -B' stats request 2921 */ 2922 static int 2923 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) 2924 { 2925 static const struct xbpf_d zerostats; 2926 struct xbpf_d *xbdbuf, *xbd, tempstats; 2927 int index, error; 2928 struct bpf_if *bp; 2929 struct bpf_d *bd; 2930 2931 /* 2932 * XXX This is not technically correct. It is possible for non 2933 * privileged users to open bpf devices. It would make sense 2934 * if the users who opened the devices were able to retrieve 2935 * the statistics for them, too. 2936 */ 2937 error = priv_check(req->td, PRIV_NET_BPF); 2938 if (error) 2939 return (error); 2940 /* 2941 * Check to see if the user is requesting that the counters be 2942 * zeroed out. Explicitly check that the supplied data is zeroed, 2943 * as we aren't allowing the user to set the counters currently. 2944 */ 2945 if (req->newptr != NULL) { 2946 if (req->newlen != sizeof(tempstats)) 2947 return (EINVAL); 2948 memset(&tempstats, 0, sizeof(tempstats)); 2949 error = SYSCTL_IN(req, &tempstats, sizeof(tempstats)); 2950 if (error) 2951 return (error); 2952 if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0) 2953 return (EINVAL); 2954 bpf_zero_counters(); 2955 return (0); 2956 } 2957 if (req->oldptr == NULL) 2958 return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd))); 2959 if (bpf_bpfd_cnt == 0) 2960 return (SYSCTL_OUT(req, 0, 0)); 2961 xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK); 2962 BPF_LOCK(); 2963 if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) { 2964 BPF_UNLOCK(); 2965 free(xbdbuf, M_BPF); 2966 return (ENOMEM); 2967 } 2968 index = 0; 2969 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2970 BPFIF_RLOCK(bp); 2971 /* Send writers-only first */ 2972 LIST_FOREACH(bd, &bp->bif_wlist, bd_next) { 2973 xbd = &xbdbuf[index++]; 2974 BPFD_LOCK(bd); 2975 bpfstats_fill_xbpf(xbd, bd); 2976 BPFD_UNLOCK(bd); 2977 } 2978 LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 2979 xbd = &xbdbuf[index++]; 2980 BPFD_LOCK(bd); 2981 bpfstats_fill_xbpf(xbd, bd); 2982 BPFD_UNLOCK(bd); 2983 } 2984 BPFIF_RUNLOCK(bp); 2985 } 2986 BPF_UNLOCK(); 2987 error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); 2988 free(xbdbuf, M_BPF); 2989 return (error); 2990 } 2991 2992 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL); 2993 2994 #else /* !DEV_BPF && !NETGRAPH_BPF */ 2995 2996 /* 2997 * NOP stubs to allow bpf-using drivers to load and function. 2998 * 2999 * A 'better' implementation would allow the core bpf functionality 3000 * to be loaded at runtime. 3001 */ 3002 3003 void 3004 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 3005 { 3006 } 3007 3008 void 3009 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 3010 { 3011 } 3012 3013 void 3014 bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) 3015 { 3016 } 3017 3018 void 3019 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 3020 { 3021 3022 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 3023 } 3024 3025 void 3026 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 3027 { 3028 3029 *driverp = (struct bpf_if *)&dead_bpf_if; 3030 } 3031 3032 void 3033 bpfdetach(struct ifnet *ifp) 3034 { 3035 } 3036 3037 u_int 3038 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) 3039 { 3040 return -1; /* "no filter" behaviour */ 3041 } 3042 3043 int 3044 bpf_validate(const struct bpf_insn *f, int len) 3045 { 3046 return 0; /* false */ 3047 } 3048 3049 #endif /* !DEV_BPF && !NETGRAPH_BPF */ 3050 3051 #ifdef DDB 3052 static void 3053 bpf_show_bpf_if(struct bpf_if *bpf_if) 3054 { 3055 3056 if (bpf_if == NULL) 3057 return; 3058 db_printf("%p:\n", bpf_if); 3059 #define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e); 3060 /* bif_ext.bif_next */ 3061 /* bif_ext.bif_dlist */ 3062 BPF_DB_PRINTF("%#x", bif_dlt); 3063 BPF_DB_PRINTF("%u", bif_hdrlen); 3064 BPF_DB_PRINTF("%p", bif_ifp); 3065 /* bif_lock */ 3066 /* bif_wlist */ 3067 BPF_DB_PRINTF("%#x", bif_flags); 3068 } 3069 3070 DB_SHOW_COMMAND(bpf_if, db_show_bpf_if) 3071 { 3072 3073 if (!have_addr) { 3074 db_printf("usage: show bpf_if <struct bpf_if *>\n"); 3075 return; 3076 } 3077 3078 bpf_show_bpf_if((struct bpf_if *)addr); 3079 } 3080 #endif 3081