/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_bpf.h"
#include "opt_ddb.h"
#include "opt_netgraph.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/sysent.h>
#include <sys/systm.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_vlan_var.h>
#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

static const struct bpf_if_ext dead_bpf_if = {
	.bif_dlist = CK_LIST_HEAD_INITIALIZER()
};

struct bpf_if {
#define	bif_next	bif_ext.bif_next
#define	bif_dlist	bif_ext.bif_dlist
	struct bpf_if_ext bif_ext;	/* public members */
	u_int		bif_dlt;	/* link layer type */
	u_int		bif_hdrlen;	/* length of link header */
	struct bpfd_list bif_wlist;	/* writer-only list */
	struct ifnet	*bif_ifp;	/* corresponding interface */
	struct bpf_if	**bif_bpf;	/* Pointer to pointer to us */
	volatile u_int	bif_refcnt;
	struct epoch_context epoch_ctx;
};

CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);

struct bpf_program_buffer {
	struct epoch_context	epoch_ctx;
#ifdef BPF_JITTER
	bpf_jit_filter		*func;
#endif
	void			*buffer[0];
};

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define	PRINET		26	/* interruptible */
#define	BPF_PRIO_MAX	7

#define	SIZEOF_BPF_HDR(type)	\
    (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))

#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define	BPF_ALIGNMENT32		sizeof(int32_t)
#define	BPF_WORDALIGN32(x)	roundup2(x, BPF_ALIGNMENT32)

#ifndef BURN_BRIDGES
/*
 * 32-bit version of structure prepended to each packet.  We use this header
 * instead of the standard one for 32-bit streams.  We mark a stream as
 * 32-bit the first time we see a 32-bit compat ioctl request.
 */
struct bpf_hdr32 {
	struct timeval32 bh_tstamp;	/* time stamp */
	uint32_t	bh_caplen;	/* length of captured portion */
	uint32_t	bh_datalen;	/* original length of packet */
	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
					   plus alignment padding) */
};
#endif

struct bpf_program32 {
	u_int bf_len;
	uint32_t bf_insns;
};

struct bpf_dltlist32 {
	u_int	bfl_len;
	u_int	bfl_list;
};

#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
#endif

#define	BPF_LOCK()		sx_xlock(&bpf_sx)
#define	BPF_UNLOCK()		sx_xunlock(&bpf_sx)
#define	BPF_LOCK_ASSERT()	sx_assert(&bpf_sx, SA_XLOCKED)
/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (i.e., 802.11
 * frames, ethernet frames, etc).
 */
CK_LIST_HEAD(bpf_iflist, bpf_if);
static struct bpf_iflist bpf_iflist = CK_LIST_HEAD_INITIALIZER();
static struct sx	bpf_sx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpfif_ref(struct bpf_if *);
static void	bpfif_rele(struct bpf_if *);

static void	bpfd_ref(struct bpf_d *);
static void	bpfd_rele(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_detachd_locked(struct bpf_d *, bool);
static void	bpfd_free(epoch_context_t);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_d *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct bintime *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static int	filt_bpfwrite(struct knote *, long);
static void	bpf_drvinit(void *);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "bpf sysctl");
int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RWTUN,
    &VNET_NAME(bpf_optimize_writers), 0,
    "Do not send packets until BPF program is set");
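
/*
 * Illustrative only (not part of the original source): the knobs defined
 * above can be inspected or set from userland with sysctl(8), for example:
 *
 *	sysctl net.bpf.maxinsns
 *	sysctl net.bpf.zerocopy_enable=1
 *	sysctl net.bpf.optimize_writers=1
 *
 * optimize_writers is per-VNET and, being CTLFLAG_RWTUN, may also be set
 * as a loader tunable.
 */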

static d_open_t		bpfopen;
static d_read_t		bpfread;
static d_write_t	bpfwrite;
static d_ioctl_t	bpfioctl;
static d_poll_t		bpfpoll;
static d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static const struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
	.f_copy = knote_triv_copy,
};

static const struct filterops bpfwrite_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfwrite,
	.f_copy = knote_triv_copy,
};

/*
 * LOCKING MODEL USED BY BPF
 *
 * Locks:
 * 1) Global lock (BPF_LOCK).  An sx lock used to protect some global
 *    counters and every change to bpf_iflist, and to serialize ioctl
 *    access to bpf descriptors.
 * 2) Descriptor lock.  A mutex used to protect BPF buffers and various
 *    structure fields used by the bpf_*tap* code.
 *
 * Lock order: global lock, then descriptor lock.
 *
 * There are several possible consumers:
 *
 * 1. The kernel registers an interface pointer with bpfattach().
 *    Each call allocates a new bpf_if structure, references the ifnet
 *    pointer and links the bpf_if into the bpf_iflist chain.  This is
 *    protected by the global lock.
 *
 * 2. A userland application issues ioctl() calls on a bpf_d descriptor.
 *    All such calls are serialized with the global lock.  BPF filters can
 *    be changed, but the pointer to the old filter is freed using
 *    NET_EPOCH_CALL().  Thus it is safe for bpf_tap/bpf_mtap* code to
 *    access filter pointers even if a change happens during bpf_tap
 *    execution.  Destruction of a bpf_d descriptor is likewise done using
 *    NET_EPOCH_CALL().
 *
 * 3. A userland application can write packets to a bpf_d descriptor.
 *    There we need to be sure that the ifnet won't disappear during
 *    bpfwrite().
 *
 * 4. The kernel invokes the bpf_tap/bpf_mtap* functions.  Access to
 *    bif_dlist is protected by a net_epoch_preempt section, so it is
 *    safe to access the bpf_d descriptor inside the section.
 *
 * 5. The kernel invokes bpfdetach() on interface destruction.  All lists
 *    are modified with the global lock held and the actual free() is done
 *    using NET_EPOCH_CALL().
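 *
 * As a rough sketch (illustrative, not part of the original source), the
 * reader-side pattern used by the tap paths below looks like:
 *
 *	NET_EPOCH_ENTER(et);
 *	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 *		if (filter matches) {
 *			BPFD_LOCK(d);
 *			catchpacket(d, ...);
 *			BPFD_UNLOCK(d);
 *		}
 *	}
 *	NET_EPOCH_EXIT(et);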
 */

static void
bpfif_free(epoch_context_t ctx)
{
	struct bpf_if *bp;

	bp = __containerof(ctx, struct bpf_if, epoch_ctx);
	if_rele(bp->bif_ifp);
	free(bp, M_BPF);
}

static void
bpfif_ref(struct bpf_if *bp)
{

	refcount_acquire(&bp->bif_refcnt);
}

static void
bpfif_rele(struct bpf_if *bp)
{

	if (!refcount_release(&bp->bif_refcnt))
		return;
	NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx);
}

static void
bpfd_ref(struct bpf_d *d)
{

	refcount_acquire(&d->bd_refcnt);
}

static void
bpfd_rele(struct bpf_d *d)
{

	if (!refcount_release(&d->bd_refcnt))
		return;
	NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
}

static struct bpf_program_buffer*
bpf_program_buffer_alloc(size_t size, int flags)
{

	return (malloc(sizeof(struct bpf_program_buffer) + size,
	    M_BPF, flags));
}

static void
bpf_program_buffer_free(epoch_context_t ctx)
{
	struct bpf_program_buffer *ptr;

	ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
#ifdef BPF_JITTER
	if (ptr->func != NULL)
		bpf_destroy_jit_filter(ptr->func);
#endif
	free(ptr, M_BPF);
}

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		counter_u64_add(d->bd_zcopy, 1);
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		counter_u64_add(d->bd_zcopy, 1);
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * This function gets called when the free buffer is re-assigned.
 */
static void
bpf_buf_reclaimed(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return;

	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buf_reclaimed(d);
		return;

	default:
		panic("bpf_buf_reclaimed");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

/*
 * Allow the buffer model to indicate that the current store buffer is
 * immutable, regardless of the appearance of space.  Return (1) if the
 * buffer is writable, and (0) if not.
455 */ 456 static int 457 bpf_canwritebuf(struct bpf_d *d) 458 { 459 BPFD_LOCK_ASSERT(d); 460 461 switch (d->bd_bufmode) { 462 case BPF_BUFMODE_ZBUF: 463 return (bpf_zerocopy_canwritebuf(d)); 464 } 465 return (1); 466 } 467 468 /* 469 * Notify buffer model that an attempt to write to the store buffer has 470 * resulted in a dropped packet, in which case the buffer may be considered 471 * full. 472 */ 473 static void 474 bpf_buffull(struct bpf_d *d) 475 { 476 477 BPFD_LOCK_ASSERT(d); 478 479 switch (d->bd_bufmode) { 480 case BPF_BUFMODE_ZBUF: 481 bpf_zerocopy_buffull(d); 482 break; 483 } 484 } 485 486 /* 487 * Notify the buffer model that a buffer has moved into the hold position. 488 */ 489 void 490 bpf_bufheld(struct bpf_d *d) 491 { 492 493 BPFD_LOCK_ASSERT(d); 494 495 switch (d->bd_bufmode) { 496 case BPF_BUFMODE_ZBUF: 497 bpf_zerocopy_bufheld(d); 498 break; 499 } 500 } 501 502 static void 503 bpf_free(struct bpf_d *d) 504 { 505 506 switch (d->bd_bufmode) { 507 case BPF_BUFMODE_BUFFER: 508 return (bpf_buffer_free(d)); 509 510 case BPF_BUFMODE_ZBUF: 511 return (bpf_zerocopy_free(d)); 512 513 default: 514 panic("bpf_buf_free"); 515 } 516 } 517 518 static int 519 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) 520 { 521 522 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 523 return (EOPNOTSUPP); 524 return (bpf_buffer_uiomove(d, buf, len, uio)); 525 } 526 527 static int 528 bpf_ioctl_sblen(struct bpf_d *d, u_int *i) 529 { 530 531 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 532 return (EOPNOTSUPP); 533 return (bpf_buffer_ioctl_sblen(d, i)); 534 } 535 536 static int 537 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i) 538 { 539 540 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 541 return (EOPNOTSUPP); 542 return (bpf_zerocopy_ioctl_getzmax(td, d, i)); 543 } 544 545 static int 546 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 547 { 548 549 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 550 return (EOPNOTSUPP); 551 return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz)); 552 } 553 554 static int 555 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 556 { 557 558 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 559 return (EOPNOTSUPP); 560 return (bpf_zerocopy_ioctl_setzbuf(td, d, bz)); 561 } 562 563 /* 564 * General BPF functions. 565 */ 566 static int 567 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, 568 struct sockaddr *sockp, int *hdrlen, struct bpf_d *d) 569 { 570 const struct ieee80211_bpf_params *p; 571 struct ether_header *eh; 572 struct mbuf *m; 573 int error; 574 int len; 575 int hlen; 576 int slen; 577 578 /* 579 * Build a sockaddr based on the data link layer type. 580 * We do this at this level because the ethernet header 581 * is copied directly into the data field of the sockaddr. 582 * In the case of SLIP, there is no header and the packet 583 * is forwarded as is. 584 * Also, we are careful to leave room at the front of the mbuf 585 * for the link level header. 586 */ 587 switch (linktype) { 588 case DLT_SLIP: 589 sockp->sa_family = AF_INET; 590 hlen = 0; 591 break; 592 593 case DLT_EN10MB: 594 sockp->sa_family = AF_UNSPEC; 595 /* XXX Would MAXLINKHDR be better? 
*/ 596 hlen = ETHER_HDR_LEN; 597 break; 598 599 case DLT_FDDI: 600 sockp->sa_family = AF_IMPLINK; 601 hlen = 0; 602 break; 603 604 case DLT_RAW: 605 sockp->sa_family = AF_UNSPEC; 606 hlen = 0; 607 break; 608 609 case DLT_NULL: 610 /* 611 * null interface types require a 4 byte pseudo header which 612 * corresponds to the address family of the packet. 613 */ 614 sockp->sa_family = AF_UNSPEC; 615 hlen = 4; 616 break; 617 618 case DLT_ATM_RFC1483: 619 /* 620 * en atm driver requires 4-byte atm pseudo header. 621 * though it isn't standard, vpi:vci needs to be 622 * specified anyway. 623 */ 624 sockp->sa_family = AF_UNSPEC; 625 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ 626 break; 627 628 case DLT_PPP: 629 sockp->sa_family = AF_UNSPEC; 630 hlen = 4; /* This should match PPP_HDRLEN */ 631 break; 632 633 case DLT_IEEE802_11: /* IEEE 802.11 wireless */ 634 sockp->sa_family = AF_IEEE80211; 635 hlen = 0; 636 break; 637 638 case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */ 639 sockp->sa_family = AF_IEEE80211; 640 sockp->sa_len = 12; /* XXX != 0 */ 641 hlen = sizeof(struct ieee80211_bpf_params); 642 break; 643 644 default: 645 return (EIO); 646 } 647 648 len = uio->uio_resid; 649 if (len < hlen || len - hlen > ifp->if_mtu) 650 return (EMSGSIZE); 651 652 /* Allocate a mbuf, up to MJUM16BYTES bytes, for our write. */ 653 m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR); 654 if (m == NULL) 655 return (EIO); 656 m->m_pkthdr.len = m->m_len = len; 657 *mp = m; 658 659 error = uiomove(mtod(m, u_char *), len, uio); 660 if (error) 661 goto bad; 662 663 slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len); 664 if (slen == 0) { 665 error = EPERM; 666 goto bad; 667 } 668 669 /* Check for multicast destination */ 670 switch (linktype) { 671 case DLT_EN10MB: 672 eh = mtod(m, struct ether_header *); 673 if (ETHER_IS_MULTICAST(eh->ether_dhost)) { 674 if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost, 675 ETHER_ADDR_LEN) == 0) 676 m->m_flags |= M_BCAST; 677 else 678 m->m_flags |= M_MCAST; 679 } 680 if (d->bd_hdrcmplt == 0) { 681 memcpy(eh->ether_shost, IF_LLADDR(ifp), 682 sizeof(eh->ether_shost)); 683 } 684 break; 685 } 686 687 /* 688 * Make room for link header, and copy it to sockaddr 689 */ 690 if (hlen != 0) { 691 if (sockp->sa_family == AF_IEEE80211) { 692 /* 693 * Collect true length from the parameter header 694 * NB: sockp is known to be zero'd so if we do a 695 * short copy unspecified parameters will be 696 * zero. 697 * NB: packet may not be aligned after stripping 698 * bpf params 699 * XXX check ibp_vers 700 */ 701 p = mtod(m, const struct ieee80211_bpf_params *); 702 hlen = p->ibp_len; 703 if (hlen > sizeof(sockp->sa_data)) { 704 error = EINVAL; 705 goto bad; 706 } 707 } 708 bcopy(mtod(m, const void *), sockp->sa_data, hlen); 709 } 710 *hdrlen = hlen; 711 712 return (0); 713 bad: 714 m_freem(m); 715 return (error); 716 } 717 718 /* 719 * Attach descriptor to the bpf interface, i.e. make d listen on bp, 720 * then reset its buffers and counters with reset_d(). 721 */ 722 static void 723 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 724 { 725 int op_w; 726 727 BPF_LOCK_ASSERT(); 728 729 /* 730 * Save sysctl value to protect from sysctl change 731 * between reads 732 */ 733 op_w = V_bpf_optimize_writers || d->bd_writer; 734 735 if (d->bd_bif != NULL) 736 bpf_detachd_locked(d, false); 737 /* 738 * Point d at bp, and add d to the interface's list. 
	 * Since there are many applications using BPF for
	 * sending raw packets only (dhcpd, cdpd are good examples),
	 * we can delay adding d to the list of active listeners until
	 * some filter is configured.
	 */

	BPFD_LOCK(d);
	/*
	 * Hold reference to bpif while descriptor uses this interface.
	 */
	bpfif_ref(bp);
	d->bd_bif = bp;
	if (op_w != 0) {
		/* Add to writers-only list */
		CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
		/*
		 * We decrement bd_writer on every filter set operation.
		 * The first BIOCSETF is done by pcap_open_live() to set up
		 * the snap length.  After that the application usually sets
		 * its own filter.
		 */
		d->bd_writer = 2;
	} else
		CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	reset_d(d);

	/* Trigger EVFILT_WRITE events. */
	bpf_wakeup(d);

	BPFD_UNLOCK(d);
	bpf_bpfd_cnt++;

	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");

	if (op_w == 0)
		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}

/*
 * Check if we need to upgrade our descriptor @d from write-only mode.
 */
static int
bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
    int flen)
{
	int is_snap, need_upgrade;

	/*
	 * Check if we've already upgraded or the new filter is empty.
	 */
	if (d->bd_writer == 0 || fcode == NULL)
		return (0);

	need_upgrade = 0;

	/*
	 * Check if cmd looks like a snaplen setting from
	 * pcap_bpf.c:pcap_open_live().
	 * Note we're not checking the .k value here:
	 * while pcap_open_live() definitely sets it to a non-zero value,
	 * we'd prefer to treat the k=0 (deny ALL) case the same way, i.e.
	 * not consider upgrading immediately.
	 */
	if (cmd == BIOCSETF && flen == 1 &&
	    fcode[0].code == (BPF_RET | BPF_K))
		is_snap = 1;
	else
		is_snap = 0;

	if (is_snap == 0) {
		/*
		 * We're setting the first filter and it doesn't look like
		 * setting snaplen.  We're probably using bpf directly.
		 * Upgrade immediately.
		 */
		need_upgrade = 1;
	} else {
		/*
		 * Do not require an upgrade on the first BIOCSETF
		 * (used to set snaplen) issued by pcap_open_live().
		 */

		if (--d->bd_writer == 0) {
			/*
			 * First snaplen filter has already
			 * been set.  This is probably a catch-all
			 * filter.
			 */
			need_upgrade = 1;
		}
	}

	CTR5(KTR_NET,
	    "%s: filter function set by pid %d, "
	    "bd_writer counter %d, snap %d upgrade %d",
	    __func__, d->bd_pid, d->bd_writer,
	    is_snap, need_upgrade);

	return (need_upgrade);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	BPF_LOCK();
	bpf_detachd_locked(d, false);
	BPF_UNLOCK();
}

static void
bpf_detachd_locked(struct bpf_d *d, bool detached_ifp)
{
	struct bpf_if *bp;
	struct ifnet *ifp;
	int error;

	BPF_LOCK_ASSERT();
	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);

	/* Check if descriptor is attached */
	if ((bp = d->bd_bif) == NULL)
		return;

	BPFD_LOCK(d);
	/* Remove d from the interface's descriptor list.
*/ 869 CK_LIST_REMOVE(d, bd_next); 870 /* Save bd_writer value */ 871 error = d->bd_writer; 872 ifp = bp->bif_ifp; 873 d->bd_bif = NULL; 874 if (detached_ifp) { 875 /* 876 * Notify descriptor as it's detached, so that any 877 * sleepers wake up and get ENXIO. 878 */ 879 bpf_wakeup(d); 880 } 881 BPFD_UNLOCK(d); 882 bpf_bpfd_cnt--; 883 884 /* Call event handler iff d is attached */ 885 if (error == 0) 886 EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0); 887 888 /* 889 * Check if this descriptor had requested promiscuous mode. 890 * If so and ifnet is not detached, turn it off. 891 */ 892 if (d->bd_promisc && !detached_ifp) { 893 d->bd_promisc = 0; 894 CURVNET_SET(ifp->if_vnet); 895 error = ifpromisc(ifp, 0); 896 CURVNET_RESTORE(); 897 if (error != 0 && error != ENXIO) { 898 /* 899 * ENXIO can happen if a pccard is unplugged 900 * Something is really wrong if we were able to put 901 * the driver into promiscuous mode, but can't 902 * take it out. 903 */ 904 if_printf(bp->bif_ifp, 905 "bpf_detach: ifpromisc failed (%d)\n", error); 906 } 907 } 908 bpfif_rele(bp); 909 } 910 911 /* 912 * Close the descriptor by detaching it from its interface, 913 * deallocating its buffers, and marking it free. 914 */ 915 static void 916 bpf_dtor(void *data) 917 { 918 struct bpf_d *d = data; 919 920 BPFD_LOCK(d); 921 if (d->bd_state == BPF_WAITING) 922 callout_stop(&d->bd_callout); 923 d->bd_state = BPF_IDLE; 924 BPFD_UNLOCK(d); 925 funsetown(&d->bd_sigio); 926 bpf_detachd(d); 927 #ifdef MAC 928 mac_bpfdesc_destroy(d); 929 #endif /* MAC */ 930 seldrain(&d->bd_sel); 931 knlist_destroy(&d->bd_sel.si_note); 932 callout_drain(&d->bd_callout); 933 bpfd_rele(d); 934 } 935 936 /* 937 * Open ethernet device. Returns ENXIO for illegal minor device number, 938 * EBUSY if file is open by another process. 939 */ 940 /* ARGSUSED */ 941 static int 942 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) 943 { 944 struct bpf_d *d; 945 int error; 946 947 d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO); 948 error = devfs_set_cdevpriv(d, bpf_dtor); 949 if (error != 0) { 950 free(d, M_BPF); 951 return (error); 952 } 953 954 /* Setup counters */ 955 d->bd_rcount = counter_u64_alloc(M_WAITOK); 956 d->bd_dcount = counter_u64_alloc(M_WAITOK); 957 d->bd_fcount = counter_u64_alloc(M_WAITOK); 958 d->bd_wcount = counter_u64_alloc(M_WAITOK); 959 d->bd_wfcount = counter_u64_alloc(M_WAITOK); 960 d->bd_wdcount = counter_u64_alloc(M_WAITOK); 961 d->bd_zcopy = counter_u64_alloc(M_WAITOK); 962 963 /* 964 * For historical reasons, perform a one-time initialization call to 965 * the buffer routines, even though we're not yet committed to a 966 * particular buffer method. 967 */ 968 bpf_buffer_init(d); 969 if ((flags & FREAD) == 0) 970 d->bd_writer = 2; 971 d->bd_hbuf_in_use = 0; 972 d->bd_bufmode = BPF_BUFMODE_BUFFER; 973 d->bd_sig = SIGIO; 974 d->bd_direction = BPF_D_INOUT; 975 refcount_init(&d->bd_refcnt, 1); 976 BPF_PID_REFRESH(d, td); 977 #ifdef MAC 978 mac_bpfdesc_init(d); 979 mac_bpfdesc_create(td->td_ucred, d); 980 #endif 981 mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF); 982 callout_init_mtx(&d->bd_callout, &d->bd_lock, 0); 983 knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock); 984 985 /* Disable VLAN pcp tagging. 
 */
	d->bd_pcp = 0;

	return (0);
}

/*
 * bpfread - read next chunk of packets from buffers
 */
static int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int non_block;
	int timed_out;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	non_block = ((ioflag & O_NONBLOCK) != 0);

	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	while (d->bd_hbuf_in_use) {
		error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
		    PRINET | PCATCH, "bd_hbuf", 0);
		if (error != 0) {
			BPFD_UNLOCK(d);
			return (error);
		}
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 */
			if (d->bd_immediate || non_block || timed_out) {
				/*
				 * Rotate the buffers and return what's here
				 * if we are in immediate mode, the
				 * non-blocking flag is set, or this
				 * descriptor timed out.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (non_block) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_lock, PRINET | PCATCH,
		    "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	d->bd_hbuf_in_use = 1;
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * We do not have to worry about simultaneous reads because
	 * we waited for sole access to the hold buffer above.
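	 *
	 * (Illustrative, not part of the original source.)  A userland
	 * consumer typically walks the buffer returned by read(2) using
	 * BPF_WORDALIGN(), roughly:
	 *
	 *	for (p = buf; p < buf + cc; ) {
	 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
	 *		... bh->bh_caplen bytes of packet data start at
	 *		    p + bh->bh_hdrlen ...
	 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
	 *	}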
1111 */ 1112 error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); 1113 1114 BPFD_LOCK(d); 1115 if (d->bd_hbuf_in_use) { 1116 KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf")); 1117 d->bd_fbuf = d->bd_hbuf; 1118 d->bd_hbuf = NULL; 1119 d->bd_hlen = 0; 1120 bpf_buf_reclaimed(d); 1121 d->bd_hbuf_in_use = 0; 1122 wakeup(&d->bd_hbuf_in_use); 1123 } 1124 BPFD_UNLOCK(d); 1125 1126 return (error); 1127 } 1128 1129 /* 1130 * If there are processes sleeping on this descriptor, wake them up. 1131 */ 1132 static __inline void 1133 bpf_wakeup(struct bpf_d *d) 1134 { 1135 1136 BPFD_LOCK_ASSERT(d); 1137 if (d->bd_state == BPF_WAITING) { 1138 callout_stop(&d->bd_callout); 1139 d->bd_state = BPF_IDLE; 1140 } 1141 wakeup(d); 1142 if (d->bd_async && d->bd_sig && d->bd_sigio) 1143 pgsigio(&d->bd_sigio, d->bd_sig, 0); 1144 1145 selwakeuppri(&d->bd_sel, PRINET); 1146 KNOTE_LOCKED(&d->bd_sel.si_note, 0); 1147 } 1148 1149 static void 1150 bpf_timed_out(void *arg) 1151 { 1152 struct bpf_d *d = (struct bpf_d *)arg; 1153 1154 BPFD_LOCK_ASSERT(d); 1155 1156 if (callout_pending(&d->bd_callout) || 1157 !callout_active(&d->bd_callout)) 1158 return; 1159 if (d->bd_state == BPF_WAITING) { 1160 d->bd_state = BPF_TIMED_OUT; 1161 if (d->bd_slen != 0) 1162 bpf_wakeup(d); 1163 } 1164 } 1165 1166 static int 1167 bpf_ready(struct bpf_d *d) 1168 { 1169 1170 BPFD_LOCK_ASSERT(d); 1171 1172 if (!bpf_canfreebuf(d) && d->bd_hlen != 0) 1173 return (1); 1174 if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 1175 d->bd_slen != 0) 1176 return (1); 1177 return (0); 1178 } 1179 1180 static int 1181 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) 1182 { 1183 struct route ro; 1184 struct sockaddr dst; 1185 struct epoch_tracker et; 1186 struct bpf_if *bp; 1187 struct bpf_d *d; 1188 struct ifnet *ifp; 1189 struct mbuf *m, *mc; 1190 int error, hlen; 1191 1192 error = devfs_get_cdevpriv((void **)&d); 1193 if (error != 0) 1194 return (error); 1195 1196 NET_EPOCH_ENTER(et); 1197 BPFD_LOCK(d); 1198 BPF_PID_REFRESH_CUR(d); 1199 counter_u64_add(d->bd_wcount, 1); 1200 if ((bp = d->bd_bif) == NULL) { 1201 error = ENXIO; 1202 goto out_locked; 1203 } 1204 1205 ifp = bp->bif_ifp; 1206 if ((ifp->if_flags & IFF_UP) == 0) { 1207 error = ENETDOWN; 1208 goto out_locked; 1209 } 1210 1211 if (uio->uio_resid == 0) 1212 goto out_locked; 1213 1214 bzero(&dst, sizeof(dst)); 1215 m = NULL; 1216 hlen = 0; 1217 1218 /* 1219 * Take extra reference, unlock d and exit from epoch section, 1220 * since bpf_movein() can sleep. 1221 */ 1222 bpfd_ref(d); 1223 NET_EPOCH_EXIT(et); 1224 BPFD_UNLOCK(d); 1225 1226 error = bpf_movein(uio, (int)bp->bif_dlt, ifp, 1227 &m, &dst, &hlen, d); 1228 1229 if (error != 0) { 1230 counter_u64_add(d->bd_wdcount, 1); 1231 bpfd_rele(d); 1232 return (error); 1233 } 1234 1235 BPFD_LOCK(d); 1236 /* 1237 * Check that descriptor is still attached to the interface. 1238 * This can happen on bpfdetach(). To avoid access to detached 1239 * ifnet, free mbuf and return ENXIO. 1240 */ 1241 if (d->bd_bif == NULL) { 1242 counter_u64_add(d->bd_wdcount, 1); 1243 BPFD_UNLOCK(d); 1244 bpfd_rele(d); 1245 m_freem(m); 1246 return (ENXIO); 1247 } 1248 counter_u64_add(d->bd_wfcount, 1); 1249 if (d->bd_hdrcmplt) 1250 dst.sa_family = pseudo_AF_HDRCMPLT; 1251 1252 if (d->bd_feedback) { 1253 mc = m_dup(m, M_NOWAIT); 1254 if (mc != NULL) 1255 mc->m_pkthdr.rcvif = ifp; 1256 /* Set M_PROMISC for outgoing packets to be discarded. 
*/ 1257 if (d->bd_direction == BPF_D_INOUT) 1258 m->m_flags |= M_PROMISC; 1259 } else 1260 mc = NULL; 1261 1262 m->m_pkthdr.len -= hlen; 1263 m->m_len -= hlen; 1264 m->m_data += hlen; /* XXX */ 1265 1266 CURVNET_SET(ifp->if_vnet); 1267 #ifdef MAC 1268 mac_bpfdesc_create_mbuf(d, m); 1269 if (mc != NULL) 1270 mac_bpfdesc_create_mbuf(d, mc); 1271 #endif 1272 1273 bzero(&ro, sizeof(ro)); 1274 if (hlen != 0) { 1275 ro.ro_prepend = (u_char *)&dst.sa_data; 1276 ro.ro_plen = hlen; 1277 ro.ro_flags = RT_HAS_HEADER; 1278 } 1279 1280 if (d->bd_pcp != 0) 1281 vlan_set_pcp(m, d->bd_pcp); 1282 1283 /* Avoid possible recursion on BPFD_LOCK(). */ 1284 NET_EPOCH_ENTER(et); 1285 BPFD_UNLOCK(d); 1286 error = (*ifp->if_output)(ifp, m, &dst, &ro); 1287 if (error) 1288 counter_u64_add(d->bd_wdcount, 1); 1289 1290 if (mc != NULL) { 1291 if (error == 0) 1292 (*ifp->if_input)(ifp, mc); 1293 else 1294 m_freem(mc); 1295 } 1296 NET_EPOCH_EXIT(et); 1297 CURVNET_RESTORE(); 1298 bpfd_rele(d); 1299 return (error); 1300 1301 out_locked: 1302 counter_u64_add(d->bd_wdcount, 1); 1303 NET_EPOCH_EXIT(et); 1304 BPFD_UNLOCK(d); 1305 return (error); 1306 } 1307 1308 /* 1309 * Reset a descriptor by flushing its packet buffer and clearing the receive 1310 * and drop counts. This is doable for kernel-only buffers, but with 1311 * zero-copy buffers, we can't write to (or rotate) buffers that are 1312 * currently owned by userspace. It would be nice if we could encapsulate 1313 * this logic in the buffer code rather than here. 1314 */ 1315 static void 1316 reset_d(struct bpf_d *d) 1317 { 1318 1319 BPFD_LOCK_ASSERT(d); 1320 1321 while (d->bd_hbuf_in_use) 1322 mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET, 1323 "bd_hbuf", 0); 1324 if ((d->bd_hbuf != NULL) && 1325 (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) { 1326 /* Free the hold buffer. */ 1327 d->bd_fbuf = d->bd_hbuf; 1328 d->bd_hbuf = NULL; 1329 d->bd_hlen = 0; 1330 bpf_buf_reclaimed(d); 1331 } 1332 if (bpf_canwritebuf(d)) 1333 d->bd_slen = 0; 1334 counter_u64_zero(d->bd_rcount); 1335 counter_u64_zero(d->bd_dcount); 1336 counter_u64_zero(d->bd_fcount); 1337 counter_u64_zero(d->bd_wcount); 1338 counter_u64_zero(d->bd_wfcount); 1339 counter_u64_zero(d->bd_wdcount); 1340 counter_u64_zero(d->bd_zcopy); 1341 } 1342 1343 /* 1344 * FIONREAD Check for read packet available. 1345 * BIOCGBLEN Get buffer len [for read()]. 1346 * BIOCSETF Set read filter. 1347 * BIOCSETFNR Set read filter without resetting descriptor. 1348 * BIOCSETWF Set write filter. 1349 * BIOCFLUSH Flush read packet buffer. 1350 * BIOCPROMISC Put interface into promiscuous mode. 1351 * BIOCGDLT Get link layer type. 1352 * BIOCGETIF Get interface name. 1353 * BIOCSETIF Set interface. 1354 * BIOCSRTIMEOUT Set read timeout. 1355 * BIOCGRTIMEOUT Get read timeout. 1356 * BIOCGSTATS Get packet stats. 1357 * BIOCIMMEDIATE Set immediate mode. 1358 * BIOCVERSION Get filter language version. 1359 * BIOCGHDRCMPLT Get "header already complete" flag 1360 * BIOCSHDRCMPLT Set "header already complete" flag 1361 * BIOCGDIRECTION Get packet direction flag 1362 * BIOCSDIRECTION Set packet direction flag 1363 * BIOCGTSTAMP Get time stamp format and resolution. 1364 * BIOCSTSTAMP Set time stamp format and resolution. 1365 * BIOCLOCK Set "locked" flag 1366 * BIOCFEEDBACK Set packet feedback mode. 1367 * BIOCSETZBUF Set current zero-copy buffer locations. 1368 * BIOCGETZMAX Get maximum zero-copy buffer size. 1369 * BIOCROTZBUF Force rotation of zero-copy buffer 1370 * BIOCSETBUFMODE Set buffer mode. 
1371 * BIOCGETBUFMODE Get current buffer mode. 1372 * BIOCSETVLANPCP Set VLAN PCP tag. 1373 */ 1374 /* ARGSUSED */ 1375 static int 1376 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 1377 struct thread *td) 1378 { 1379 struct bpf_d *d; 1380 int error; 1381 1382 error = devfs_get_cdevpriv((void **)&d); 1383 if (error != 0) 1384 return (error); 1385 1386 /* 1387 * Refresh PID associated with this descriptor. 1388 */ 1389 BPFD_LOCK(d); 1390 BPF_PID_REFRESH(d, td); 1391 if (d->bd_state == BPF_WAITING) 1392 callout_stop(&d->bd_callout); 1393 d->bd_state = BPF_IDLE; 1394 BPFD_UNLOCK(d); 1395 1396 if (d->bd_locked == 1) { 1397 switch (cmd) { 1398 case BIOCGBLEN: 1399 case BIOCFLUSH: 1400 case BIOCGDLT: 1401 case BIOCGDLTLIST: 1402 #ifdef COMPAT_FREEBSD32 1403 case BIOCGDLTLIST32: 1404 #endif 1405 case BIOCGETIF: 1406 case BIOCGRTIMEOUT: 1407 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1408 case BIOCGRTIMEOUT32: 1409 #endif 1410 case BIOCGSTATS: 1411 case BIOCVERSION: 1412 case BIOCGRSIG: 1413 case BIOCGHDRCMPLT: 1414 case BIOCSTSTAMP: 1415 case BIOCFEEDBACK: 1416 case FIONREAD: 1417 case BIOCLOCK: 1418 case BIOCSRTIMEOUT: 1419 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1420 case BIOCSRTIMEOUT32: 1421 #endif 1422 case BIOCIMMEDIATE: 1423 case TIOCGPGRP: 1424 case BIOCROTZBUF: 1425 break; 1426 default: 1427 return (EPERM); 1428 } 1429 } 1430 #ifdef COMPAT_FREEBSD32 1431 /* 1432 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so 1433 * that it will get 32-bit packet headers. 1434 */ 1435 switch (cmd) { 1436 case BIOCSETF32: 1437 case BIOCSETFNR32: 1438 case BIOCSETWF32: 1439 case BIOCGDLTLIST32: 1440 case BIOCGRTIMEOUT32: 1441 case BIOCSRTIMEOUT32: 1442 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 1443 BPFD_LOCK(d); 1444 d->bd_compat32 = 1; 1445 BPFD_UNLOCK(d); 1446 } 1447 } 1448 #endif 1449 1450 CURVNET_SET(TD_TO_VNET(td)); 1451 switch (cmd) { 1452 default: 1453 error = EINVAL; 1454 break; 1455 1456 /* 1457 * Check for read packet available. 1458 */ 1459 case FIONREAD: 1460 { 1461 int n; 1462 1463 BPFD_LOCK(d); 1464 n = d->bd_slen; 1465 while (d->bd_hbuf_in_use) 1466 mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, 1467 PRINET, "bd_hbuf", 0); 1468 if (d->bd_hbuf) 1469 n += d->bd_hlen; 1470 BPFD_UNLOCK(d); 1471 1472 *(int *)addr = n; 1473 break; 1474 } 1475 1476 /* 1477 * Get buffer len [for read()]. 1478 */ 1479 case BIOCGBLEN: 1480 BPFD_LOCK(d); 1481 *(u_int *)addr = d->bd_bufsize; 1482 BPFD_UNLOCK(d); 1483 break; 1484 1485 /* 1486 * Set buffer length. 1487 */ 1488 case BIOCSBLEN: 1489 error = bpf_ioctl_sblen(d, (u_int *)addr); 1490 break; 1491 1492 /* 1493 * Set link layer read filter. 1494 */ 1495 case BIOCSETF: 1496 case BIOCSETFNR: 1497 case BIOCSETWF: 1498 #ifdef COMPAT_FREEBSD32 1499 case BIOCSETF32: 1500 case BIOCSETFNR32: 1501 case BIOCSETWF32: 1502 #endif 1503 error = bpf_setf(d, (struct bpf_program *)addr, cmd); 1504 break; 1505 1506 /* 1507 * Flush read packet buffer. 1508 */ 1509 case BIOCFLUSH: 1510 BPFD_LOCK(d); 1511 reset_d(d); 1512 BPFD_UNLOCK(d); 1513 break; 1514 1515 /* 1516 * Put interface into promiscuous mode. 1517 */ 1518 case BIOCPROMISC: 1519 BPF_LOCK(); 1520 if (d->bd_bif == NULL) { 1521 /* 1522 * No interface attached yet. 1523 */ 1524 error = EINVAL; 1525 } else if (d->bd_promisc == 0) { 1526 error = ifpromisc(d->bd_bif->bif_ifp, 1); 1527 if (error == 0) 1528 d->bd_promisc = 1; 1529 } 1530 BPF_UNLOCK(); 1531 break; 1532 1533 /* 1534 * Get current data link type. 
1535 */ 1536 case BIOCGDLT: 1537 BPF_LOCK(); 1538 if (d->bd_bif == NULL) 1539 error = EINVAL; 1540 else 1541 *(u_int *)addr = d->bd_bif->bif_dlt; 1542 BPF_UNLOCK(); 1543 break; 1544 1545 /* 1546 * Get a list of supported data link types. 1547 */ 1548 #ifdef COMPAT_FREEBSD32 1549 case BIOCGDLTLIST32: 1550 { 1551 struct bpf_dltlist32 *list32; 1552 struct bpf_dltlist dltlist; 1553 1554 list32 = (struct bpf_dltlist32 *)addr; 1555 dltlist.bfl_len = list32->bfl_len; 1556 dltlist.bfl_list = PTRIN(list32->bfl_list); 1557 BPF_LOCK(); 1558 if (d->bd_bif == NULL) 1559 error = EINVAL; 1560 else { 1561 error = bpf_getdltlist(d, &dltlist); 1562 if (error == 0) 1563 list32->bfl_len = dltlist.bfl_len; 1564 } 1565 BPF_UNLOCK(); 1566 break; 1567 } 1568 #endif 1569 1570 case BIOCGDLTLIST: 1571 BPF_LOCK(); 1572 if (d->bd_bif == NULL) 1573 error = EINVAL; 1574 else 1575 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); 1576 BPF_UNLOCK(); 1577 break; 1578 1579 /* 1580 * Set data link type. 1581 */ 1582 case BIOCSDLT: 1583 BPF_LOCK(); 1584 if (d->bd_bif == NULL) 1585 error = EINVAL; 1586 else 1587 error = bpf_setdlt(d, *(u_int *)addr); 1588 BPF_UNLOCK(); 1589 break; 1590 1591 /* 1592 * Get interface name. 1593 */ 1594 case BIOCGETIF: 1595 BPF_LOCK(); 1596 if (d->bd_bif == NULL) 1597 error = EINVAL; 1598 else { 1599 struct ifnet *const ifp = d->bd_bif->bif_ifp; 1600 struct ifreq *const ifr = (struct ifreq *)addr; 1601 1602 strlcpy(ifr->ifr_name, ifp->if_xname, 1603 sizeof(ifr->ifr_name)); 1604 } 1605 BPF_UNLOCK(); 1606 break; 1607 1608 /* 1609 * Set interface. 1610 */ 1611 case BIOCSETIF: 1612 { 1613 int alloc_buf, size; 1614 1615 /* 1616 * Behavior here depends on the buffering model. If 1617 * we're using kernel memory buffers, then we can 1618 * allocate them here. If we're using zero-copy, 1619 * then the user process must have registered buffers 1620 * by the time we get here. 1621 */ 1622 alloc_buf = 0; 1623 BPFD_LOCK(d); 1624 if (d->bd_bufmode == BPF_BUFMODE_BUFFER && 1625 d->bd_sbuf == NULL) 1626 alloc_buf = 1; 1627 BPFD_UNLOCK(d); 1628 if (alloc_buf) { 1629 size = d->bd_bufsize; 1630 error = bpf_buffer_ioctl_sblen(d, &size); 1631 if (error != 0) 1632 break; 1633 } 1634 BPF_LOCK(); 1635 error = bpf_setif(d, (struct ifreq *)addr); 1636 BPF_UNLOCK(); 1637 break; 1638 } 1639 1640 /* 1641 * Set read timeout. 1642 */ 1643 case BIOCSRTIMEOUT: 1644 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1645 case BIOCSRTIMEOUT32: 1646 #endif 1647 { 1648 struct timeval *tv = (struct timeval *)addr; 1649 #if defined(COMPAT_FREEBSD32) 1650 struct timeval32 *tv32; 1651 struct timeval tv64; 1652 1653 if (cmd == BIOCSRTIMEOUT32) { 1654 tv32 = (struct timeval32 *)addr; 1655 tv = &tv64; 1656 tv->tv_sec = tv32->tv_sec; 1657 tv->tv_usec = tv32->tv_usec; 1658 } else 1659 #endif 1660 tv = (struct timeval *)addr; 1661 1662 /* 1663 * Subtract 1 tick from tvtohz() since this isn't 1664 * a one-shot timer. 1665 */ 1666 if ((error = itimerfix(tv)) == 0) 1667 d->bd_rtout = tvtohz(tv) - 1; 1668 break; 1669 } 1670 1671 /* 1672 * Get read timeout. 
1673 */ 1674 case BIOCGRTIMEOUT: 1675 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1676 case BIOCGRTIMEOUT32: 1677 #endif 1678 { 1679 struct timeval *tv; 1680 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1681 struct timeval32 *tv32; 1682 struct timeval tv64; 1683 1684 if (cmd == BIOCGRTIMEOUT32) 1685 tv = &tv64; 1686 else 1687 #endif 1688 tv = (struct timeval *)addr; 1689 1690 tv->tv_sec = d->bd_rtout / hz; 1691 tv->tv_usec = (d->bd_rtout % hz) * tick; 1692 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1693 if (cmd == BIOCGRTIMEOUT32) { 1694 tv32 = (struct timeval32 *)addr; 1695 tv32->tv_sec = tv->tv_sec; 1696 tv32->tv_usec = tv->tv_usec; 1697 } 1698 #endif 1699 1700 break; 1701 } 1702 1703 /* 1704 * Get packet stats. 1705 */ 1706 case BIOCGSTATS: 1707 { 1708 struct bpf_stat *bs = (struct bpf_stat *)addr; 1709 1710 /* XXXCSJP overflow */ 1711 bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount); 1712 bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount); 1713 break; 1714 } 1715 1716 /* 1717 * Set immediate mode. 1718 */ 1719 case BIOCIMMEDIATE: 1720 BPFD_LOCK(d); 1721 d->bd_immediate = *(u_int *)addr; 1722 BPFD_UNLOCK(d); 1723 break; 1724 1725 case BIOCVERSION: 1726 { 1727 struct bpf_version *bv = (struct bpf_version *)addr; 1728 1729 bv->bv_major = BPF_MAJOR_VERSION; 1730 bv->bv_minor = BPF_MINOR_VERSION; 1731 break; 1732 } 1733 1734 /* 1735 * Get "header already complete" flag 1736 */ 1737 case BIOCGHDRCMPLT: 1738 BPFD_LOCK(d); 1739 *(u_int *)addr = d->bd_hdrcmplt; 1740 BPFD_UNLOCK(d); 1741 break; 1742 1743 /* 1744 * Set "header already complete" flag 1745 */ 1746 case BIOCSHDRCMPLT: 1747 BPFD_LOCK(d); 1748 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0; 1749 BPFD_UNLOCK(d); 1750 break; 1751 1752 /* 1753 * Get packet direction flag 1754 */ 1755 case BIOCGDIRECTION: 1756 BPFD_LOCK(d); 1757 *(u_int *)addr = d->bd_direction; 1758 BPFD_UNLOCK(d); 1759 break; 1760 1761 /* 1762 * Set packet direction flag 1763 */ 1764 case BIOCSDIRECTION: 1765 { 1766 u_int direction; 1767 1768 direction = *(u_int *)addr; 1769 switch (direction) { 1770 case BPF_D_IN: 1771 case BPF_D_INOUT: 1772 case BPF_D_OUT: 1773 BPFD_LOCK(d); 1774 d->bd_direction = direction; 1775 BPFD_UNLOCK(d); 1776 break; 1777 default: 1778 error = EINVAL; 1779 } 1780 } 1781 break; 1782 1783 /* 1784 * Get packet timestamp format and resolution. 1785 */ 1786 case BIOCGTSTAMP: 1787 BPFD_LOCK(d); 1788 *(u_int *)addr = d->bd_tstamp; 1789 BPFD_UNLOCK(d); 1790 break; 1791 1792 /* 1793 * Set packet timestamp format and resolution. 1794 */ 1795 case BIOCSTSTAMP: 1796 { 1797 u_int func; 1798 1799 func = *(u_int *)addr; 1800 if (BPF_T_VALID(func)) 1801 d->bd_tstamp = func; 1802 else 1803 error = EINVAL; 1804 } 1805 break; 1806 1807 case BIOCFEEDBACK: 1808 BPFD_LOCK(d); 1809 d->bd_feedback = *(u_int *)addr; 1810 BPFD_UNLOCK(d); 1811 break; 1812 1813 case BIOCLOCK: 1814 BPFD_LOCK(d); 1815 d->bd_locked = 1; 1816 BPFD_UNLOCK(d); 1817 break; 1818 1819 case FIONBIO: /* Non-blocking I/O */ 1820 break; 1821 1822 case FIOASYNC: /* Send signal on receive packets */ 1823 BPFD_LOCK(d); 1824 d->bd_async = *(int *)addr; 1825 BPFD_UNLOCK(d); 1826 break; 1827 1828 case FIOSETOWN: 1829 /* 1830 * XXX: Add some sort of locking here? 1831 * fsetown() can sleep. 1832 */ 1833 error = fsetown(*(int *)addr, &d->bd_sigio); 1834 break; 1835 1836 case FIOGETOWN: 1837 BPFD_LOCK(d); 1838 *(int *)addr = fgetown(&d->bd_sigio); 1839 BPFD_UNLOCK(d); 1840 break; 1841 1842 /* This is deprecated, FIOSETOWN should be used instead. 
*/ 1843 case TIOCSPGRP: 1844 error = fsetown(-(*(int *)addr), &d->bd_sigio); 1845 break; 1846 1847 /* This is deprecated, FIOGETOWN should be used instead. */ 1848 case TIOCGPGRP: 1849 *(int *)addr = -fgetown(&d->bd_sigio); 1850 break; 1851 1852 case BIOCSRSIG: /* Set receive signal */ 1853 { 1854 u_int sig; 1855 1856 sig = *(u_int *)addr; 1857 1858 if (sig >= NSIG) 1859 error = EINVAL; 1860 else { 1861 BPFD_LOCK(d); 1862 d->bd_sig = sig; 1863 BPFD_UNLOCK(d); 1864 } 1865 break; 1866 } 1867 case BIOCGRSIG: 1868 BPFD_LOCK(d); 1869 *(u_int *)addr = d->bd_sig; 1870 BPFD_UNLOCK(d); 1871 break; 1872 1873 case BIOCGETBUFMODE: 1874 BPFD_LOCK(d); 1875 *(u_int *)addr = d->bd_bufmode; 1876 BPFD_UNLOCK(d); 1877 break; 1878 1879 case BIOCSETBUFMODE: 1880 /* 1881 * Allow the buffering mode to be changed as long as we 1882 * haven't yet committed to a particular mode. Our 1883 * definition of commitment, for now, is whether or not a 1884 * buffer has been allocated or an interface attached, since 1885 * that's the point where things get tricky. 1886 */ 1887 switch (*(u_int *)addr) { 1888 case BPF_BUFMODE_BUFFER: 1889 break; 1890 1891 case BPF_BUFMODE_ZBUF: 1892 if (bpf_zerocopy_enable) 1893 break; 1894 /* FALLSTHROUGH */ 1895 1896 default: 1897 CURVNET_RESTORE(); 1898 return (EINVAL); 1899 } 1900 1901 BPFD_LOCK(d); 1902 if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || 1903 d->bd_fbuf != NULL || d->bd_bif != NULL) { 1904 BPFD_UNLOCK(d); 1905 CURVNET_RESTORE(); 1906 return (EBUSY); 1907 } 1908 d->bd_bufmode = *(u_int *)addr; 1909 BPFD_UNLOCK(d); 1910 break; 1911 1912 case BIOCGETZMAX: 1913 error = bpf_ioctl_getzmax(td, d, (size_t *)addr); 1914 break; 1915 1916 case BIOCSETZBUF: 1917 error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr); 1918 break; 1919 1920 case BIOCROTZBUF: 1921 error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr); 1922 break; 1923 1924 case BIOCSETVLANPCP: 1925 { 1926 u_int pcp; 1927 1928 pcp = *(u_int *)addr; 1929 if (pcp > BPF_PRIO_MAX || pcp < 0) { 1930 error = EINVAL; 1931 break; 1932 } 1933 d->bd_pcp = pcp; 1934 break; 1935 } 1936 } 1937 CURVNET_RESTORE(); 1938 return (error); 1939 } 1940 1941 /* 1942 * Set d's packet filter program to fp. If this file already has a filter, 1943 * free it and replace it. Returns EINVAL for bogus requests. 1944 * 1945 * Note we use global lock here to serialize bpf_setf() and bpf_setif() 1946 * calls. 1947 */ 1948 static int 1949 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) 1950 { 1951 #ifdef COMPAT_FREEBSD32 1952 struct bpf_program fp_swab; 1953 struct bpf_program32 *fp32; 1954 #endif 1955 struct bpf_program_buffer *fcode; 1956 struct bpf_insn *filter; 1957 #ifdef BPF_JITTER 1958 bpf_jit_filter *jfunc; 1959 #endif 1960 size_t size; 1961 u_int flen; 1962 bool track_event; 1963 1964 #ifdef COMPAT_FREEBSD32 1965 switch (cmd) { 1966 case BIOCSETF32: 1967 case BIOCSETWF32: 1968 case BIOCSETFNR32: 1969 fp32 = (struct bpf_program32 *)fp; 1970 fp_swab.bf_len = fp32->bf_len; 1971 fp_swab.bf_insns = 1972 (struct bpf_insn *)(uintptr_t)fp32->bf_insns; 1973 fp = &fp_swab; 1974 switch (cmd) { 1975 case BIOCSETF32: 1976 cmd = BIOCSETF; 1977 break; 1978 case BIOCSETWF32: 1979 cmd = BIOCSETWF; 1980 break; 1981 } 1982 break; 1983 } 1984 #endif 1985 1986 filter = NULL; 1987 #ifdef BPF_JITTER 1988 jfunc = NULL; 1989 #endif 1990 /* 1991 * Check new filter validness before acquiring any locks. 1992 * Allocate memory for new filter, if needed. 
1993 */ 1994 flen = fp->bf_len; 1995 if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0)) 1996 return (EINVAL); 1997 size = flen * sizeof(*fp->bf_insns); 1998 if (size > 0) { 1999 /* We're setting up new filter. Copy and check actual data. */ 2000 fcode = bpf_program_buffer_alloc(size, M_WAITOK); 2001 filter = (struct bpf_insn *)fcode->buffer; 2002 if (copyin(fp->bf_insns, filter, size) != 0 || 2003 !bpf_validate(filter, flen)) { 2004 free(fcode, M_BPF); 2005 return (EINVAL); 2006 } 2007 #ifdef BPF_JITTER 2008 if (cmd != BIOCSETWF) { 2009 /* 2010 * Filter is copied inside fcode and is 2011 * perfectly valid. 2012 */ 2013 jfunc = bpf_jitter(filter, flen); 2014 } 2015 #endif 2016 } 2017 2018 track_event = false; 2019 fcode = NULL; 2020 2021 BPF_LOCK(); 2022 BPFD_LOCK(d); 2023 /* Set up new filter. */ 2024 if (cmd == BIOCSETWF) { 2025 if (d->bd_wfilter != NULL) { 2026 fcode = __containerof((void *)d->bd_wfilter, 2027 struct bpf_program_buffer, buffer); 2028 #ifdef BPF_JITTER 2029 fcode->func = NULL; 2030 #endif 2031 } 2032 d->bd_wfilter = filter; 2033 } else { 2034 if (d->bd_rfilter != NULL) { 2035 fcode = __containerof((void *)d->bd_rfilter, 2036 struct bpf_program_buffer, buffer); 2037 #ifdef BPF_JITTER 2038 fcode->func = d->bd_bfilter; 2039 #endif 2040 } 2041 d->bd_rfilter = filter; 2042 #ifdef BPF_JITTER 2043 d->bd_bfilter = jfunc; 2044 #endif 2045 if (cmd == BIOCSETF) 2046 reset_d(d); 2047 2048 if (bpf_check_upgrade(cmd, d, filter, flen) != 0) { 2049 /* 2050 * Filter can be set several times without 2051 * specifying interface. In this case just mark d 2052 * as reader. 2053 */ 2054 d->bd_writer = 0; 2055 if (d->bd_bif != NULL) { 2056 /* 2057 * Remove descriptor from writers-only list 2058 * and add it to active readers list. 2059 */ 2060 CK_LIST_REMOVE(d, bd_next); 2061 CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist, 2062 d, bd_next); 2063 CTR2(KTR_NET, 2064 "%s: upgrade required by pid %d", 2065 __func__, d->bd_pid); 2066 track_event = true; 2067 } 2068 } 2069 } 2070 BPFD_UNLOCK(d); 2071 2072 if (fcode != NULL) 2073 NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx); 2074 2075 if (track_event) 2076 EVENTHANDLER_INVOKE(bpf_track, 2077 d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1); 2078 2079 BPF_UNLOCK(); 2080 return (0); 2081 } 2082 2083 /* 2084 * Detach a file from its current interface (if attached at all) and attach 2085 * to the interface indicated by the name stored in ifr. 2086 * Return an errno or 0. 2087 */ 2088 static int 2089 bpf_setif(struct bpf_d *d, struct ifreq *ifr) 2090 { 2091 struct bpf_if *bp; 2092 struct ifnet *theywant; 2093 2094 BPF_LOCK_ASSERT(); 2095 2096 theywant = ifunit(ifr->ifr_name); 2097 if (theywant == NULL) 2098 return (ENXIO); 2099 /* 2100 * Look through attached interfaces for the named one. 2101 */ 2102 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2103 if (bp->bif_ifp == theywant && 2104 bp->bif_bpf == &theywant->if_bpf) 2105 break; 2106 } 2107 if (bp == NULL) 2108 return (ENXIO); 2109 2110 MPASS(bp == theywant->if_bpf); 2111 /* 2112 * At this point, we expect the buffer is already allocated. If not, 2113 * return an error. 
2114 */ 2115 switch (d->bd_bufmode) { 2116 case BPF_BUFMODE_BUFFER: 2117 case BPF_BUFMODE_ZBUF: 2118 if (d->bd_sbuf == NULL) 2119 return (EINVAL); 2120 break; 2121 2122 default: 2123 panic("bpf_setif: bufmode %d", d->bd_bufmode); 2124 } 2125 if (bp != d->bd_bif) 2126 bpf_attachd(d, bp); 2127 else { 2128 BPFD_LOCK(d); 2129 reset_d(d); 2130 BPFD_UNLOCK(d); 2131 } 2132 return (0); 2133 } 2134 2135 /* 2136 * Support for select() and poll() system calls 2137 * 2138 * Return true iff the specific operation will not block indefinitely. 2139 * Otherwise, return false but make a note that a selwakeup() must be done. 2140 */ 2141 static int 2142 bpfpoll(struct cdev *dev, int events, struct thread *td) 2143 { 2144 struct bpf_d *d; 2145 int revents; 2146 2147 if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL) 2148 return (events & 2149 (POLLHUP | POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)); 2150 2151 /* 2152 * Refresh PID associated with this descriptor. 2153 */ 2154 revents = events & (POLLOUT | POLLWRNORM); 2155 BPFD_LOCK(d); 2156 BPF_PID_REFRESH(d, td); 2157 if (events & (POLLIN | POLLRDNORM)) { 2158 if (bpf_ready(d)) 2159 revents |= events & (POLLIN | POLLRDNORM); 2160 else { 2161 selrecord(td, &d->bd_sel); 2162 /* Start the read timeout if necessary. */ 2163 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 2164 callout_reset(&d->bd_callout, d->bd_rtout, 2165 bpf_timed_out, d); 2166 d->bd_state = BPF_WAITING; 2167 } 2168 } 2169 } 2170 BPFD_UNLOCK(d); 2171 return (revents); 2172 } 2173 2174 /* 2175 * Support for kevent() system call. Register EVFILT_READ filters and 2176 * reject all others. 2177 */ 2178 int 2179 bpfkqfilter(struct cdev *dev, struct knote *kn) 2180 { 2181 struct bpf_d *d; 2182 2183 if (devfs_get_cdevpriv((void **)&d) != 0) 2184 return (1); 2185 2186 switch (kn->kn_filter) { 2187 case EVFILT_READ: 2188 kn->kn_fop = &bpfread_filtops; 2189 break; 2190 2191 case EVFILT_WRITE: 2192 kn->kn_fop = &bpfwrite_filtops; 2193 break; 2194 2195 default: 2196 return (1); 2197 } 2198 2199 /* 2200 * Refresh PID associated with this descriptor. 2201 */ 2202 BPFD_LOCK(d); 2203 BPF_PID_REFRESH_CUR(d); 2204 kn->kn_hook = d; 2205 knlist_add(&d->bd_sel.si_note, kn, 1); 2206 BPFD_UNLOCK(d); 2207 2208 return (0); 2209 } 2210 2211 static void 2212 filt_bpfdetach(struct knote *kn) 2213 { 2214 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2215 2216 knlist_remove(&d->bd_sel.si_note, kn, 0); 2217 } 2218 2219 static int 2220 filt_bpfread(struct knote *kn, long hint) 2221 { 2222 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2223 int ready; 2224 2225 BPFD_LOCK_ASSERT(d); 2226 ready = bpf_ready(d); 2227 if (ready) { 2228 kn->kn_data = d->bd_slen; 2229 /* 2230 * Ignore the hold buffer if it is being copied to user space. 
2231 */ 2232 if (!d->bd_hbuf_in_use && d->bd_hbuf) 2233 kn->kn_data += d->bd_hlen; 2234 } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 2235 callout_reset(&d->bd_callout, d->bd_rtout, 2236 bpf_timed_out, d); 2237 d->bd_state = BPF_WAITING; 2238 } 2239 2240 return (ready); 2241 } 2242 2243 static int 2244 filt_bpfwrite(struct knote *kn, long hint) 2245 { 2246 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2247 2248 BPFD_LOCK_ASSERT(d); 2249 2250 if (d->bd_bif == NULL) { 2251 kn->kn_data = 0; 2252 return (0); 2253 } else { 2254 kn->kn_data = d->bd_bif->bif_ifp->if_mtu; 2255 return (1); 2256 } 2257 } 2258 2259 #define BPF_TSTAMP_NONE 0 2260 #define BPF_TSTAMP_FAST 1 2261 #define BPF_TSTAMP_NORMAL 2 2262 #define BPF_TSTAMP_EXTERN 3 2263 2264 static int 2265 bpf_ts_quality(int tstype) 2266 { 2267 2268 if (tstype == BPF_T_NONE) 2269 return (BPF_TSTAMP_NONE); 2270 if ((tstype & BPF_T_FAST) != 0) 2271 return (BPF_TSTAMP_FAST); 2272 2273 return (BPF_TSTAMP_NORMAL); 2274 } 2275 2276 static int 2277 bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m) 2278 { 2279 struct timespec ts; 2280 struct m_tag *tag; 2281 int quality; 2282 2283 quality = bpf_ts_quality(tstype); 2284 if (quality == BPF_TSTAMP_NONE) 2285 return (quality); 2286 2287 if (m != NULL) { 2288 if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { 2289 mbuf_tstmp2timespec(m, &ts); 2290 timespec2bintime(&ts, bt); 2291 return (BPF_TSTAMP_EXTERN); 2292 } 2293 tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL); 2294 if (tag != NULL) { 2295 *bt = *(struct bintime *)(tag + 1); 2296 return (BPF_TSTAMP_EXTERN); 2297 } 2298 } 2299 if (quality == BPF_TSTAMP_NORMAL) 2300 binuptime(bt); 2301 else 2302 getbinuptime(bt); 2303 2304 return (quality); 2305 } 2306 2307 /* 2308 * Incoming linkage from device drivers. Process the packet pkt, of length 2309 * pktlen, which is stored in a contiguous buffer. The packet is parsed 2310 * by each process' filter, and if accepted, stashed into the corresponding 2311 * buffer. 2312 */ 2313 void 2314 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 2315 { 2316 struct epoch_tracker et; 2317 struct bintime bt; 2318 struct bpf_d *d; 2319 #ifdef BPF_JITTER 2320 bpf_jit_filter *bf; 2321 #endif 2322 u_int slen; 2323 int gottime; 2324 2325 gottime = BPF_TSTAMP_NONE; 2326 NET_EPOCH_ENTER(et); 2327 CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2328 counter_u64_add(d->bd_rcount, 1); 2329 /* 2330 * NB: We don't call BPF_CHECK_DIRECTION() here since there 2331 * is no way for the caller to indicate to us whether this 2332 * packet is inbound or outbound. In the bpf_mtap() routines, 2333 * we use the interface pointers on the mbuf to figure it out. 2334 */ 2335 #ifdef BPF_JITTER 2336 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; 2337 if (bf != NULL) 2338 slen = (*(bf->func))(pkt, pktlen, pktlen); 2339 else 2340 #endif 2341 slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen); 2342 if (slen != 0) { 2343 /* 2344 * Filter matches. Let's acquire the write lock.
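 * catchpacket() manipulates the store/hold buffers and the descriptor's counters, so the descriptor lock is needed even though the filter itself ran locklessly under the net epoch.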
2345 */ 2346 BPFD_LOCK(d); 2347 counter_u64_add(d->bd_fcount, 1); 2348 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2349 gottime = bpf_gettime(&bt, d->bd_tstamp, 2350 NULL); 2351 #ifdef MAC 2352 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2353 #endif 2354 catchpacket(d, pkt, pktlen, slen, 2355 bpf_append_bytes, &bt); 2356 BPFD_UNLOCK(d); 2357 } 2358 } 2359 NET_EPOCH_EXIT(et); 2360 } 2361 2362 void 2363 bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen) 2364 { 2365 if (bpf_peers_present(ifp->if_bpf)) 2366 bpf_tap(ifp->if_bpf, pkt, pktlen); 2367 } 2368 2369 #define BPF_CHECK_DIRECTION(d, r, i) \ 2370 (((d)->bd_direction == BPF_D_IN && (r) != (i)) || \ 2371 ((d)->bd_direction == BPF_D_OUT && (r) == (i))) 2372 2373 /* 2374 * Incoming linkage from device drivers, when packet is in an mbuf chain. 2375 * Locking model is explained in bpf_tap(). 2376 */ 2377 void 2378 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2379 { 2380 struct epoch_tracker et; 2381 struct bintime bt; 2382 struct bpf_d *d; 2383 #ifdef BPF_JITTER 2384 bpf_jit_filter *bf; 2385 #endif 2386 u_int pktlen, slen; 2387 int gottime; 2388 2389 /* Skip outgoing duplicate packets. */ 2390 if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) { 2391 m->m_flags &= ~M_PROMISC; 2392 return; 2393 } 2394 2395 pktlen = m_length(m, NULL); 2396 gottime = BPF_TSTAMP_NONE; 2397 2398 NET_EPOCH_ENTER(et); 2399 CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2400 if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp)) 2401 continue; 2402 counter_u64_add(d->bd_rcount, 1); 2403 #ifdef BPF_JITTER 2404 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; 2405 /* XXX We cannot handle multiple mbufs. */ 2406 if (bf != NULL && m->m_next == NULL) 2407 slen = (*(bf->func))(mtod(m, u_char *), pktlen, 2408 pktlen); 2409 else 2410 #endif 2411 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0); 2412 if (slen != 0) { 2413 BPFD_LOCK(d); 2414 2415 counter_u64_add(d->bd_fcount, 1); 2416 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2417 gottime = bpf_gettime(&bt, d->bd_tstamp, m); 2418 #ifdef MAC 2419 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2420 #endif 2421 catchpacket(d, (u_char *)m, pktlen, slen, 2422 bpf_append_mbuf, &bt); 2423 BPFD_UNLOCK(d); 2424 } 2425 } 2426 NET_EPOCH_EXIT(et); 2427 } 2428 2429 void 2430 bpf_mtap_if(if_t ifp, struct mbuf *m) 2431 { 2432 if (bpf_peers_present(ifp->if_bpf)) { 2433 M_ASSERTVALID(m); 2434 bpf_mtap(ifp->if_bpf, m); 2435 } 2436 } 2437 2438 /* 2439 * Incoming linkage from device drivers, when packet is in 2440 * an mbuf chain and to be prepended by a contiguous header. 2441 */ 2442 void 2443 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 2444 { 2445 struct epoch_tracker et; 2446 struct bintime bt; 2447 struct mbuf mb; 2448 struct bpf_d *d; 2449 u_int pktlen, slen; 2450 int gottime; 2451 2452 /* Skip outgoing duplicate packets. */ 2453 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 2454 m->m_flags &= ~M_PROMISC; 2455 return; 2456 } 2457 2458 pktlen = m_length(m, NULL); 2459 /* 2460 * Craft on-stack mbuf suitable for passing to bpf_filter. 2461 * Note that we cut corners here; we only setup what's 2462 * absolutely needed--this mbuf should never go anywhere else. 
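 * The fake mbuf lives on the stack, carries no pkthdr, and is never handed to code that might free it; it exists only so bpf_filter() and catchpacket() can walk the prepended header and the original chain as one logical packet.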
2463 */ 2464 mb.m_flags = 0; 2465 mb.m_next = m; 2466 mb.m_data = data; 2467 mb.m_len = dlen; 2468 pktlen += dlen; 2469 2470 gottime = BPF_TSTAMP_NONE; 2471 2472 NET_EPOCH_ENTER(et); 2473 CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2474 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) 2475 continue; 2476 counter_u64_add(d->bd_rcount, 1); 2477 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0); 2478 if (slen != 0) { 2479 BPFD_LOCK(d); 2480 2481 counter_u64_add(d->bd_fcount, 1); 2482 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2483 gottime = bpf_gettime(&bt, d->bd_tstamp, m); 2484 #ifdef MAC 2485 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2486 #endif 2487 catchpacket(d, (u_char *)&mb, pktlen, slen, 2488 bpf_append_mbuf, &bt); 2489 BPFD_UNLOCK(d); 2490 } 2491 } 2492 NET_EPOCH_EXIT(et); 2493 } 2494 2495 void 2496 bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m) 2497 { 2498 if (bpf_peers_present(ifp->if_bpf)) { 2499 M_ASSERTVALID(m); 2500 bpf_mtap2(ifp->if_bpf, data, dlen, m); 2501 } 2502 } 2503 2504 #undef BPF_CHECK_DIRECTION 2505 #undef BPF_TSTAMP_NONE 2506 #undef BPF_TSTAMP_FAST 2507 #undef BPF_TSTAMP_NORMAL 2508 #undef BPF_TSTAMP_EXTERN 2509 2510 static int 2511 bpf_hdrlen(struct bpf_d *d) 2512 { 2513 int hdrlen; 2514 2515 hdrlen = d->bd_bif->bif_hdrlen; 2516 #ifndef BURN_BRIDGES 2517 if (d->bd_tstamp == BPF_T_NONE || 2518 BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME) 2519 #ifdef COMPAT_FREEBSD32 2520 if (d->bd_compat32) 2521 hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32); 2522 else 2523 #endif 2524 hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr); 2525 else 2526 #endif 2527 hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr); 2528 #ifdef COMPAT_FREEBSD32 2529 if (d->bd_compat32) 2530 hdrlen = BPF_WORDALIGN32(hdrlen); 2531 else 2532 #endif 2533 hdrlen = BPF_WORDALIGN(hdrlen); 2534 2535 return (hdrlen - d->bd_bif->bif_hdrlen); 2536 } 2537 2538 static void 2539 bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype) 2540 { 2541 struct bintime bt2, boottimebin; 2542 struct timeval tsm; 2543 struct timespec tsn; 2544 2545 if ((tstype & BPF_T_MONOTONIC) == 0) { 2546 bt2 = *bt; 2547 getboottimebin(&boottimebin); 2548 bintime_add(&bt2, &boottimebin); 2549 bt = &bt2; 2550 } 2551 switch (BPF_T_FORMAT(tstype)) { 2552 case BPF_T_MICROTIME: 2553 bintime2timeval(bt, &tsm); 2554 ts->bt_sec = tsm.tv_sec; 2555 ts->bt_frac = tsm.tv_usec; 2556 break; 2557 case BPF_T_NANOTIME: 2558 bintime2timespec(bt, &tsn); 2559 ts->bt_sec = tsn.tv_sec; 2560 ts->bt_frac = tsn.tv_nsec; 2561 break; 2562 case BPF_T_BINTIME: 2563 ts->bt_sec = bt->sec; 2564 ts->bt_frac = bt->frac; 2565 break; 2566 } 2567 } 2568 2569 /* 2570 * Move the packet data from interface memory (pkt) into the 2571 * store buffer. "cpfn" is the routine called to do the actual data 2572 * transfer. bcopy is passed in to copy contiguous chunks, while 2573 * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case, 2574 * pkt is really an mbuf. 
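 * (In this file the contiguous-chunk routine actually passed in is bpf_append_bytes() -- see bpf_tap() above -- while bpf_mtap() and bpf_mtap2() pass bpf_append_mbuf() for chains.)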
2575 */ 2576 static void 2577 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 2578 void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), 2579 struct bintime *bt) 2580 { 2581 static char zeroes[BPF_ALIGNMENT]; 2582 struct bpf_xhdr hdr; 2583 #ifndef BURN_BRIDGES 2584 struct bpf_hdr hdr_old; 2585 #ifdef COMPAT_FREEBSD32 2586 struct bpf_hdr32 hdr32_old; 2587 #endif 2588 #endif 2589 int caplen, curlen, hdrlen, pad, totlen; 2590 int do_wakeup = 0; 2591 int do_timestamp; 2592 int tstype; 2593 2594 BPFD_LOCK_ASSERT(d); 2595 if (d->bd_bif == NULL) { 2596 /* Descriptor was detached in concurrent thread */ 2597 counter_u64_add(d->bd_dcount, 1); 2598 return; 2599 } 2600 2601 /* 2602 * Detect whether user space has released a buffer back to us, and if 2603 * so, move it from being a hold buffer to a free buffer. This may 2604 * not be the best place to do it (for example, we might only want to 2605 * run this check if we need the space), but for now it's a reliable 2606 * spot to do it. 2607 */ 2608 if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { 2609 d->bd_fbuf = d->bd_hbuf; 2610 d->bd_hbuf = NULL; 2611 d->bd_hlen = 0; 2612 bpf_buf_reclaimed(d); 2613 } 2614 2615 /* 2616 * Figure out how many bytes to move. If the packet is 2617 * greater or equal to the snapshot length, transfer that 2618 * much. Otherwise, transfer the whole packet (unless 2619 * we hit the buffer size limit). 2620 */ 2621 hdrlen = bpf_hdrlen(d); 2622 totlen = hdrlen + min(snaplen, pktlen); 2623 if (totlen > d->bd_bufsize) 2624 totlen = d->bd_bufsize; 2625 2626 /* 2627 * Round up the end of the previous packet to the next longword. 2628 * 2629 * Drop the packet if there's no room and no hope of room 2630 * If the packet would overflow the storage buffer or the storage 2631 * buffer is considered immutable by the buffer model, try to rotate 2632 * the buffer and wakeup pending processes. 2633 */ 2634 #ifdef COMPAT_FREEBSD32 2635 if (d->bd_compat32) 2636 curlen = BPF_WORDALIGN32(d->bd_slen); 2637 else 2638 #endif 2639 curlen = BPF_WORDALIGN(d->bd_slen); 2640 if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { 2641 if (d->bd_fbuf == NULL) { 2642 /* 2643 * There's no room in the store buffer, and no 2644 * prospect of room, so drop the packet. Notify the 2645 * buffer model. 2646 */ 2647 bpf_buffull(d); 2648 counter_u64_add(d->bd_dcount, 1); 2649 return; 2650 } 2651 KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use")); 2652 ROTATE_BUFFERS(d); 2653 do_wakeup = 1; 2654 curlen = 0; 2655 } else { 2656 if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) { 2657 /* 2658 * Immediate mode is set, or the read timeout has 2659 * already expired during a select call. A packet 2660 * arrived, so the reader should be woken up. 2661 */ 2662 do_wakeup = 1; 2663 } 2664 pad = curlen - d->bd_slen; 2665 KASSERT(pad >= 0 && pad <= sizeof(zeroes), 2666 ("%s: invalid pad byte count %d", __func__, pad)); 2667 if (pad > 0) { 2668 /* Zero pad bytes. 
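 * Zeroing the alignment gap keeps stale kernel buffer contents from being copied out to user space along with the captured packets.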
*/ 2669 bpf_append_bytes(d, d->bd_sbuf, d->bd_slen, zeroes, 2670 pad); 2671 } 2672 } 2673 2674 caplen = totlen - hdrlen; 2675 tstype = d->bd_tstamp; 2676 do_timestamp = tstype != BPF_T_NONE; 2677 #ifndef BURN_BRIDGES 2678 if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) { 2679 struct bpf_ts ts; 2680 if (do_timestamp) 2681 bpf_bintime2ts(bt, &ts, tstype); 2682 #ifdef COMPAT_FREEBSD32 2683 if (d->bd_compat32) { 2684 bzero(&hdr32_old, sizeof(hdr32_old)); 2685 if (do_timestamp) { 2686 hdr32_old.bh_tstamp.tv_sec = ts.bt_sec; 2687 hdr32_old.bh_tstamp.tv_usec = ts.bt_frac; 2688 } 2689 hdr32_old.bh_datalen = pktlen; 2690 hdr32_old.bh_hdrlen = hdrlen; 2691 hdr32_old.bh_caplen = caplen; 2692 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old, 2693 sizeof(hdr32_old)); 2694 goto copy; 2695 } 2696 #endif 2697 bzero(&hdr_old, sizeof(hdr_old)); 2698 if (do_timestamp) { 2699 hdr_old.bh_tstamp.tv_sec = ts.bt_sec; 2700 hdr_old.bh_tstamp.tv_usec = ts.bt_frac; 2701 } 2702 hdr_old.bh_datalen = pktlen; 2703 hdr_old.bh_hdrlen = hdrlen; 2704 hdr_old.bh_caplen = caplen; 2705 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old, 2706 sizeof(hdr_old)); 2707 goto copy; 2708 } 2709 #endif 2710 2711 /* 2712 * Append the bpf header. Note we append the actual header size, but 2713 * move forward the length of the header plus padding. 2714 */ 2715 bzero(&hdr, sizeof(hdr)); 2716 if (do_timestamp) 2717 bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype); 2718 hdr.bh_datalen = pktlen; 2719 hdr.bh_hdrlen = hdrlen; 2720 hdr.bh_caplen = caplen; 2721 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); 2722 2723 /* 2724 * Copy the packet data into the store buffer and update its length. 2725 */ 2726 #ifndef BURN_BRIDGES 2727 copy: 2728 #endif 2729 (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen); 2730 d->bd_slen = curlen + totlen; 2731 2732 if (do_wakeup) 2733 bpf_wakeup(d); 2734 } 2735 2736 /* 2737 * Free buffers currently in use by a descriptor. 2738 * Called on close. 2739 */ 2740 static void 2741 bpfd_free(epoch_context_t ctx) 2742 { 2743 struct bpf_d *d; 2744 struct bpf_program_buffer *p; 2745 2746 /* 2747 * We don't need to lock out interrupts since this descriptor has 2748 * been detached from its interface and it yet hasn't been marked 2749 * free. 2750 */ 2751 d = __containerof(ctx, struct bpf_d, epoch_ctx); 2752 bpf_free(d); 2753 if (d->bd_rfilter != NULL) { 2754 p = __containerof((void *)d->bd_rfilter, 2755 struct bpf_program_buffer, buffer); 2756 #ifdef BPF_JITTER 2757 p->func = d->bd_bfilter; 2758 #endif 2759 bpf_program_buffer_free(&p->epoch_ctx); 2760 } 2761 if (d->bd_wfilter != NULL) { 2762 p = __containerof((void *)d->bd_wfilter, 2763 struct bpf_program_buffer, buffer); 2764 #ifdef BPF_JITTER 2765 p->func = NULL; 2766 #endif 2767 bpf_program_buffer_free(&p->epoch_ctx); 2768 } 2769 2770 mtx_destroy(&d->bd_lock); 2771 counter_u64_free(d->bd_rcount); 2772 counter_u64_free(d->bd_dcount); 2773 counter_u64_free(d->bd_fcount); 2774 counter_u64_free(d->bd_wcount); 2775 counter_u64_free(d->bd_wfcount); 2776 counter_u64_free(d->bd_wdcount); 2777 counter_u64_free(d->bd_zcopy); 2778 free(d, M_BPF); 2779 } 2780 2781 /* 2782 * Attach an interface to bpf. dlt is the link layer type; hdrlen is the 2783 * fixed size of the link header (variable length headers not yet supported). 2784 */ 2785 void 2786 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2787 { 2788 2789 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 2790 } 2791 2792 /* 2793 * Attach an interface to bpf. 
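 * (Ethernet drivers normally get this for free: ether_ifattach() calls bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN) on their behalf.)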
ifp is a pointer to the structure 2794 * defining the interface to be attached, dlt is the link layer type, 2795 * and hdrlen is the fixed size of the link header (variable length 2796 * headers are not yet supported). 2797 */ 2798 void 2799 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, 2800 struct bpf_if **driverp) 2801 { 2802 struct bpf_if *bp; 2803 2804 KASSERT(*driverp == NULL, 2805 ("bpfattach2: driverp already initialized")); 2806 2807 bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO); 2808 2809 CK_LIST_INIT(&bp->bif_dlist); 2810 CK_LIST_INIT(&bp->bif_wlist); 2811 bp->bif_ifp = ifp; 2812 bp->bif_dlt = dlt; 2813 bp->bif_hdrlen = hdrlen; 2814 bp->bif_bpf = driverp; 2815 refcount_init(&bp->bif_refcnt, 1); 2816 *driverp = bp; 2817 /* 2818 * Reference ifnet pointer, so it won't be freed until 2819 * we release it. 2820 */ 2821 if_ref(ifp); 2822 BPF_LOCK(); 2823 CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); 2824 BPF_UNLOCK(); 2825 2826 if (bootverbose && IS_DEFAULT_VNET(curvnet)) 2827 if_printf(ifp, "bpf attached\n"); 2828 } 2829 2830 #ifdef VIMAGE 2831 /* 2832 * When moving interfaces between vnet instances we need a way to 2833 * query the dlt and hdrlen before detach so we can re-attach the if_bpf 2834 * after the vmove. We unfortunately have no device driver infrastructure 2835 * to query the interface for these values after creation/attach, thus 2836 * add this as a workaround. 2837 */ 2838 int 2839 bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen) 2840 { 2841 2842 if (bp == NULL) 2843 return (ENXIO); 2844 if (bif_dlt == NULL && bif_hdrlen == NULL) 2845 return (0); 2846 2847 if (bif_dlt != NULL) 2848 *bif_dlt = bp->bif_dlt; 2849 if (bif_hdrlen != NULL) 2850 *bif_hdrlen = bp->bif_hdrlen; 2851 2852 return (0); 2853 } 2854 2855 /* 2856 * Detach descriptors on interface's vmove event. 2857 */ 2858 void 2859 bpf_ifdetach(struct ifnet *ifp) 2860 { 2861 struct bpf_if *bp; 2862 struct bpf_d *d; 2863 2864 BPF_LOCK(); 2865 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2866 if (bp->bif_ifp != ifp) 2867 continue; 2868 2869 /* Detach common descriptors */ 2870 while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) { 2871 bpf_detachd_locked(d, true); 2872 } 2873 2874 /* Detach writer-only descriptors */ 2875 while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) { 2876 bpf_detachd_locked(d, true); 2877 } 2878 } 2879 BPF_UNLOCK(); 2880 } 2881 #endif 2882 2883 /* 2884 * Detach bpf from an interface. This involves detaching each descriptor 2885 * associated with the interface. Notify each descriptor as it's detached 2886 * so that any sleepers wake up and get ENXIO. 2887 */ 2888 void 2889 bpfdetach(struct ifnet *ifp) 2890 { 2891 struct bpf_if *bp, *bp_temp; 2892 struct bpf_d *d; 2893 2894 BPF_LOCK(); 2895 /* Find all bpf_if structs which reference ifp and detach them.
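 * An interface may own more than one bpf_if, one per data link type registered via bpfattach2(), which is why the loop keeps scanning instead of stopping at the first match.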
*/ 2896 CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) { 2897 if (ifp != bp->bif_ifp) 2898 continue; 2899 2900 CK_LIST_REMOVE(bp, bif_next); 2901 *bp->bif_bpf = __DECONST(struct bpf_if *, &dead_bpf_if); 2902 2903 CTR4(KTR_NET, 2904 "%s: scheduling free for encap %d (%p) for if %p", 2905 __func__, bp->bif_dlt, bp, ifp); 2906 2907 /* Detach common descriptors */ 2908 while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) { 2909 bpf_detachd_locked(d, true); 2910 } 2911 2912 /* Detach writer-only descriptors */ 2913 while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) { 2914 bpf_detachd_locked(d, true); 2915 } 2916 bpfif_rele(bp); 2917 } 2918 BPF_UNLOCK(); 2919 } 2920 2921 bool 2922 bpf_peers_present_if(struct ifnet *ifp) 2923 { 2924 return (bpf_peers_present(ifp->if_bpf)); 2925 } 2926 2927 /* 2928 * Get a list of the available data link types of the interface. 2929 */ 2930 static int 2931 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 2932 { 2933 struct ifnet *ifp; 2934 struct bpf_if *bp; 2935 u_int *lst; 2936 int error, n, n1; 2937 2938 BPF_LOCK_ASSERT(); 2939 2940 ifp = d->bd_bif->bif_ifp; 2941 n1 = 0; 2942 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2943 if (bp->bif_ifp == ifp) 2944 n1++; 2945 } 2946 if (bfl->bfl_list == NULL) { 2947 bfl->bfl_len = n1; 2948 return (0); 2949 } 2950 if (n1 > bfl->bfl_len) 2951 return (ENOMEM); 2952 2953 lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK); 2954 n = 0; 2955 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2956 if (bp->bif_ifp != ifp) 2957 continue; 2958 lst[n++] = bp->bif_dlt; 2959 } 2960 error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n); 2961 free(lst, M_TEMP); 2962 bfl->bfl_len = n; 2963 return (error); 2964 } 2965 2966 /* 2967 * Set the data link type of a BPF instance. 2968 */ 2969 static int 2970 bpf_setdlt(struct bpf_d *d, u_int dlt) 2971 { 2972 int error, opromisc; 2973 struct ifnet *ifp; 2974 struct bpf_if *bp; 2975 2976 BPF_LOCK_ASSERT(); 2977 MPASS(d->bd_bif != NULL); 2978 2979 /* 2980 * It is safe to check bd_bif without BPFD_LOCK; it cannot be 2981 * changed while we hold the global lock. 2982 */ 2983 if (d->bd_bif->bif_dlt == dlt) 2984 return (0); 2985 2986 ifp = d->bd_bif->bif_ifp; 2987 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2988 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 2989 break; 2990 } 2991 if (bp == NULL) 2992 return (EINVAL); 2993 2994 opromisc = d->bd_promisc; 2995 bpf_attachd(d, bp); 2996 if (opromisc) { 2997 error = ifpromisc(bp->bif_ifp, 1); 2998 if (error) 2999 if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n", 3000 __func__, error); 3001 else 3002 d->bd_promisc = 1; 3003 } 3004 return (0); 3005 } 3006 3007 static void 3008 bpf_drvinit(void *unused) 3009 { 3010 struct cdev *dev; 3011 3012 sx_init(&bpf_sx, "bpf global lock"); 3013 dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf"); 3014 /* For compatibility */ 3015 make_dev_alias(dev, "bpf0"); 3016 } 3017 3018 /* 3019 * Zero out the various packet counters associated with all of the bpf 3020 * descriptors. At some point, we will probably want to get a bit more 3021 * granular and allow the user to specify descriptors to be zeroed. 3022 */ 3023 static void 3024 bpf_zero_counters(void) 3025 { 3026 struct bpf_if *bp; 3027 struct bpf_d *bd; 3028 3029 BPF_LOCK(); 3030 /* 3031 * We are protected by the global lock here; interfaces and 3032 * descriptors cannot be deleted while we hold it.
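 * The per-descriptor statistics are counter(9) counters, so zeroing them does not require taking the individual descriptor locks.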
3033 */ 3034 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 3035 CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 3036 counter_u64_zero(bd->bd_rcount); 3037 counter_u64_zero(bd->bd_dcount); 3038 counter_u64_zero(bd->bd_fcount); 3039 counter_u64_zero(bd->bd_wcount); 3040 counter_u64_zero(bd->bd_wfcount); 3041 counter_u64_zero(bd->bd_zcopy); 3042 } 3043 } 3044 BPF_UNLOCK(); 3045 } 3046 3047 /* 3048 * Fill filter statistics 3049 */ 3050 static void 3051 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) 3052 { 3053 3054 BPF_LOCK_ASSERT(); 3055 bzero(d, sizeof(*d)); 3056 d->bd_structsize = sizeof(*d); 3057 d->bd_immediate = bd->bd_immediate; 3058 d->bd_promisc = bd->bd_promisc; 3059 d->bd_hdrcmplt = bd->bd_hdrcmplt; 3060 d->bd_direction = bd->bd_direction; 3061 d->bd_feedback = bd->bd_feedback; 3062 d->bd_async = bd->bd_async; 3063 d->bd_rcount = counter_u64_fetch(bd->bd_rcount); 3064 d->bd_dcount = counter_u64_fetch(bd->bd_dcount); 3065 d->bd_fcount = counter_u64_fetch(bd->bd_fcount); 3066 d->bd_sig = bd->bd_sig; 3067 d->bd_slen = bd->bd_slen; 3068 d->bd_hlen = bd->bd_hlen; 3069 d->bd_bufsize = bd->bd_bufsize; 3070 d->bd_pid = bd->bd_pid; 3071 strlcpy(d->bd_ifname, 3072 bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ); 3073 d->bd_locked = bd->bd_locked; 3074 d->bd_wcount = counter_u64_fetch(bd->bd_wcount); 3075 d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount); 3076 d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount); 3077 d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy); 3078 d->bd_bufmode = bd->bd_bufmode; 3079 } 3080 3081 /* 3082 * Handle `netstat -B' stats request 3083 */ 3084 static int 3085 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) 3086 { 3087 static const struct xbpf_d zerostats; 3088 struct xbpf_d *xbdbuf, *xbd, tempstats; 3089 int index, error; 3090 struct bpf_if *bp; 3091 struct bpf_d *bd; 3092 3093 /* 3094 * XXX This is not technically correct. It is possible for non 3095 * privileged users to open bpf devices. It would make sense 3096 * if the users who opened the devices were able to retrieve 3097 * the statistics for them, too. 3098 */ 3099 error = priv_check(req->td, PRIV_NET_BPF); 3100 if (error) 3101 return (error); 3102 /* 3103 * Check to see if the user is requesting that the counters be 3104 * zeroed out. Explicitly check that the supplied data is zeroed, 3105 * as we aren't allowing the user to set the counters currently. 
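 * (Concretely: a write of exactly sizeof(struct xbpf_d) zero bytes to the sysctl resets every descriptor's counters; any other payload is rejected with EINVAL.)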
3106 */ 3107 if (req->newptr != NULL) { 3108 if (req->newlen != sizeof(tempstats)) 3109 return (EINVAL); 3110 memset(&tempstats, 0, sizeof(tempstats)); 3111 error = SYSCTL_IN(req, &tempstats, sizeof(tempstats)); 3112 if (error) 3113 return (error); 3114 if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0) 3115 return (EINVAL); 3116 bpf_zero_counters(); 3117 return (0); 3118 } 3119 if (req->oldptr == NULL) 3120 return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd))); 3121 if (bpf_bpfd_cnt == 0) 3122 return (SYSCTL_OUT(req, 0, 0)); 3123 xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK); 3124 BPF_LOCK(); 3125 if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) { 3126 BPF_UNLOCK(); 3127 free(xbdbuf, M_BPF); 3128 return (ENOMEM); 3129 } 3130 index = 0; 3131 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 3132 /* Send writers-only first */ 3133 CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) { 3134 xbd = &xbdbuf[index++]; 3135 bpfstats_fill_xbpf(xbd, bd); 3136 } 3137 CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 3138 xbd = &xbdbuf[index++]; 3139 bpfstats_fill_xbpf(xbd, bd); 3140 } 3141 } 3142 BPF_UNLOCK(); 3143 error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); 3144 free(xbdbuf, M_BPF); 3145 return (error); 3146 } 3147 3148 SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL); 3149 3150 #else /* !DEV_BPF && !NETGRAPH_BPF */ 3151 3152 /* 3153 * NOP stubs to allow bpf-using drivers to load and function. 3154 * 3155 * A 'better' implementation would allow the core bpf functionality 3156 * to be loaded at runtime. 3157 */ 3158 3159 void 3160 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 3161 { 3162 } 3163 3164 void 3165 bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen) 3166 { 3167 } 3168 3169 void 3170 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 3171 { 3172 } 3173 3174 void 3175 bpf_mtap_if(if_t ifp, struct mbuf *m) 3176 { 3177 } 3178 3179 void 3180 bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) 3181 { 3182 } 3183 3184 void 3185 bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m) 3186 { 3187 } 3188 3189 void 3190 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 3191 { 3192 3193 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 3194 } 3195 3196 void 3197 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 3198 { 3199 3200 *driverp = __DECONST(struct bpf_if *, &dead_bpf_if); 3201 } 3202 3203 void 3204 bpfdetach(struct ifnet *ifp) 3205 { 3206 } 3207 3208 bool 3209 bpf_peers_present_if(struct ifnet *ifp) 3210 { 3211 return (false); 3212 } 3213 3214 u_int 3215 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) 3216 { 3217 return (-1); /* "no filter" behaviour */ 3218 } 3219 3220 int 3221 bpf_validate(const struct bpf_insn *f, int len) 3222 { 3223 return (0); /* false */ 3224 } 3225 3226 #endif /* !DEV_BPF && !NETGRAPH_BPF */ 3227 3228 #ifdef DDB 3229 static void 3230 bpf_show_bpf_if(struct bpf_if *bpf_if) 3231 { 3232 3233 if (bpf_if == NULL) 3234 return; 3235 db_printf("%p:\n", bpf_if); 3236 #define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e); 3237 #define BPF_DB_PRINTF_RAW(f, e) db_printf(" %s = " f "\n", #e, e); 3238 /* bif_ext.bif_next */ 3239 /* bif_ext.bif_dlist */ 3240 BPF_DB_PRINTF("%#x", bif_dlt); 3241 BPF_DB_PRINTF("%u", bif_hdrlen); 3242 /* bif_wlist */ 3243 BPF_DB_PRINTF("%p", bif_ifp); 3244 BPF_DB_PRINTF("%p", bif_bpf); 3245 BPF_DB_PRINTF_RAW("%u", refcount_load(&bpf_if->bif_refcnt)); 3246 } 3247 3248 DB_SHOW_COMMAND(bpf_if, db_show_bpf_if) 3249 { 3250 3251 if (!have_addr) { 
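/* No address was supplied; print usage. A typical invocation from the ddb prompt is "show bpf_if 0xfffff80003a4b600", where the address is purely illustrative. */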
3252 db_printf("usage: show bpf_if <struct bpf_if *>\n"); 3253 return; 3254 } 3255 3256 bpf_show_bpf_if((struct bpf_if *)addr); 3257 } 3258 #endif 3259