1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org> 7 * 8 * This code is derived from the Stanford/CMU enet packet filter, 9 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 10 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 11 * Berkeley Laboratory. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
36 */ 37 38 #include <sys/cdefs.h> 39 #include "opt_bpf.h" 40 #include "opt_netgraph.h" 41 42 #include <sys/param.h> 43 #include <sys/conf.h> 44 #include <sys/fcntl.h> 45 #include <sys/jail.h> 46 #include <sys/ktr.h> 47 #include <sys/lock.h> 48 #include <sys/malloc.h> 49 #include <sys/mbuf.h> 50 #include <sys/mutex.h> 51 #include <sys/time.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/signalvar.h> 55 #include <sys/filio.h> 56 #include <sys/sockio.h> 57 #include <sys/ttycom.h> 58 #include <sys/uio.h> 59 #include <sys/sysent.h> 60 #include <sys/systm.h> 61 62 #include <sys/file.h> 63 #include <sys/poll.h> 64 #include <sys/proc.h> 65 66 #include <sys/socket.h> 67 68 #include <net/if.h> 69 #include <net/if_var.h> 70 #include <net/if_private.h> 71 #include <net/if_vlan_var.h> 72 #include <net/bpf.h> 73 #include <net/bpf_buffer.h> 74 #ifdef BPF_JITTER 75 #include <net/bpf_jitter.h> 76 #endif 77 #include <net/bpf_zerocopy.h> 78 #include <net/bpfdesc.h> 79 #include <net/vnet.h> 80 81 #include <sys/kernel.h> 82 #include <sys/sysctl.h> 83 84 #include <security/mac/mac_framework.h> 85 86 MALLOC_DEFINE(M_BPF, "BPF", "BPF data"); 87 88 struct bpf_if { 89 struct bpfd_list bif_dlist; /* list of all interfaces */ 90 LIST_ENTRY(bpf_if) bif_next; /* descriptor list */ 91 u_int bif_dlt; /* link layer type */ 92 u_int bif_hdrlen; /* length of link header */ 93 volatile u_int bif_refcnt; 94 struct bpfd_list bif_wlist; /* writer-only list */ 95 const struct bif_methods *bif_methods; 96 void *bif_softc; 97 const char *bif_name; 98 struct epoch_context epoch_ctx; 99 }; 100 101 /* See bpf_peers_present() in bpf.h. */ 102 _Static_assert(offsetof(struct bpf_if, bif_dlist) == 0, 103 "bpf_if shall start with bif_dlist"); 104 105 static inline void 106 bif_attachd(struct bpf_if *bp) 107 { 108 if (bp->bif_methods->bif_attachd != NULL) 109 bp->bif_methods->bif_attachd(bp->bif_softc); 110 } 111 112 static inline void 113 bif_detachd(struct bpf_if *bp) 114 { 115 if (bp->bif_methods->bif_detachd != NULL) 116 bp->bif_methods->bif_detachd(bp->bif_softc); 117 } 118 119 static inline uint32_t 120 bif_wrsize(struct bpf_if *bp) 121 { 122 if (bp->bif_methods->bif_wrsize != NULL) 123 return (bp->bif_methods->bif_wrsize(bp->bif_softc)); 124 else 125 return (0); 126 } 127 128 static inline int 129 bif_promisc(struct bpf_if *bp, bool on) 130 { 131 if (bp->bif_methods->bif_promisc != NULL) 132 return (bp->bif_methods->bif_promisc(bp->bif_softc, on)); 133 else 134 return (0); 135 } 136 137 #ifdef MAC 138 static inline int 139 bif_mac_check_receive(struct bpf_if *bp, struct bpf_d *d) 140 { 141 if (bp->bif_methods->bif_mac_check_receive != NULL) 142 return (bp->bif_methods->bif_mac_check_receive(bp->bif_softc, 143 d)); 144 else 145 return (0); 146 } 147 #endif 148 149 /* 150 * XXXGL: Once we migrate to tapping KPI that would specify packet direction 151 * we no longer need bif_chkdir method. 
 */
static inline bool
bpf_chkdir(struct bpf_d *d, struct mbuf *m)
{
	return (d->bd_bif->bif_methods->bif_chkdir(d->bd_bif->bif_softc, m,
	    d->bd_direction));
}

struct bpf_program_buffer {
	struct epoch_context	epoch_ctx;
#ifdef BPF_JITTER
	bpf_jit_filter		*func;
#endif
	void			*buffer[0];
};

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define	PRINET		26	/* interruptible */
#define	BPF_PRIO_MAX	7

#define	SIZEOF_BPF_HDR(type)	\
    (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))

#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define	BPF_ALIGNMENT32		sizeof(int32_t)
#define	BPF_WORDALIGN32(x)	roundup2(x, BPF_ALIGNMENT32)

#ifndef BURN_BRIDGES
/*
 * 32-bit version of structure prepended to each packet.  We use this header
 * instead of the standard one for 32-bit streams.  We mark a stream as
 * 32-bit the first time we see a 32-bit compat ioctl request.
 */
struct bpf_hdr32 {
	struct timeval32 bh_tstamp;	/* time stamp */
	uint32_t	bh_caplen;	/* length of captured portion */
	uint32_t	bh_datalen;	/* original length of packet */
	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
					   plus alignment padding) */
};
#endif

struct bpf_program32 {
	u_int		bf_len;
	uint32_t	bf_insns;
};

struct bpf_dltlist32 {
	u_int		bfl_len;
	u_int		bfl_list;
};

#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
#endif

#define	BPF_LOCK()	sx_xlock(&bpf_sx)
#define	BPF_UNLOCK()	sx_xunlock(&bpf_sx)
#define	BPF_LOCK_ASSERT()	sx_assert(&bpf_sx, SA_XLOCKED)
/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, ethernet frames, etc).
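 * For example, an 802.11 interface may register one entry with a native
 * 802.11 DLT and another with DLT_EN10MB for its Ethernet-framed view;
 * a descriptor chooses among an interface's entries with BIOCSDLT.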
223 */ 224 static LIST_HEAD(, bpf_if) bpf_iflist = LIST_HEAD_INITIALIZER(); 225 static struct sx bpf_sx; /* bpf global lock */ 226 227 static void bpfif_ref(struct bpf_if *); 228 static void bpfif_rele(struct bpf_if *); 229 230 static void bpfd_ref(struct bpf_d *); 231 static void bpfd_rele(struct bpf_d *); 232 static int bpf_attachd(struct bpf_d *d, struct bpf_if *); 233 static void bpf_detachd(struct bpf_d *, bool); 234 static void bpfd_free(epoch_context_t); 235 static void bpf_timed_out(void *); 236 static __inline void 237 bpf_wakeup(struct bpf_d *); 238 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, 239 void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int), 240 struct bintime *); 241 static void reset_d(struct bpf_d *); 242 static int bpf_getiflist(struct bpf_iflist *); 243 static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd); 244 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 245 static int bpf_setdlt(struct bpf_d *, u_int); 246 static void filt_bpfdetach(struct knote *); 247 static int filt_bpfread(struct knote *, long); 248 static int filt_bpfwrite(struct knote *, long); 249 static void bpf_drvinit(void *); 250 static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS); 251 252 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 253 "bpf sysctl"); 254 int bpf_maxinsns = BPF_MAXINSNS; 255 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW, 256 &bpf_maxinsns, 0, "Maximum bpf program instructions"); 257 static int bpf_zerocopy_enable = 0; 258 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, 259 &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions"); 260 static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW, 261 bpf_stats_sysctl, "bpf statistics portal"); 262 263 VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0; 264 #define V_bpf_optimize_writers VNET(bpf_optimize_writers) 265 SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RWTUN, 266 &VNET_NAME(bpf_optimize_writers), 0, 267 "Do not send packets until BPF program is set"); 268 269 static d_open_t bpfopen; 270 static d_read_t bpfread; 271 static d_write_t bpfwrite; 272 static d_ioctl_t bpfioctl; 273 static d_poll_t bpfpoll; 274 static d_kqfilter_t bpfkqfilter; 275 276 static struct cdevsw bpf_cdevsw = { 277 .d_version = D_VERSION, 278 .d_open = bpfopen, 279 .d_read = bpfread, 280 .d_write = bpfwrite, 281 .d_ioctl = bpfioctl, 282 .d_poll = bpfpoll, 283 .d_name = "bpf", 284 .d_kqfilter = bpfkqfilter, 285 }; 286 287 static const struct filterops bpfread_filtops = { 288 .f_isfd = 1, 289 .f_detach = filt_bpfdetach, 290 .f_event = filt_bpfread, 291 .f_copy = knote_triv_copy, 292 }; 293 294 static const struct filterops bpfwrite_filtops = { 295 .f_isfd = 1, 296 .f_detach = filt_bpfdetach, 297 .f_event = filt_bpfwrite, 298 .f_copy = knote_triv_copy, 299 }; 300 301 /* 302 * LOCKING MODEL USED BY BPF 303 * 304 * Locks: 305 * 1) global lock (BPF_LOCK). Sx, used to protect some global counters, 306 * every bpf_iflist changes, serializes ioctl access to bpf descriptors. 307 * 2) Descriptor lock. Mutex, used to protect BPF buffers and various 308 * structure fields used by bpf_*tap* code. 309 * 310 * Lock order: global lock, then descriptor lock. 311 * 312 * There are several possible consumers: 313 * 314 * 1. The kernel registers interface pointer with bpfattach(). 315 * Each call allocates new bpf_if structure, references ifnet pointer 316 * and links bpf_if into bpf_iflist chain. This is protected with global 317 * lock. 
 *
 * 2. A userland application issues ioctl() calls on a bpf_d descriptor.
 * All such calls are serialized with the global lock.  BPF filters can be
 * changed, but the pointer to the old filter will be freed using
 * NET_EPOCH_CALL().  Thus it should be safe for bpf_tap/bpf_mtap* code to
 * access filter pointers, even if a change happens during bpf_tap execution.
 * Destruction of a bpf_d descriptor is also done using NET_EPOCH_CALL().
 *
 * 3. A userland application can write packets into a bpf_d descriptor.
 * Here we need to be sure that the ifnet won't disappear during bpfwrite().
 *
 * 4. The kernel invokes bpf_tap/bpf_mtap* functions.  The access to
 * bif_dlist is protected with a net_epoch_preempt section, so it should
 * be safe to access the bpf_d descriptor inside the section.
 *
 * 5. The kernel invokes bpfdetach() on interface destruction.  All lists
 * are modified with the global lock held and the actual free() is done
 * using NET_EPOCH_CALL().
 */

static void
bpfif_free(epoch_context_t ctx)
{
	struct bpf_if *bp;

	bp = __containerof(ctx, struct bpf_if, epoch_ctx);
	free(bp, M_BPF);
}

static void
bpfif_ref(struct bpf_if *bp)
{

	refcount_acquire(&bp->bif_refcnt);
}

static void
bpfif_rele(struct bpf_if *bp)
{

	if (!refcount_release(&bp->bif_refcnt))
		return;
	NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx);
}

static void
bpfd_ref(struct bpf_d *d)
{

	refcount_acquire(&d->bd_refcnt);
}

static void
bpfd_rele(struct bpf_d *d)
{

	if (!refcount_release(&d->bd_refcnt))
		return;
	NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
}

static struct bpf_program_buffer*
bpf_program_buffer_alloc(size_t size, int flags)
{

	return (malloc(sizeof(struct bpf_program_buffer) + size,
	    M_BPF, flags));
}

static void
bpf_program_buffer_free(epoch_context_t ctx)
{
	struct bpf_program_buffer *ptr;

	ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
#ifdef BPF_JITTER
	if (ptr->func != NULL)
		bpf_destroy_jit_filter(ptr->func);
#endif
	free(ptr, M_BPF);
}

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, et al.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		counter_u64_add(d->bd_zcopy, 1);
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		counter_u64_add(d->bd_zcopy, 1);
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * This function gets called when the free buffer is re-assigned.
447 */ 448 static void 449 bpf_buf_reclaimed(struct bpf_d *d) 450 { 451 452 BPFD_LOCK_ASSERT(d); 453 454 switch (d->bd_bufmode) { 455 case BPF_BUFMODE_BUFFER: 456 return; 457 458 case BPF_BUFMODE_ZBUF: 459 bpf_zerocopy_buf_reclaimed(d); 460 return; 461 462 default: 463 panic("bpf_buf_reclaimed"); 464 } 465 } 466 467 /* 468 * If the buffer mechanism has a way to decide that a held buffer can be made 469 * free, then it is exposed via the bpf_canfreebuf() interface. (1) is 470 * returned if the buffer can be discarded, (0) is returned if it cannot. 471 */ 472 static int 473 bpf_canfreebuf(struct bpf_d *d) 474 { 475 476 BPFD_LOCK_ASSERT(d); 477 478 switch (d->bd_bufmode) { 479 case BPF_BUFMODE_ZBUF: 480 return (bpf_zerocopy_canfreebuf(d)); 481 } 482 return (0); 483 } 484 485 /* 486 * Allow the buffer model to indicate that the current store buffer is 487 * immutable, regardless of the appearance of space. Return (1) if the 488 * buffer is writable, and (0) if not. 489 */ 490 static int 491 bpf_canwritebuf(struct bpf_d *d) 492 { 493 BPFD_LOCK_ASSERT(d); 494 495 switch (d->bd_bufmode) { 496 case BPF_BUFMODE_ZBUF: 497 return (bpf_zerocopy_canwritebuf(d)); 498 } 499 return (1); 500 } 501 502 /* 503 * Notify buffer model that an attempt to write to the store buffer has 504 * resulted in a dropped packet, in which case the buffer may be considered 505 * full. 506 */ 507 static void 508 bpf_buffull(struct bpf_d *d) 509 { 510 511 BPFD_LOCK_ASSERT(d); 512 513 switch (d->bd_bufmode) { 514 case BPF_BUFMODE_ZBUF: 515 bpf_zerocopy_buffull(d); 516 break; 517 } 518 } 519 520 /* 521 * Notify the buffer model that a buffer has moved into the hold position. 522 */ 523 void 524 bpf_bufheld(struct bpf_d *d) 525 { 526 527 BPFD_LOCK_ASSERT(d); 528 529 switch (d->bd_bufmode) { 530 case BPF_BUFMODE_ZBUF: 531 bpf_zerocopy_bufheld(d); 532 break; 533 } 534 } 535 536 static void 537 bpf_free(struct bpf_d *d) 538 { 539 540 switch (d->bd_bufmode) { 541 case BPF_BUFMODE_BUFFER: 542 return (bpf_buffer_free(d)); 543 544 case BPF_BUFMODE_ZBUF: 545 return (bpf_zerocopy_free(d)); 546 547 default: 548 panic("bpf_buf_free"); 549 } 550 } 551 552 static int 553 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio) 554 { 555 556 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 557 return (EOPNOTSUPP); 558 return (bpf_buffer_uiomove(d, buf, len, uio)); 559 } 560 561 static int 562 bpf_ioctl_sblen(struct bpf_d *d, u_int *i) 563 { 564 565 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) 566 return (EOPNOTSUPP); 567 return (bpf_buffer_ioctl_sblen(d, i)); 568 } 569 570 static int 571 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i) 572 { 573 574 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 575 return (EOPNOTSUPP); 576 return (bpf_zerocopy_ioctl_getzmax(td, d, i)); 577 } 578 579 static int 580 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 581 { 582 583 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 584 return (EOPNOTSUPP); 585 return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz)); 586 } 587 588 static int 589 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz) 590 { 591 592 if (d->bd_bufmode != BPF_BUFMODE_ZBUF) 593 return (EOPNOTSUPP); 594 return (bpf_zerocopy_ioctl_setzbuf(td, d, bz)); 595 } 596 597 /* 598 * Check if we need to upgrade our descriptor @d from write-only mode. 
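 *
 * A descriptor opened write-only starts with bd_writer == 2 and is kept on
 * the writer-only list until a "real" read filter shows up.  The heuristic
 * below assumes libpcap's usual open sequence: pcap_open_live() first
 * installs a one-instruction accept-all program just to set the snap
 * length, roughly equivalent to (a sketch, not literal libpcap code;
 * snaplen stands for whatever capture length the application requested):
 *
 *	struct bpf_insn snap[] = {
 *		BPF_STMT(BPF_RET | BPF_K, snaplen),
 *	};
 *
 * so a single BPF_RET|BPF_K instruction is counted as snaplen setup rather
 * than as a reason to upgrade the descriptor to an active reader right away.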
599 */ 600 static int 601 bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, 602 int flen) 603 { 604 int is_snap, need_upgrade; 605 606 /* 607 * Check if we've already upgraded or new filter is empty. 608 */ 609 if (d->bd_writer == 0 || fcode == NULL) 610 return (0); 611 612 need_upgrade = 0; 613 614 /* 615 * Check if cmd looks like snaplen setting from 616 * pcap_bpf.c:pcap_open_live(). 617 * Note we're not checking .k value here: 618 * while pcap_open_live() definitely sets to non-zero value, 619 * we'd prefer to treat k=0 (deny ALL) case the same way: e.g. 620 * do not consider upgrading immediately 621 */ 622 if (cmd == BIOCSETF && flen == 1 && 623 fcode[0].code == (BPF_RET | BPF_K)) 624 is_snap = 1; 625 else 626 is_snap = 0; 627 628 if (is_snap == 0) { 629 /* 630 * We're setting first filter and it doesn't look like 631 * setting snaplen. We're probably using bpf directly. 632 * Upgrade immediately. 633 */ 634 need_upgrade = 1; 635 } else { 636 /* 637 * Do not require upgrade by first BIOCSETF 638 * (used to set snaplen) by pcap_open_live(). 639 */ 640 641 if (--d->bd_writer == 0) { 642 /* 643 * First snaplen filter has already 644 * been set. This is probably catch-all 645 * filter 646 */ 647 need_upgrade = 1; 648 } 649 } 650 651 CTR5(KTR_NET, 652 "%s: filter function set by pid %d, " 653 "bd_writer counter %d, snap %d upgrade %d", 654 __func__, d->bd_pid, d->bd_writer, 655 is_snap, need_upgrade); 656 657 return (need_upgrade); 658 } 659 660 /* 661 * Detach a file from its interface. 662 */ 663 static void 664 bpf_detachd(struct bpf_d *d, bool detached_ifp) 665 { 666 struct bpf_if *bp; 667 bool writer; 668 669 BPF_LOCK_ASSERT(); 670 CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid); 671 672 /* Check if descriptor is attached */ 673 if ((bp = d->bd_bif) == NULL) 674 return; 675 676 BPFD_LOCK(d); 677 CK_LIST_REMOVE(d, bd_next); 678 writer = (d->bd_writer > 0); 679 d->bd_bif = NULL; 680 if (detached_ifp) { 681 /* 682 * Notify descriptor as it's detached, so that any 683 * sleepers wake up and get ENXIO. 684 */ 685 bpf_wakeup(d); 686 } 687 BPFD_UNLOCK(d); 688 689 if (!writer) 690 bif_detachd(bp); 691 692 if (d->bd_promisc && !detached_ifp) { 693 d->bd_promisc = 0; 694 (void)bif_promisc(bp, false); 695 } 696 697 bpfif_rele(bp); 698 } 699 700 /* 701 * Close the descriptor by detaching it from its interface, 702 * deallocating its buffers, and marking it free. 703 */ 704 static void 705 bpf_dtor(void *data) 706 { 707 struct bpf_d *d = data; 708 709 BPFD_LOCK(d); 710 if (d->bd_state == BPF_WAITING) 711 callout_stop(&d->bd_callout); 712 d->bd_state = BPF_IDLE; 713 BPFD_UNLOCK(d); 714 funsetown(&d->bd_sigio); 715 BPF_LOCK(); 716 bpf_detachd(d, false); 717 BPF_UNLOCK(); 718 #ifdef MAC 719 mac_bpfdesc_destroy(d); 720 #endif /* MAC */ 721 seldrain(&d->bd_sel); 722 knlist_destroy(&d->bd_sel.si_note); 723 callout_drain(&d->bd_callout); 724 bpfd_rele(d); 725 } 726 727 /* 728 * Open ethernet device. Returns ENXIO for illegal minor device number, 729 * EBUSY if file is open by another process. 
730 */ 731 /* ARGSUSED */ 732 static int 733 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td) 734 { 735 struct bpf_d *d; 736 int error; 737 738 d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO); 739 error = devfs_set_cdevpriv(d, bpf_dtor); 740 if (error != 0) { 741 free(d, M_BPF); 742 return (error); 743 } 744 745 /* Setup counters */ 746 d->bd_rcount = counter_u64_alloc(M_WAITOK); 747 d->bd_dcount = counter_u64_alloc(M_WAITOK); 748 d->bd_fcount = counter_u64_alloc(M_WAITOK); 749 d->bd_wcount = counter_u64_alloc(M_WAITOK); 750 d->bd_wfcount = counter_u64_alloc(M_WAITOK); 751 d->bd_wdcount = counter_u64_alloc(M_WAITOK); 752 d->bd_zcopy = counter_u64_alloc(M_WAITOK); 753 754 /* 755 * For historical reasons, perform a one-time initialization call to 756 * the buffer routines, even though we're not yet committed to a 757 * particular buffer method. 758 */ 759 bpf_buffer_init(d); 760 if ((flags & FREAD) == 0) 761 d->bd_writer = 2; 762 d->bd_bufmode = BPF_BUFMODE_BUFFER; 763 d->bd_sig = SIGIO; 764 d->bd_direction = BPF_D_INOUT; 765 refcount_init(&d->bd_refcnt, 1); 766 BPF_PID_REFRESH(d, td); 767 #ifdef MAC 768 mac_bpfdesc_init(d); 769 mac_bpfdesc_create(td->td_ucred, d); 770 #endif 771 mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF); 772 callout_init_mtx(&d->bd_callout, &d->bd_lock, 0); 773 knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock); 774 775 /* Disable VLAN pcp tagging. */ 776 d->bd_pcp = 0; 777 778 return (0); 779 } 780 781 /* 782 * bpfread - read next chunk of packets from buffers 783 */ 784 static int 785 bpfread(struct cdev *dev, struct uio *uio, int ioflag) 786 { 787 struct bpf_d *d; 788 int error; 789 int non_block; 790 int timed_out; 791 792 error = devfs_get_cdevpriv((void **)&d); 793 if (error != 0) 794 return (error); 795 796 /* 797 * Restrict application to use a buffer the same size as 798 * as kernel buffers. 799 */ 800 if (uio->uio_resid != d->bd_bufsize) 801 return (EINVAL); 802 803 non_block = ((ioflag & O_NONBLOCK) != 0); 804 805 BPFD_LOCK(d); 806 BPF_PID_REFRESH_CUR(d); 807 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) { 808 BPFD_UNLOCK(d); 809 return (EOPNOTSUPP); 810 } 811 if (d->bd_state == BPF_WAITING) 812 callout_stop(&d->bd_callout); 813 timed_out = (d->bd_state == BPF_TIMED_OUT); 814 d->bd_state = BPF_IDLE; 815 while (d->bd_flags & BPFD_HBUF_INUSE) { 816 error = mtx_sleep(&d->bd_hbuf, &d->bd_lock, PRINET | PCATCH, 817 "bd_hbuf", 0); 818 if (error != 0) { 819 BPFD_UNLOCK(d); 820 return (error); 821 } 822 } 823 /* 824 * If the hold buffer is empty, then do a timed sleep, which 825 * ends when the timeout expires or when enough packets 826 * have arrived to fill the store buffer. 827 */ 828 while (d->bd_hbuf == NULL) { 829 if (d->bd_slen != 0) { 830 /* 831 * A packet(s) either arrived since the previous 832 * read or arrived while we were asleep. 833 */ 834 if ((d->bd_flags & BPFD_IMMEDIATE) || non_block || 835 timed_out) { 836 /* 837 * Rotate the buffers and return what's here 838 * if we are in immediate mode, non-blocking 839 * flag is set, or this descriptor timed out. 840 */ 841 ROTATE_BUFFERS(d); 842 break; 843 } 844 } 845 846 /* 847 * No data is available, check to see if the bpf device 848 * is still pointed at a real interface. If not, return 849 * ENXIO so that the userland process knows to rebind 850 * it before using it again. 
851 */ 852 if (d->bd_bif == NULL) { 853 BPFD_UNLOCK(d); 854 return (ENXIO); 855 } 856 857 if (non_block) { 858 BPFD_UNLOCK(d); 859 return (EWOULDBLOCK); 860 } 861 error = msleep(d, &d->bd_lock, PRINET | PCATCH, 862 "bpf", d->bd_rtout); 863 if (error == EINTR || error == ERESTART) { 864 BPFD_UNLOCK(d); 865 return (error); 866 } 867 if (error == EWOULDBLOCK) { 868 /* 869 * On a timeout, return what's in the buffer, 870 * which may be nothing. If there is something 871 * in the store buffer, we can rotate the buffers. 872 */ 873 if (d->bd_hbuf) 874 /* 875 * We filled up the buffer in between 876 * getting the timeout and arriving 877 * here, so we don't need to rotate. 878 */ 879 break; 880 881 if (d->bd_slen == 0) { 882 BPFD_UNLOCK(d); 883 return (0); 884 } 885 ROTATE_BUFFERS(d); 886 break; 887 } 888 } 889 /* 890 * At this point, we know we have something in the hold slot. 891 */ 892 d->bd_flags |= BPFD_HBUF_INUSE; 893 BPFD_UNLOCK(d); 894 895 /* 896 * Move data from hold buffer into user space. 897 * We know the entire buffer is transferred since 898 * we checked above that the read buffer is bpf_bufsize bytes. 899 * 900 * We do not have to worry about simultaneous reads because 901 * we waited for sole access to the hold buffer above. 902 */ 903 error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio); 904 905 BPFD_LOCK(d); 906 if (d->bd_flags & BPFD_HBUF_INUSE) { 907 KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf")); 908 d->bd_fbuf = d->bd_hbuf; 909 d->bd_hbuf = NULL; 910 d->bd_hlen = 0; 911 bpf_buf_reclaimed(d); 912 d->bd_flags &= ~BPFD_HBUF_INUSE; 913 wakeup(&d->bd_hbuf); 914 } 915 BPFD_UNLOCK(d); 916 917 return (error); 918 } 919 920 /* 921 * If there are processes sleeping on this descriptor, wake them up. 922 */ 923 static __inline void 924 bpf_wakeup(struct bpf_d *d) 925 { 926 927 BPFD_LOCK_ASSERT(d); 928 if (d->bd_state == BPF_WAITING) { 929 callout_stop(&d->bd_callout); 930 d->bd_state = BPF_IDLE; 931 } 932 wakeup(d); 933 if ((d->bd_flags & BPFD_ASYNC) && d->bd_sig && d->bd_sigio) 934 pgsigio(&d->bd_sigio, d->bd_sig, 0); 935 936 selwakeuppri(&d->bd_sel, PRINET); 937 KNOTE_LOCKED(&d->bd_sel.si_note, 0); 938 } 939 940 static void 941 bpf_timed_out(void *arg) 942 { 943 struct bpf_d *d = (struct bpf_d *)arg; 944 945 BPFD_LOCK_ASSERT(d); 946 947 if (callout_pending(&d->bd_callout) || 948 !callout_active(&d->bd_callout)) 949 return; 950 if (d->bd_state == BPF_WAITING) { 951 d->bd_state = BPF_TIMED_OUT; 952 if (d->bd_slen != 0) 953 bpf_wakeup(d); 954 } 955 } 956 957 static int 958 bpf_ready(struct bpf_d *d) 959 { 960 961 BPFD_LOCK_ASSERT(d); 962 963 if (!bpf_canfreebuf(d) && d->bd_hlen != 0) 964 return (1); 965 if (((d->bd_flags & BPFD_IMMEDIATE) || d->bd_state == BPF_TIMED_OUT) && 966 d->bd_slen != 0) 967 return (1); 968 return (0); 969 } 970 971 static int 972 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) 973 { 974 struct epoch_tracker et; 975 struct bpf_if *bp; 976 struct bpf_d *d; 977 struct mbuf *m, *mc; 978 ssize_t len; 979 int error; 980 981 error = devfs_get_cdevpriv((void **)&d); 982 if (error != 0) 983 return (error); 984 985 if (uio->uio_resid == 0) 986 return (0); 987 988 BPFD_LOCK(d); 989 if ((bp = d->bd_bif) == NULL) 990 error = ENXIO; 991 else if (bp->bif_methods->bif_write == NULL) 992 error = EOPNOTSUPP; 993 if (error) { 994 BPFD_UNLOCK(d); 995 counter_u64_add(d->bd_wdcount, 1); 996 return (error); 997 } 998 bpfd_ref(d); 999 BPFD_UNLOCK(d); 1000 1001 len = uio->uio_resid; 1002 /* Allocate a mbuf, up to MJUM16BYTES bytes, for our write. 
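	 * m_get3() picks the smallest mbuf/cluster size that fits the request
	 * and will not allocate more than MJUM16BYTES; that is why the NULL
	 * check below is still needed even though M_WAITOK is passed, and
	 * oversized writes end up failing with ENOMEM.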
*/ 1003 m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR); 1004 if (m == NULL) { 1005 error = ENOMEM; 1006 goto fail_wref; 1007 } 1008 m->m_pkthdr.len = m->m_len = len; 1009 1010 error = uiomove(mtod(m, u_char *), len, uio); 1011 if (error) 1012 goto fail_wref; 1013 1014 if (bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len) == 0) { 1015 error = EPERM; 1016 goto fail_wref; 1017 } 1018 1019 if (d->bd_flags & BPFD_FEEDBACK) { 1020 mc = m_dup(m, M_WAITOK); 1021 /* Set M_PROMISC for outgoing packets to be discarded. */ 1022 if (d->bd_direction == BPF_D_INOUT) 1023 m->m_flags |= M_PROMISC; 1024 } else 1025 mc = NULL; 1026 1027 /* XXXGL: should belong to bpf_ifnet.c */ 1028 if (d->bd_pcp != 0) 1029 (void)vlan_set_pcp(m, d->bd_pcp); 1030 1031 BPFD_LOCK(d); 1032 #ifdef MAC 1033 mac_bpfdesc_create_mbuf(d, m); 1034 if (mc != NULL) 1035 mac_bpfdesc_create_mbuf(d, mc); 1036 #endif 1037 /* 1038 * Check that descriptor is still attached to the interface. 1039 * This can happen on bpfdetach() or if other thread did BIOCSDLT. 1040 */ 1041 if (__predict_false(d->bd_bif != bp)) { 1042 BPFD_UNLOCK(d); 1043 m_freem(mc); 1044 error = ENXIO; 1045 goto fail_wref; 1046 } 1047 BPFD_UNLOCK(d); 1048 1049 NET_EPOCH_ENTER(et); 1050 error = bp->bif_methods->bif_write(bp->bif_softc, m, mc, d->bd_flags); 1051 NET_EPOCH_EXIT(et); 1052 if (error) 1053 counter_u64_add(d->bd_wdcount, 1); 1054 else 1055 counter_u64_add(d->bd_wfcount, 1); 1056 bpfd_rele(d); 1057 1058 return (error); 1059 1060 fail_wref: 1061 counter_u64_add(d->bd_wdcount, 1); 1062 bpfd_rele(d); 1063 m_freem(m); 1064 return (error); 1065 } 1066 1067 /* 1068 * Reset a descriptor by flushing its packet buffer and clearing the receive 1069 * and drop counts. This is doable for kernel-only buffers, but with 1070 * zero-copy buffers, we can't write to (or rotate) buffers that are 1071 * currently owned by userspace. It would be nice if we could encapsulate 1072 * this logic in the buffer code rather than here. 1073 */ 1074 static void 1075 reset_d(struct bpf_d *d) 1076 { 1077 1078 BPFD_LOCK_ASSERT(d); 1079 1080 while (d->bd_flags & BPFD_HBUF_INUSE) 1081 mtx_sleep(&d->bd_hbuf, &d->bd_lock, PRINET, "bd_hbuf", 0); 1082 if ((d->bd_hbuf != NULL) && 1083 (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) { 1084 /* Free the hold buffer. */ 1085 d->bd_fbuf = d->bd_hbuf; 1086 d->bd_hbuf = NULL; 1087 d->bd_hlen = 0; 1088 bpf_buf_reclaimed(d); 1089 } 1090 if (bpf_canwritebuf(d)) 1091 d->bd_slen = 0; 1092 counter_u64_zero(d->bd_rcount); 1093 counter_u64_zero(d->bd_dcount); 1094 counter_u64_zero(d->bd_fcount); 1095 counter_u64_zero(d->bd_wcount); 1096 counter_u64_zero(d->bd_wfcount); 1097 counter_u64_zero(d->bd_wdcount); 1098 counter_u64_zero(d->bd_zcopy); 1099 } 1100 1101 /* 1102 * FIONREAD Check for read packet available. 1103 * BIOCGETIFLIST Get list of all tap points. 1104 * BIOCGBLEN Get buffer len [for read()]. 1105 * BIOCSETF Set read filter. 1106 * BIOCSETFNR Set read filter without resetting descriptor. 1107 * BIOCSETWF Set write filter. 1108 * BIOCFLUSH Flush read packet buffer. 1109 * BIOCPROMISC Put interface into promiscuous mode. 1110 * BIOCGDLT Get link layer type. 1111 * BIOCGETIF Get interface name. 1112 * BIOCSETIF Set interface. 1113 * BIOCSRTIMEOUT Set read timeout. 1114 * BIOCGRTIMEOUT Get read timeout. 1115 * BIOCGSTATS Get packet stats. 1116 * BIOCIMMEDIATE Set immediate mode. 1117 * BIOCVERSION Get filter language version. 
1118 * BIOCGHDRCMPLT Get "header already complete" flag 1119 * BIOCSHDRCMPLT Set "header already complete" flag 1120 * BIOCGDIRECTION Get packet direction flag 1121 * BIOCSDIRECTION Set packet direction flag 1122 * BIOCGTSTAMP Get time stamp format and resolution. 1123 * BIOCSTSTAMP Set time stamp format and resolution. 1124 * BIOCLOCK Set "locked" flag 1125 * BIOCFEEDBACK Set packet feedback mode. 1126 * BIOCSETZBUF Set current zero-copy buffer locations. 1127 * BIOCGETZMAX Get maximum zero-copy buffer size. 1128 * BIOCROTZBUF Force rotation of zero-copy buffer 1129 * BIOCSETBUFMODE Set buffer mode. 1130 * BIOCGETBUFMODE Get current buffer mode. 1131 * BIOCSETVLANPCP Set VLAN PCP tag. 1132 */ 1133 /* ARGSUSED */ 1134 static int 1135 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 1136 struct thread *td) 1137 { 1138 struct bpf_d *d; 1139 int error; 1140 1141 error = devfs_get_cdevpriv((void **)&d); 1142 if (error != 0) 1143 return (error); 1144 1145 /* 1146 * Refresh PID associated with this descriptor. 1147 */ 1148 BPFD_LOCK(d); 1149 BPF_PID_REFRESH(d, td); 1150 if (d->bd_state == BPF_WAITING) 1151 callout_stop(&d->bd_callout); 1152 d->bd_state = BPF_IDLE; 1153 BPFD_UNLOCK(d); 1154 1155 if (d->bd_flags & BPFD_LOCKED) { 1156 switch (cmd) { 1157 case BIOCGETIFLIST: 1158 case BIOCGBLEN: 1159 case BIOCFLUSH: 1160 case BIOCGDLT: 1161 case BIOCGDLTLIST: 1162 #ifdef COMPAT_FREEBSD32 1163 case BIOCGDLTLIST32: 1164 #endif 1165 case BIOCGETIF: 1166 case BIOCGRTIMEOUT: 1167 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1168 case BIOCGRTIMEOUT32: 1169 #endif 1170 case BIOCGSTATS: 1171 case BIOCVERSION: 1172 case BIOCGRSIG: 1173 case BIOCGHDRCMPLT: 1174 case BIOCSTSTAMP: 1175 case BIOCFEEDBACK: 1176 case FIONREAD: 1177 case BIOCLOCK: 1178 case BIOCSRTIMEOUT: 1179 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1180 case BIOCSRTIMEOUT32: 1181 #endif 1182 case BIOCIMMEDIATE: 1183 case TIOCGPGRP: 1184 case BIOCROTZBUF: 1185 break; 1186 default: 1187 return (EPERM); 1188 } 1189 } 1190 #ifdef COMPAT_FREEBSD32 1191 /* 1192 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so 1193 * that it will get 32-bit packet headers. 1194 */ 1195 switch (cmd) { 1196 case BIOCSETF32: 1197 case BIOCSETFNR32: 1198 case BIOCSETWF32: 1199 case BIOCGDLTLIST32: 1200 case BIOCGRTIMEOUT32: 1201 case BIOCSRTIMEOUT32: 1202 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 1203 BPFD_LOCK(d); 1204 d->bd_compat32 = 1; 1205 BPFD_UNLOCK(d); 1206 } 1207 } 1208 #endif 1209 1210 CURVNET_SET(TD_TO_VNET(td)); 1211 switch (cmd) { 1212 default: 1213 error = EINVAL; 1214 break; 1215 1216 /* 1217 * Check for read packet available. 1218 */ 1219 case FIONREAD: 1220 { 1221 int n; 1222 1223 BPFD_LOCK(d); 1224 n = d->bd_slen; 1225 while (d->bd_flags & BPFD_HBUF_INUSE) 1226 mtx_sleep(&d->bd_hbuf, &d->bd_lock, 1227 PRINET, "bd_hbuf", 0); 1228 if (d->bd_hbuf) 1229 n += d->bd_hlen; 1230 BPFD_UNLOCK(d); 1231 1232 *(int *)addr = n; 1233 break; 1234 } 1235 /* 1236 * Get list of all tap points. 1237 */ 1238 case BIOCGETIFLIST: 1239 error = bpf_getiflist((struct bpf_iflist *)addr); 1240 break; 1241 1242 /* 1243 * Get buffer len [for read()]. 1244 */ 1245 case BIOCGBLEN: 1246 BPFD_LOCK(d); 1247 *(u_int *)addr = d->bd_bufsize; 1248 BPFD_UNLOCK(d); 1249 break; 1250 1251 /* 1252 * Set buffer length. 1253 */ 1254 case BIOCSBLEN: 1255 error = bpf_ioctl_sblen(d, (u_int *)addr); 1256 break; 1257 1258 /* 1259 * Set link layer read filter. 
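	 *
	 * The program is copied in and validated by bpf_setf() before it is
	 * published.  A userland consumer typically installs something like
	 * the classic "IPv4 over Ethernet" program below (a hedged sketch,
	 * not taken from any particular application):
	 *
	 *	struct bpf_insn insns[] = {
	 *		BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
	 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETHERTYPE_IP, 0, 1),
	 *		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
	 *		BPF_STMT(BPF_RET | BPF_K, 0),
	 *	};
	 *	struct bpf_program prog = { nitems(insns), insns };
	 *
	 *	ioctl(fd, BIOCSETF, &prog);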
1260 */ 1261 case BIOCSETF: 1262 case BIOCSETFNR: 1263 case BIOCSETWF: 1264 #ifdef COMPAT_FREEBSD32 1265 case BIOCSETF32: 1266 case BIOCSETFNR32: 1267 case BIOCSETWF32: 1268 #endif 1269 error = bpf_setf(d, (struct bpf_program *)addr, cmd); 1270 break; 1271 1272 /* 1273 * Flush read packet buffer. 1274 */ 1275 case BIOCFLUSH: 1276 BPFD_LOCK(d); 1277 reset_d(d); 1278 BPFD_UNLOCK(d); 1279 break; 1280 1281 /* 1282 * Put interface into promiscuous mode. 1283 */ 1284 case BIOCPROMISC: 1285 BPF_LOCK(); 1286 if (d->bd_bif == NULL) { 1287 /* 1288 * No interface attached yet. 1289 */ 1290 error = EINVAL; 1291 } else if (d->bd_promisc == 0) { 1292 struct bpf_if *bp = d->bd_bif; 1293 1294 if ((error = bif_promisc(bp, true)) == 0) 1295 d->bd_promisc = 1; 1296 } 1297 BPF_UNLOCK(); 1298 break; 1299 1300 /* 1301 * Get current data link type. 1302 */ 1303 case BIOCGDLT: 1304 BPF_LOCK(); 1305 if (d->bd_bif == NULL) 1306 error = EINVAL; 1307 else 1308 *(u_int *)addr = d->bd_bif->bif_dlt; 1309 BPF_UNLOCK(); 1310 break; 1311 1312 /* 1313 * Get a list of supported data link types. 1314 */ 1315 #ifdef COMPAT_FREEBSD32 1316 case BIOCGDLTLIST32: 1317 { 1318 struct bpf_dltlist32 *list32; 1319 struct bpf_dltlist dltlist; 1320 1321 list32 = (struct bpf_dltlist32 *)addr; 1322 dltlist.bfl_len = list32->bfl_len; 1323 dltlist.bfl_list = PTRIN(list32->bfl_list); 1324 BPF_LOCK(); 1325 if (d->bd_bif == NULL) 1326 error = EINVAL; 1327 else { 1328 error = bpf_getdltlist(d, &dltlist); 1329 if (error == 0) 1330 list32->bfl_len = dltlist.bfl_len; 1331 } 1332 BPF_UNLOCK(); 1333 break; 1334 } 1335 #endif 1336 1337 case BIOCGDLTLIST: 1338 BPF_LOCK(); 1339 if (d->bd_bif == NULL) 1340 error = EINVAL; 1341 else 1342 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); 1343 BPF_UNLOCK(); 1344 break; 1345 1346 /* 1347 * Set data link type. 1348 */ 1349 case BIOCSDLT: 1350 BPF_LOCK(); 1351 if (d->bd_bif == NULL) 1352 error = EINVAL; 1353 else 1354 error = bpf_setdlt(d, *(u_int *)addr); 1355 BPF_UNLOCK(); 1356 break; 1357 1358 /* 1359 * Get interface name. 1360 */ 1361 case BIOCGETIF: 1362 BPF_LOCK(); 1363 if (d->bd_bif == NULL) 1364 error = EINVAL; 1365 else { 1366 struct bpf_if *const bp = d->bd_bif; 1367 struct ifreq *const ifr = (struct ifreq *)addr; 1368 1369 strlcpy(ifr->ifr_name, bp->bif_name, 1370 sizeof(ifr->ifr_name)); 1371 } 1372 BPF_UNLOCK(); 1373 break; 1374 1375 /* 1376 * Set interface. 1377 */ 1378 case BIOCSETIF: { 1379 struct ifreq *const ifr = (struct ifreq *)addr; 1380 struct bpf_if *bp; 1381 1382 /* 1383 * Behavior here depends on the buffering model. If we're 1384 * using kernel memory buffers, then we can allocate them here. 1385 * If we're using zero-copy, then the user process must have 1386 * registered buffers by the time we get here. 1387 */ 1388 BPFD_LOCK(d); 1389 if (d->bd_bufmode == BPF_BUFMODE_BUFFER && 1390 d->bd_sbuf == NULL) { 1391 u_int size; 1392 1393 size = d->bd_bufsize; 1394 BPFD_UNLOCK(d); 1395 error = bpf_buffer_ioctl_sblen(d, &size); 1396 if (error != 0) 1397 break; 1398 } else 1399 BPFD_UNLOCK(d); 1400 BPF_LOCK(); 1401 /* 1402 * Look through attached interfaces for the named one. 1403 */ 1404 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1405 if (strncmp(ifr->ifr_name, bp->bif_name, 1406 sizeof(ifr->ifr_name)) == 0) 1407 break; 1408 } 1409 if (bp != NULL) 1410 error = bpf_attachd(d, bp); 1411 else 1412 error = ENXIO; 1413 BPF_UNLOCK(); 1414 break; 1415 } 1416 /* 1417 * Set read timeout. 
1418 */ 1419 case BIOCSRTIMEOUT: 1420 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1421 case BIOCSRTIMEOUT32: 1422 #endif 1423 { 1424 struct timeval *tv = (struct timeval *)addr; 1425 #if defined(COMPAT_FREEBSD32) 1426 struct timeval32 *tv32; 1427 struct timeval tv64; 1428 1429 if (cmd == BIOCSRTIMEOUT32) { 1430 tv32 = (struct timeval32 *)addr; 1431 tv = &tv64; 1432 tv->tv_sec = tv32->tv_sec; 1433 tv->tv_usec = tv32->tv_usec; 1434 } else 1435 #endif 1436 tv = (struct timeval *)addr; 1437 1438 /* 1439 * Subtract 1 tick from tvtohz() since this isn't 1440 * a one-shot timer. 1441 */ 1442 if ((error = itimerfix(tv)) == 0) 1443 d->bd_rtout = tvtohz(tv) - 1; 1444 break; 1445 } 1446 1447 /* 1448 * Get read timeout. 1449 */ 1450 case BIOCGRTIMEOUT: 1451 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1452 case BIOCGRTIMEOUT32: 1453 #endif 1454 { 1455 struct timeval *tv; 1456 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1457 struct timeval32 *tv32; 1458 struct timeval tv64; 1459 1460 if (cmd == BIOCGRTIMEOUT32) 1461 tv = &tv64; 1462 else 1463 #endif 1464 tv = (struct timeval *)addr; 1465 1466 tv->tv_sec = d->bd_rtout / hz; 1467 tv->tv_usec = (d->bd_rtout % hz) * tick; 1468 #if defined(COMPAT_FREEBSD32) && defined(__amd64__) 1469 if (cmd == BIOCGRTIMEOUT32) { 1470 tv32 = (struct timeval32 *)addr; 1471 tv32->tv_sec = tv->tv_sec; 1472 tv32->tv_usec = tv->tv_usec; 1473 } 1474 #endif 1475 1476 break; 1477 } 1478 1479 /* 1480 * Get packet stats. 1481 */ 1482 case BIOCGSTATS: 1483 { 1484 struct bpf_stat *bs = (struct bpf_stat *)addr; 1485 1486 /* XXXCSJP overflow */ 1487 bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount); 1488 bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount); 1489 break; 1490 } 1491 1492 /* 1493 * Set immediate mode. 1494 */ 1495 case BIOCIMMEDIATE: 1496 BPFD_LOCK(d); 1497 d->bd_flags |= *(u_int *)addr ? BPFD_IMMEDIATE : 0; 1498 BPFD_UNLOCK(d); 1499 break; 1500 1501 case BIOCVERSION: 1502 { 1503 struct bpf_version *bv = (struct bpf_version *)addr; 1504 1505 bv->bv_major = BPF_MAJOR_VERSION; 1506 bv->bv_minor = BPF_MINOR_VERSION; 1507 break; 1508 } 1509 1510 /* 1511 * Get "header already complete" flag 1512 */ 1513 case BIOCGHDRCMPLT: 1514 BPFD_LOCK(d); 1515 *(u_int *)addr = d->bd_flags & BPFD_HDRCMPLT ? 1 : 0; 1516 BPFD_UNLOCK(d); 1517 break; 1518 1519 /* 1520 * Set "header already complete" flag 1521 */ 1522 case BIOCSHDRCMPLT: 1523 BPFD_LOCK(d); 1524 d->bd_flags |= *(u_int *)addr ? BPFD_HDRCMPLT : 0; 1525 BPFD_UNLOCK(d); 1526 break; 1527 1528 /* 1529 * Get packet direction flag 1530 */ 1531 case BIOCGDIRECTION: 1532 BPFD_LOCK(d); 1533 *(u_int *)addr = d->bd_direction; 1534 BPFD_UNLOCK(d); 1535 break; 1536 1537 /* 1538 * Set packet direction flag 1539 */ 1540 case BIOCSDIRECTION: 1541 { 1542 u_int direction; 1543 1544 direction = *(u_int *)addr; 1545 switch (direction) { 1546 case BPF_D_IN: 1547 case BPF_D_INOUT: 1548 case BPF_D_OUT: 1549 BPFD_LOCK(d); 1550 d->bd_direction = direction; 1551 BPFD_UNLOCK(d); 1552 break; 1553 default: 1554 error = EINVAL; 1555 } 1556 } 1557 break; 1558 1559 /* 1560 * Get packet timestamp format and resolution. 1561 */ 1562 case BIOCGTSTAMP: 1563 BPFD_LOCK(d); 1564 *(u_int *)addr = d->bd_tstamp; 1565 BPFD_UNLOCK(d); 1566 break; 1567 1568 /* 1569 * Set packet timestamp format and resolution. 
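	 * The value combines a BPF_T_* format (MICROTIME, NANOTIME, BINTIME
	 * or NONE) with the optional BPF_T_MONOTONIC and BPF_T_FAST flags,
	 * e.g. BPF_T_NANOTIME | BPF_T_MONOTONIC; BPF_T_VALID() below rejects
	 * any other combination.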
1570 */ 1571 case BIOCSTSTAMP: 1572 { 1573 u_int func; 1574 1575 func = *(u_int *)addr; 1576 if (BPF_T_VALID(func)) 1577 d->bd_tstamp = func; 1578 else 1579 error = EINVAL; 1580 } 1581 break; 1582 1583 case BIOCFEEDBACK: 1584 BPFD_LOCK(d); 1585 d->bd_flags |= *(u_int *)addr ? BPFD_FEEDBACK : 0; 1586 BPFD_UNLOCK(d); 1587 break; 1588 1589 case BIOCLOCK: 1590 BPFD_LOCK(d); 1591 d->bd_flags |= BPFD_LOCKED; 1592 BPFD_UNLOCK(d); 1593 break; 1594 1595 case FIONBIO: /* Non-blocking I/O */ 1596 break; 1597 1598 case FIOASYNC: /* Send signal on receive packets */ 1599 BPFD_LOCK(d); 1600 d->bd_flags |= *(u_int *)addr ? BPFD_ASYNC : 0; 1601 BPFD_UNLOCK(d); 1602 break; 1603 1604 case FIOSETOWN: 1605 /* 1606 * XXX: Add some sort of locking here? 1607 * fsetown() can sleep. 1608 */ 1609 error = fsetown(*(int *)addr, &d->bd_sigio); 1610 break; 1611 1612 case FIOGETOWN: 1613 BPFD_LOCK(d); 1614 *(int *)addr = fgetown(&d->bd_sigio); 1615 BPFD_UNLOCK(d); 1616 break; 1617 1618 /* This is deprecated, FIOSETOWN should be used instead. */ 1619 case TIOCSPGRP: 1620 error = fsetown(-(*(int *)addr), &d->bd_sigio); 1621 break; 1622 1623 /* This is deprecated, FIOGETOWN should be used instead. */ 1624 case TIOCGPGRP: 1625 *(int *)addr = -fgetown(&d->bd_sigio); 1626 break; 1627 1628 case BIOCSRSIG: /* Set receive signal */ 1629 { 1630 u_int sig; 1631 1632 sig = *(u_int *)addr; 1633 1634 if (sig >= NSIG) 1635 error = EINVAL; 1636 else { 1637 BPFD_LOCK(d); 1638 d->bd_sig = sig; 1639 BPFD_UNLOCK(d); 1640 } 1641 break; 1642 } 1643 case BIOCGRSIG: 1644 BPFD_LOCK(d); 1645 *(u_int *)addr = d->bd_sig; 1646 BPFD_UNLOCK(d); 1647 break; 1648 1649 case BIOCGETBUFMODE: 1650 BPFD_LOCK(d); 1651 *(u_int *)addr = d->bd_bufmode; 1652 BPFD_UNLOCK(d); 1653 break; 1654 1655 case BIOCSETBUFMODE: 1656 /* 1657 * Allow the buffering mode to be changed as long as we 1658 * haven't yet committed to a particular mode. Our 1659 * definition of commitment, for now, is whether or not a 1660 * buffer has been allocated or an interface attached, since 1661 * that's the point where things get tricky. 1662 */ 1663 switch (*(u_int *)addr) { 1664 case BPF_BUFMODE_BUFFER: 1665 break; 1666 1667 case BPF_BUFMODE_ZBUF: 1668 if (bpf_zerocopy_enable) 1669 break; 1670 /* FALLSTHROUGH */ 1671 1672 default: 1673 CURVNET_RESTORE(); 1674 return (EINVAL); 1675 } 1676 1677 BPFD_LOCK(d); 1678 if (d->bd_sbuf != NULL || d->bd_hbuf != NULL || 1679 d->bd_fbuf != NULL || d->bd_bif != NULL) { 1680 BPFD_UNLOCK(d); 1681 CURVNET_RESTORE(); 1682 return (EBUSY); 1683 } 1684 d->bd_bufmode = *(u_int *)addr; 1685 BPFD_UNLOCK(d); 1686 break; 1687 1688 case BIOCGETZMAX: 1689 error = bpf_ioctl_getzmax(td, d, (size_t *)addr); 1690 break; 1691 1692 case BIOCSETZBUF: 1693 error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr); 1694 break; 1695 1696 case BIOCROTZBUF: 1697 error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr); 1698 break; 1699 1700 case BIOCSETVLANPCP: 1701 { 1702 u_int pcp; 1703 1704 pcp = *(u_int *)addr; 1705 if (pcp > BPF_PRIO_MAX || pcp < 0) { 1706 error = EINVAL; 1707 break; 1708 } 1709 d->bd_pcp = pcp; 1710 break; 1711 } 1712 } 1713 CURVNET_RESTORE(); 1714 return (error); 1715 } 1716 1717 /* 1718 * Return list of available tapping points, or report how much space is 1719 * required for a successful return. 
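 *
 * Userland is expected to call this twice: first with bi_size set to zero,
 * which only reports the required buffer size and the number of names, and
 * then again with bi_ubuf pointing at a buffer of at least that size.  A
 * minimal sketch of a consumer (error handling omitted):
 *
 *	struct bpf_iflist bi = { .bi_size = 0 };
 *
 *	ioctl(fd, BIOCGETIFLIST, &bi);
 *	bi.bi_ubuf = malloc(bi.bi_size);
 *	ioctl(fd, BIOCGETIFLIST, &bi);
 *
 * after which bi_ubuf holds bi_count nul-terminated interface names.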
1720 */ 1721 static int 1722 bpf_getiflist(struct bpf_iflist *bi) 1723 { 1724 struct bpf_if *bp; 1725 u_int allsize, size, cnt; 1726 char *uaddr; 1727 1728 BPF_LOCK(); 1729 1730 cnt = allsize = size = 0; 1731 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1732 allsize += strlen(bp->bif_name) + 1; 1733 if (++cnt == bi->bi_count) 1734 size = allsize; 1735 } 1736 if (size == 0) 1737 size = allsize; 1738 1739 if (bi->bi_size == 0) { 1740 BPF_UNLOCK(); 1741 bi->bi_size = size; 1742 bi->bi_count = cnt; 1743 return (0); 1744 } else if (bi->bi_size < size) { 1745 BPF_UNLOCK(); 1746 return (ENOSPC); 1747 } 1748 1749 uaddr = bi->bi_ubuf; 1750 cnt = 0; 1751 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 1752 u_int len; 1753 int error; 1754 1755 len = strlen(bp->bif_name) + 1; 1756 if ((error = copyout(bp->bif_name, uaddr, len)) != 0) { 1757 BPF_UNLOCK(); 1758 return (error); 1759 } 1760 if (++cnt == bi->bi_count) 1761 break; 1762 uaddr += len; 1763 } 1764 BPF_UNLOCK(); 1765 bi->bi_count = cnt; 1766 1767 return (0); 1768 } 1769 1770 /* 1771 * Set d's packet filter program to fp. If this file already has a filter, 1772 * free it and replace it. Returns EINVAL for bogus requests. 1773 * 1774 * Note we use global lock here to serialize bpf_setf() and bpf_setif() 1775 * calls. 1776 */ 1777 static int 1778 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) 1779 { 1780 #ifdef COMPAT_FREEBSD32 1781 struct bpf_program fp_swab; 1782 struct bpf_program32 *fp32; 1783 #endif 1784 struct bpf_program_buffer *fcode; 1785 struct bpf_insn *filter; 1786 #ifdef BPF_JITTER 1787 bpf_jit_filter *jfunc; 1788 #endif 1789 size_t size; 1790 u_int flen; 1791 bool track_event; 1792 1793 #ifdef COMPAT_FREEBSD32 1794 switch (cmd) { 1795 case BIOCSETF32: 1796 case BIOCSETWF32: 1797 case BIOCSETFNR32: 1798 fp32 = (struct bpf_program32 *)fp; 1799 fp_swab.bf_len = fp32->bf_len; 1800 fp_swab.bf_insns = 1801 (struct bpf_insn *)(uintptr_t)fp32->bf_insns; 1802 fp = &fp_swab; 1803 switch (cmd) { 1804 case BIOCSETF32: 1805 cmd = BIOCSETF; 1806 break; 1807 case BIOCSETWF32: 1808 cmd = BIOCSETWF; 1809 break; 1810 } 1811 break; 1812 } 1813 #endif 1814 1815 filter = NULL; 1816 #ifdef BPF_JITTER 1817 jfunc = NULL; 1818 #endif 1819 /* 1820 * Check new filter validness before acquiring any locks. 1821 * Allocate memory for new filter, if needed. 1822 */ 1823 flen = fp->bf_len; 1824 if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0)) 1825 return (EINVAL); 1826 size = flen * sizeof(*fp->bf_insns); 1827 if (size > 0) { 1828 /* We're setting up new filter. Copy and check actual data. */ 1829 fcode = bpf_program_buffer_alloc(size, M_WAITOK); 1830 filter = (struct bpf_insn *)fcode->buffer; 1831 if (copyin(fp->bf_insns, filter, size) != 0 || 1832 !bpf_validate(filter, flen)) { 1833 free(fcode, M_BPF); 1834 return (EINVAL); 1835 } 1836 #ifdef BPF_JITTER 1837 if (cmd != BIOCSETWF) { 1838 /* 1839 * Filter is copied inside fcode and is 1840 * perfectly valid. 1841 */ 1842 jfunc = bpf_jitter(filter, flen); 1843 } 1844 #endif 1845 } 1846 1847 track_event = false; 1848 fcode = NULL; 1849 1850 BPF_LOCK(); 1851 BPFD_LOCK(d); 1852 /* Set up new filter. 
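	 * The old program, if any, is not freed here: readers running under
	 * the net epoch may still be executing it, so it is wrapped back into
	 * its bpf_program_buffer and handed to NET_EPOCH_CALL() once the new
	 * pointer has been published.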
*/ 1853 if (cmd == BIOCSETWF) { 1854 if (d->bd_wfilter != NULL) { 1855 fcode = __containerof((void *)d->bd_wfilter, 1856 struct bpf_program_buffer, buffer); 1857 #ifdef BPF_JITTER 1858 fcode->func = NULL; 1859 #endif 1860 } 1861 d->bd_wfilter = filter; 1862 } else { 1863 if (d->bd_rfilter != NULL) { 1864 fcode = __containerof((void *)d->bd_rfilter, 1865 struct bpf_program_buffer, buffer); 1866 #ifdef BPF_JITTER 1867 fcode->func = d->bd_bfilter; 1868 #endif 1869 } 1870 d->bd_rfilter = filter; 1871 #ifdef BPF_JITTER 1872 d->bd_bfilter = jfunc; 1873 #endif 1874 if (cmd == BIOCSETF) 1875 reset_d(d); 1876 1877 if (bpf_check_upgrade(cmd, d, filter, flen) != 0) { 1878 /* 1879 * Filter can be set several times without 1880 * specifying interface. In this case just mark d 1881 * as reader. 1882 */ 1883 d->bd_writer = 0; 1884 if (d->bd_bif != NULL) { 1885 /* 1886 * Remove descriptor from writers-only list 1887 * and add it to active readers list. 1888 */ 1889 CK_LIST_REMOVE(d, bd_next); 1890 CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist, 1891 d, bd_next); 1892 CTR2(KTR_NET, 1893 "%s: upgrade required by pid %d", 1894 __func__, d->bd_pid); 1895 track_event = true; 1896 } 1897 } 1898 } 1899 BPFD_UNLOCK(d); 1900 1901 if (fcode != NULL) 1902 NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx); 1903 1904 if (track_event) 1905 bif_attachd(d->bd_bif); 1906 1907 BPF_UNLOCK(); 1908 return (0); 1909 } 1910 1911 /* 1912 * Attach descriptor to a tap point, possibly detaching from the old one, 1913 * reset the counters. 1914 * XXXGL: this KPI is subject to change 1915 */ 1916 static int 1917 bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 1918 { 1919 bool writer; 1920 1921 BPF_LOCK_ASSERT(); 1922 1923 /* 1924 * At this point, we expect the buffer is already allocated. If not, 1925 * return an error. 1926 */ 1927 switch (d->bd_bufmode) { 1928 case BPF_BUFMODE_BUFFER: 1929 case BPF_BUFMODE_ZBUF: 1930 if (d->bd_sbuf == NULL) 1931 return (EINVAL); 1932 break; 1933 1934 default: 1935 panic("%s: bufmode %d", __func__, d->bd_bufmode); 1936 } 1937 1938 if (bp == d->bd_bif) { 1939 BPFD_LOCK(d); 1940 reset_d(d); 1941 BPFD_UNLOCK(d); 1942 return (0); 1943 } else if (d->bd_bif != NULL) 1944 bpf_detachd(d, false); 1945 1946 /* 1947 * Save sysctl value to protect from sysctl change between reads. 1948 */ 1949 writer = V_bpf_optimize_writers || (d->bd_writer > 0); 1950 1951 /* 1952 * Point d at bp, and add d to the interface's list. 1953 * Since there are many applications using BPF for 1954 * sending raw packets only (dhcpd, cdpd are good examples) 1955 * we can delay adding d to the list of active listeners until 1956 * some filter is configured. 1957 */ 1958 BPFD_LOCK(d); 1959 /* 1960 * Hold reference to bpif while descriptor uses this interface. 1961 */ 1962 bpfif_ref(bp); 1963 d->bd_bif = bp; 1964 if (writer) { 1965 /* Add to writers-only list */ 1966 CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next); 1967 /* 1968 * We decrement bd_writer on every filter set operation. 1969 * First BIOCSETF is done by pcap_open_live() to set up 1970 * snap length. After that application usually sets its own 1971 * filter. 1972 */ 1973 d->bd_writer = 2; 1974 } else 1975 CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next); 1976 1977 reset_d(d); 1978 1979 /* Trigger EVFILT_WRITE events. */ 1980 bpf_wakeup(d); 1981 1982 BPFD_UNLOCK(d); 1983 1984 CTR3(KTR_NET, "%s: called by pid %d, adding to %s list", 1985 __func__, d->bd_pid, d->bd_writer ? 
"writer" : "active"); 1986 1987 if (!writer) 1988 bif_attachd(bp); 1989 1990 return (0); 1991 } 1992 1993 /* 1994 * Support for select() and poll() system calls 1995 * 1996 * Return true iff the specific operation will not block indefinitely. 1997 * Otherwise, return false but make a note that a selwakeup() must be done. 1998 */ 1999 static int 2000 bpfpoll(struct cdev *dev, int events, struct thread *td) 2001 { 2002 struct bpf_d *d; 2003 int revents; 2004 2005 if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL) 2006 return (events & 2007 (POLLHUP | POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)); 2008 2009 /* 2010 * Refresh PID associated with this descriptor. 2011 */ 2012 revents = events & (POLLOUT | POLLWRNORM); 2013 BPFD_LOCK(d); 2014 BPF_PID_REFRESH(d, td); 2015 if (events & (POLLIN | POLLRDNORM)) { 2016 if (bpf_ready(d)) 2017 revents |= events & (POLLIN | POLLRDNORM); 2018 else { 2019 selrecord(td, &d->bd_sel); 2020 /* Start the read timeout if necessary. */ 2021 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 2022 callout_reset(&d->bd_callout, d->bd_rtout, 2023 bpf_timed_out, d); 2024 d->bd_state = BPF_WAITING; 2025 } 2026 } 2027 } 2028 BPFD_UNLOCK(d); 2029 return (revents); 2030 } 2031 2032 /* 2033 * Support for kevent() system call. Register EVFILT_READ filters and 2034 * reject all others. 2035 */ 2036 int 2037 bpfkqfilter(struct cdev *dev, struct knote *kn) 2038 { 2039 struct bpf_d *d; 2040 2041 if (devfs_get_cdevpriv((void **)&d) != 0) 2042 return (1); 2043 2044 switch (kn->kn_filter) { 2045 case EVFILT_READ: 2046 kn->kn_fop = &bpfread_filtops; 2047 break; 2048 2049 case EVFILT_WRITE: 2050 kn->kn_fop = &bpfwrite_filtops; 2051 break; 2052 2053 default: 2054 return (1); 2055 } 2056 2057 /* 2058 * Refresh PID associated with this descriptor. 2059 */ 2060 BPFD_LOCK(d); 2061 BPF_PID_REFRESH_CUR(d); 2062 kn->kn_hook = d; 2063 knlist_add(&d->bd_sel.si_note, kn, 1); 2064 BPFD_UNLOCK(d); 2065 2066 return (0); 2067 } 2068 2069 static void 2070 filt_bpfdetach(struct knote *kn) 2071 { 2072 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2073 2074 knlist_remove(&d->bd_sel.si_note, kn, 0); 2075 } 2076 2077 static int 2078 filt_bpfread(struct knote *kn, long hint) 2079 { 2080 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2081 int ready; 2082 2083 BPFD_LOCK_ASSERT(d); 2084 ready = bpf_ready(d); 2085 if (ready) { 2086 kn->kn_data = d->bd_slen; 2087 /* 2088 * Ignore the hold buffer if it is being copied to user space. 
		 */
		if (!(d->bd_flags & BPFD_HBUF_INUSE) && d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

static int
filt_bpfwrite(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	BPFD_LOCK_ASSERT(d);

	if (d->bd_bif == NULL) {
		kn->kn_data = 0;
		return (0);
	} else {
		kn->kn_data = bif_wrsize(d->bd_bif);
		return (1);
	}
}

#define	BPF_TSTAMP_NONE		0
#define	BPF_TSTAMP_FAST		1
#define	BPF_TSTAMP_NORMAL	2
#define	BPF_TSTAMP_EXTERN	3

static int
bpf_ts_quality(int tstype)
{

	if (tstype == BPF_T_NONE)
		return (BPF_TSTAMP_NONE);
	if ((tstype & BPF_T_FAST) != 0)
		return (BPF_TSTAMP_FAST);

	return (BPF_TSTAMP_NORMAL);
}

static int
bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
{
	struct timespec ts;
	struct m_tag *tag;
	int quality;

	quality = bpf_ts_quality(tstype);
	if (quality == BPF_TSTAMP_NONE)
		return (quality);

	if (m != NULL) {
		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) {
			mbuf_tstmp2timespec(m, &ts);
			timespec2bintime(&ts, bt);
			return (BPF_TSTAMP_EXTERN);
		}
		tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
		if (tag != NULL) {
			*bt = *(struct bintime *)(tag + 1);
			return (BPF_TSTAMP_EXTERN);
		}
	}
	if (quality == BPF_TSTAMP_NORMAL)
		binuptime(bt);
	else
		getbinuptime(bt);

	return (quality);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct epoch_tracker et;
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int slen;
	int gottime;

	gottime = BPF_TSTAMP_NONE;
	NET_EPOCH_ENTER(et);
	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		counter_u64_add(d->bd_rcount, 1);
		/*
		 * NB: We don't check the direction here since there
		 * is no way for the caller to indicate to us whether this
		 * packet is inbound or outbound.  In the bpf_mtap() routines,
		 * we use the interface pointers on the mbuf to figure it out.
		 */
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		if (bf != NULL)
			slen = (*(bf->func))(pkt, pktlen, pktlen);
		else
#endif
			slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			/*
			 * Filter matches.  Acquire the descriptor lock.
2203 */ 2204 BPFD_LOCK(d); 2205 counter_u64_add(d->bd_fcount, 1); 2206 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2207 gottime = bpf_gettime(&bt, d->bd_tstamp, 2208 NULL); 2209 #ifdef MAC 2210 if (bif_mac_check_receive(bp, d) == 0) 2211 #endif 2212 catchpacket(d, pkt, pktlen, slen, 2213 bpf_append_bytes, &bt); 2214 BPFD_UNLOCK(d); 2215 } 2216 } 2217 NET_EPOCH_EXIT(et); 2218 } 2219 2220 void 2221 bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen) 2222 { 2223 if (bpf_peers_present(ifp->if_bpf)) 2224 bpf_tap(ifp->if_bpf, pkt, pktlen); 2225 } 2226 2227 /* 2228 * Incoming linkage from device drivers, when packet is in an mbuf chain. 2229 * Locking model is explained in bpf_tap(). 2230 */ 2231 void 2232 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2233 { 2234 struct epoch_tracker et; 2235 struct bintime bt; 2236 struct bpf_d *d; 2237 #ifdef BPF_JITTER 2238 bpf_jit_filter *bf; 2239 #endif 2240 u_int pktlen, slen; 2241 int gottime; 2242 2243 /* Skip outgoing duplicate packets. */ 2244 if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) { 2245 m->m_flags &= ~M_PROMISC; 2246 return; 2247 } 2248 2249 pktlen = m_length(m, NULL); 2250 gottime = BPF_TSTAMP_NONE; 2251 2252 NET_EPOCH_ENTER(et); 2253 CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2254 if (bpf_chkdir(d, m)) 2255 continue; 2256 counter_u64_add(d->bd_rcount, 1); 2257 #ifdef BPF_JITTER 2258 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; 2259 /* XXX We cannot handle multiple mbufs. */ 2260 if (bf != NULL && m->m_next == NULL) 2261 slen = (*(bf->func))(mtod(m, u_char *), pktlen, 2262 pktlen); 2263 else 2264 #endif 2265 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0); 2266 if (slen != 0) { 2267 BPFD_LOCK(d); 2268 2269 counter_u64_add(d->bd_fcount, 1); 2270 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2271 gottime = bpf_gettime(&bt, d->bd_tstamp, m); 2272 #ifdef MAC 2273 if (bif_mac_check_receive(bp, d) == 0) 2274 #endif 2275 catchpacket(d, (u_char *)m, pktlen, slen, 2276 bpf_append_mbuf, &bt); 2277 BPFD_UNLOCK(d); 2278 } 2279 } 2280 NET_EPOCH_EXIT(et); 2281 } 2282 2283 void 2284 bpf_mtap_if(if_t ifp, struct mbuf *m) 2285 { 2286 if (bpf_peers_present(ifp->if_bpf)) { 2287 M_ASSERTVALID(m); 2288 bpf_mtap(ifp->if_bpf, m); 2289 } 2290 } 2291 2292 /* 2293 * Incoming linkage from device drivers, when packet is in 2294 * an mbuf chain and to be prepended by a contiguous header. 2295 */ 2296 void 2297 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 2298 { 2299 struct epoch_tracker et; 2300 struct bintime bt; 2301 struct mbuf mb; 2302 struct bpf_d *d; 2303 u_int pktlen, slen; 2304 int gottime; 2305 2306 /* Skip outgoing duplicate packets. */ 2307 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 2308 m->m_flags &= ~M_PROMISC; 2309 return; 2310 } 2311 2312 pktlen = m_length(m, NULL); 2313 /* 2314 * Craft on-stack mbuf suitable for passing to bpf_filter. 2315 * Note that we cut corners here; we only setup what's 2316 * absolutely needed--this mbuf should never go anywhere else. 
2317 */ 2318 mb.m_flags = 0; 2319 mb.m_next = m; 2320 mb.m_data = data; 2321 mb.m_len = dlen; 2322 pktlen += dlen; 2323 2324 gottime = BPF_TSTAMP_NONE; 2325 2326 NET_EPOCH_ENTER(et); 2327 CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2328 if (bpf_chkdir(d, m)) 2329 continue; 2330 counter_u64_add(d->bd_rcount, 1); 2331 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0); 2332 if (slen != 0) { 2333 BPFD_LOCK(d); 2334 2335 counter_u64_add(d->bd_fcount, 1); 2336 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2337 gottime = bpf_gettime(&bt, d->bd_tstamp, m); 2338 #ifdef MAC 2339 if (bif_mac_check_receive(bp, d) == 0) 2340 #endif 2341 catchpacket(d, (u_char *)&mb, pktlen, slen, 2342 bpf_append_mbuf, &bt); 2343 BPFD_UNLOCK(d); 2344 } 2345 } 2346 NET_EPOCH_EXIT(et); 2347 } 2348 2349 void 2350 bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m) 2351 { 2352 if (bpf_peers_present(ifp->if_bpf)) { 2353 M_ASSERTVALID(m); 2354 bpf_mtap2(ifp->if_bpf, data, dlen, m); 2355 } 2356 } 2357 2358 #undef BPF_TSTAMP_NONE 2359 #undef BPF_TSTAMP_FAST 2360 #undef BPF_TSTAMP_NORMAL 2361 #undef BPF_TSTAMP_EXTERN 2362 2363 static int 2364 bpf_hdrlen(struct bpf_d *d) 2365 { 2366 int hdrlen; 2367 2368 hdrlen = d->bd_bif->bif_hdrlen; 2369 #ifndef BURN_BRIDGES 2370 if (d->bd_tstamp == BPF_T_NONE || 2371 BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME) 2372 #ifdef COMPAT_FREEBSD32 2373 if (d->bd_compat32) 2374 hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32); 2375 else 2376 #endif 2377 hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr); 2378 else 2379 #endif 2380 hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr); 2381 #ifdef COMPAT_FREEBSD32 2382 if (d->bd_compat32) 2383 hdrlen = BPF_WORDALIGN32(hdrlen); 2384 else 2385 #endif 2386 hdrlen = BPF_WORDALIGN(hdrlen); 2387 2388 return (hdrlen - d->bd_bif->bif_hdrlen); 2389 } 2390 2391 static void 2392 bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype) 2393 { 2394 struct bintime bt2, boottimebin; 2395 struct timeval tsm; 2396 struct timespec tsn; 2397 2398 if ((tstype & BPF_T_MONOTONIC) == 0) { 2399 bt2 = *bt; 2400 getboottimebin(&boottimebin); 2401 bintime_add(&bt2, &boottimebin); 2402 bt = &bt2; 2403 } 2404 switch (BPF_T_FORMAT(tstype)) { 2405 case BPF_T_MICROTIME: 2406 bintime2timeval(bt, &tsm); 2407 ts->bt_sec = tsm.tv_sec; 2408 ts->bt_frac = tsm.tv_usec; 2409 break; 2410 case BPF_T_NANOTIME: 2411 bintime2timespec(bt, &tsn); 2412 ts->bt_sec = tsn.tv_sec; 2413 ts->bt_frac = tsn.tv_nsec; 2414 break; 2415 case BPF_T_BINTIME: 2416 ts->bt_sec = bt->sec; 2417 ts->bt_frac = bt->frac; 2418 break; 2419 } 2420 } 2421 2422 /* 2423 * Move the packet data from interface memory (pkt) into the 2424 * store buffer. "cpfn" is the routine called to do the actual data 2425 * transfer. bpf_append_bytes is passed in to copy contiguous chunks, while 2426 * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case, 2427 * pkt is really an mbuf.
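 * catchpacket() runs with the descriptor lock held: the tap routines above
 * take BPFD_LOCK() before calling it, and it asserts the lock on entry.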
2428 */ 2429 static void 2430 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 2431 void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), 2432 struct bintime *bt) 2433 { 2434 static char zeroes[BPF_ALIGNMENT]; 2435 struct bpf_xhdr hdr; 2436 #ifndef BURN_BRIDGES 2437 struct bpf_hdr hdr_old; 2438 #ifdef COMPAT_FREEBSD32 2439 struct bpf_hdr32 hdr32_old; 2440 #endif 2441 #endif 2442 int caplen, curlen, hdrlen, pad, totlen; 2443 int do_wakeup = 0; 2444 int do_timestamp; 2445 int tstype; 2446 2447 BPFD_LOCK_ASSERT(d); 2448 if (d->bd_bif == NULL) { 2449 /* Descriptor was detached in concurrent thread */ 2450 counter_u64_add(d->bd_dcount, 1); 2451 return; 2452 } 2453 2454 /* 2455 * Detect whether user space has released a buffer back to us, and if 2456 * so, move it from being a hold buffer to a free buffer. This may 2457 * not be the best place to do it (for example, we might only want to 2458 * run this check if we need the space), but for now it's a reliable 2459 * spot to do it. 2460 */ 2461 if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { 2462 d->bd_fbuf = d->bd_hbuf; 2463 d->bd_hbuf = NULL; 2464 d->bd_hlen = 0; 2465 bpf_buf_reclaimed(d); 2466 } 2467 2468 /* 2469 * Figure out how many bytes to move. If the packet is 2470 * greater or equal to the snapshot length, transfer that 2471 * much. Otherwise, transfer the whole packet (unless 2472 * we hit the buffer size limit). 2473 */ 2474 hdrlen = bpf_hdrlen(d); 2475 totlen = hdrlen + min(snaplen, pktlen); 2476 if (totlen > d->bd_bufsize) 2477 totlen = d->bd_bufsize; 2478 2479 /* 2480 * Round up the end of the previous packet to the next longword. 2481 * 2482 * Drop the packet if there's no room and no hope of room 2483 * If the packet would overflow the storage buffer or the storage 2484 * buffer is considered immutable by the buffer model, try to rotate 2485 * the buffer and wakeup pending processes. 2486 */ 2487 #ifdef COMPAT_FREEBSD32 2488 if (d->bd_compat32) 2489 curlen = BPF_WORDALIGN32(d->bd_slen); 2490 else 2491 #endif 2492 curlen = BPF_WORDALIGN(d->bd_slen); 2493 if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { 2494 if (d->bd_fbuf == NULL) { 2495 /* 2496 * There's no room in the store buffer, and no 2497 * prospect of room, so drop the packet. Notify the 2498 * buffer model. 2499 */ 2500 bpf_buffull(d); 2501 counter_u64_add(d->bd_dcount, 1); 2502 return; 2503 } 2504 KASSERT(!(d->bd_flags & BPFD_HBUF_INUSE), 2505 ("hold buffer is in use")); 2506 ROTATE_BUFFERS(d); 2507 do_wakeup = 1; 2508 curlen = 0; 2509 } else { 2510 if ((d->bd_flags & BPFD_IMMEDIATE) || 2511 d->bd_state == BPF_TIMED_OUT) { 2512 /* 2513 * Immediate mode is set, or the read timeout has 2514 * already expired during a select call. A packet 2515 * arrived, so the reader should be woken up. 2516 */ 2517 do_wakeup = 1; 2518 } 2519 pad = curlen - d->bd_slen; 2520 KASSERT(pad >= 0 && pad <= sizeof(zeroes), 2521 ("%s: invalid pad byte count %d", __func__, pad)); 2522 if (pad > 0) { 2523 /* Zero pad bytes. 
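 * The word-alignment gap between the end of the previous packet (bd_slen)
 * and curlen is filled with zeroes so that stale buffer contents are never
 * exposed to the reader.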
*/ 2524 bpf_append_bytes(d, d->bd_sbuf, d->bd_slen, zeroes, 2525 pad); 2526 } 2527 } 2528 2529 caplen = totlen - hdrlen; 2530 tstype = d->bd_tstamp; 2531 do_timestamp = tstype != BPF_T_NONE; 2532 #ifndef BURN_BRIDGES 2533 if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) { 2534 struct bpf_ts ts; 2535 if (do_timestamp) 2536 bpf_bintime2ts(bt, &ts, tstype); 2537 #ifdef COMPAT_FREEBSD32 2538 if (d->bd_compat32) { 2539 bzero(&hdr32_old, sizeof(hdr32_old)); 2540 if (do_timestamp) { 2541 hdr32_old.bh_tstamp.tv_sec = ts.bt_sec; 2542 hdr32_old.bh_tstamp.tv_usec = ts.bt_frac; 2543 } 2544 hdr32_old.bh_datalen = pktlen; 2545 hdr32_old.bh_hdrlen = hdrlen; 2546 hdr32_old.bh_caplen = caplen; 2547 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old, 2548 sizeof(hdr32_old)); 2549 goto copy; 2550 } 2551 #endif 2552 bzero(&hdr_old, sizeof(hdr_old)); 2553 if (do_timestamp) { 2554 hdr_old.bh_tstamp.tv_sec = ts.bt_sec; 2555 hdr_old.bh_tstamp.tv_usec = ts.bt_frac; 2556 } 2557 hdr_old.bh_datalen = pktlen; 2558 hdr_old.bh_hdrlen = hdrlen; 2559 hdr_old.bh_caplen = caplen; 2560 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old, 2561 sizeof(hdr_old)); 2562 goto copy; 2563 } 2564 #endif 2565 2566 /* 2567 * Append the bpf header. Note we append the actual header size, but 2568 * move forward the length of the header plus padding. 2569 */ 2570 bzero(&hdr, sizeof(hdr)); 2571 if (do_timestamp) 2572 bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype); 2573 hdr.bh_datalen = pktlen; 2574 hdr.bh_hdrlen = hdrlen; 2575 hdr.bh_caplen = caplen; 2576 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); 2577 2578 /* 2579 * Copy the packet data into the store buffer and update its length. 2580 */ 2581 #ifndef BURN_BRIDGES 2582 copy: 2583 #endif 2584 (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen); 2585 d->bd_slen = curlen + totlen; 2586 2587 if (do_wakeup) 2588 bpf_wakeup(d); 2589 } 2590 2591 /* 2592 * Free buffers currently in use by a descriptor. 2593 * Called on close. 2594 */ 2595 static void 2596 bpfd_free(epoch_context_t ctx) 2597 { 2598 struct bpf_d *d; 2599 struct bpf_program_buffer *p; 2600 2601 /* 2602 * We don't need to lock out interrupts since this descriptor has 2603 * been detached from its interface and it yet hasn't been marked 2604 * free. 2605 */ 2606 d = __containerof(ctx, struct bpf_d, epoch_ctx); 2607 bpf_free(d); 2608 if (d->bd_rfilter != NULL) { 2609 p = __containerof((void *)d->bd_rfilter, 2610 struct bpf_program_buffer, buffer); 2611 #ifdef BPF_JITTER 2612 p->func = d->bd_bfilter; 2613 #endif 2614 bpf_program_buffer_free(&p->epoch_ctx); 2615 } 2616 if (d->bd_wfilter != NULL) { 2617 p = __containerof((void *)d->bd_wfilter, 2618 struct bpf_program_buffer, buffer); 2619 #ifdef BPF_JITTER 2620 p->func = NULL; 2621 #endif 2622 bpf_program_buffer_free(&p->epoch_ctx); 2623 } 2624 2625 mtx_destroy(&d->bd_lock); 2626 counter_u64_free(d->bd_rcount); 2627 counter_u64_free(d->bd_dcount); 2628 counter_u64_free(d->bd_fcount); 2629 counter_u64_free(d->bd_wcount); 2630 counter_u64_free(d->bd_wfcount); 2631 counter_u64_free(d->bd_wdcount); 2632 counter_u64_free(d->bd_zcopy); 2633 free(d, M_BPF); 2634 } 2635 2636 /* 2637 * Attach a tap point to bpf. 2638 * XXX: with current KPI it is consumer's responsibility to avoid duplicates. 
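 * Illustrative sketch only (the foo_* names are hypothetical and not part
 * of this file): an Ethernet-like consumer could attach a tap point with
 *
 *	ifp->if_bpf = bpf_attach(if_name(ifp), DLT_EN10MB, ETHER_HDR_LEN,
 *	    &foo_bpf_methods, ifp);
 *
 * where foo_bpf_methods supplies whichever of the optional bif_* callbacks
 * the consumer implements.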
2639 */ 2640 struct bpf_if * 2641 bpf_attach(const char *name, u_int dlt, u_int hdrlen, 2642 const struct bif_methods *methods, void *sc) 2643 { 2644 struct bpf_if *bp; 2645 2646 bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO); 2647 2648 CK_LIST_INIT(&bp->bif_dlist); 2649 CK_LIST_INIT(&bp->bif_wlist); 2650 bp->bif_dlt = dlt; 2651 bp->bif_hdrlen = hdrlen; 2652 bp->bif_softc = sc; 2653 bp->bif_name = name; 2654 bp->bif_methods = methods; 2655 refcount_init(&bp->bif_refcnt, 1); 2656 BPF_LOCK(); 2657 LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); 2658 BPF_UNLOCK(); 2659 2660 return (bp); 2661 } 2662 2663 #ifdef VIMAGE 2664 /* 2665 * Detach descriptors on interface's vmove event. 2666 * XXXGL: shouldn't be a special case, but a full detach. 2667 */ 2668 void 2669 bpf_ifdetach(struct ifnet *ifp) 2670 { 2671 struct bpf_if *bp; 2672 struct bpf_d *d; 2673 2674 BPF_LOCK(); 2675 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2676 /* XXXGL: assuming softc is ifnet here */ 2677 if (bp->bif_softc != ifp) 2678 continue; 2679 2680 /* Detach common descriptors */ 2681 while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) { 2682 bpf_detachd(d, true); 2683 } 2684 2685 /* Detach writer-only descriptors */ 2686 while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) { 2687 bpf_detachd(d, true); 2688 } 2689 } 2690 BPF_UNLOCK(); 2691 } 2692 #endif 2693 2694 /* 2695 * Detach bpf tap point. This involves detaching each descriptor associated 2696 * with the interface. Notify each descriptor as it's detached so that any 2697 * sleepers wake up and get ENXIO. 2698 */ 2699 void 2700 bpf_detach(struct bpf_if *bp) 2701 { 2702 struct bpf_d *d; 2703 2704 BPF_LOCK(); 2705 LIST_REMOVE(bp, bif_next); 2706 2707 CTR3(KTR_NET, "%s: scheduling free for encap %d for bp %p", 2708 __func__, bp->bif_dlt, bp); 2709 2710 /* Detach common descriptors */ 2711 while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) { 2712 bpf_detachd(d, true); 2713 } 2714 2715 /* Detach writer-only descriptors */ 2716 while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) { 2717 bpf_detachd(d, true); 2718 } 2719 bpfif_rele(bp); 2720 BPF_UNLOCK(); 2721 } 2722 2723 bool 2724 bpf_peers_present_if(struct ifnet *ifp) 2725 { 2726 return (bpf_peers_present(ifp->if_bpf)); 2727 } 2728 2729 /* 2730 * Get a list of the available data link types of the tap point. If a tap point 2731 * attaches more than once, it is supposed to attach with different DLTs 2732 * but with the same name pointer. 2733 */ 2734 static int 2735 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 2736 { 2737 const char *name; 2738 struct bpf_if *bp; 2739 u_int *lst; 2740 int error, n, n1; 2741 2742 BPF_LOCK_ASSERT(); 2743 2744 name = d->bd_bif->bif_name; 2745 n1 = 0; 2746 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2747 if (bp->bif_name == name) 2748 n1++; 2749 } 2750 if (bfl->bfl_list == NULL) { 2751 bfl->bfl_len = n1; 2752 return (0); 2753 } 2754 if (n1 > bfl->bfl_len) 2755 return (ENOMEM); 2756 2757 lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK); 2758 n = 0; 2759 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2760 if (bp->bif_name != name) 2761 continue; 2762 lst[n++] = bp->bif_dlt; 2763 } 2764 error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n); 2765 free(lst, M_TEMP); 2766 bfl->bfl_len = n; 2767 return (error); 2768 } 2769 2770 /* 2771 * Set the data link type of a BPF descriptor. The convention is that the 2772 * application first does BIOCSETIF and then BIOCSETDLT, thus the descriptor 2773 * is supposed to be already attached.
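 * A bpf_if carrying the requested DLT is then looked up by the current
 * tap point's name among all attached tap points.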
Only one kernel facility provides 2774 * tapping points with same name but different DLT - ieee80211_radiotap. 2775 * 2776 * XXXGL: this function definitely looks suspicious, e.g. it clearly doesn't 2777 * clear promisc on the old bpf_if. The convention about reference counting 2778 * is also unclear. 2779 */ 2780 static int 2781 bpf_setdlt(struct bpf_d *d, u_int dlt) 2782 { 2783 int error, opromisc; 2784 const char *name; 2785 struct bpf_if *bp; 2786 2787 BPF_LOCK_ASSERT(); 2788 MPASS(d->bd_bif != NULL); 2789 2790 /* 2791 * It is safe to check bd_bif without BPFD_LOCK, it can not be 2792 * changed while we hold global lock. 2793 */ 2794 if (d->bd_bif->bif_dlt == dlt) 2795 return (0); 2796 2797 name = d->bd_bif->bif_name; 2798 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2799 if (bp->bif_name == name && bp->bif_dlt == dlt) 2800 break; 2801 } 2802 if (bp == NULL) 2803 return (EINVAL); 2804 2805 opromisc = d->bd_promisc; 2806 bpf_detachd(d, false); 2807 bpf_attachd(d, bp); 2808 if (opromisc) { 2809 error = bp->bif_methods->bif_promisc(bp->bif_softc, true); 2810 if (error) 2811 printf("%s: bif_promisc on %s failed (%d)\n", 2812 __func__, bp->bif_name, error); 2813 else 2814 d->bd_promisc = 1; 2815 } 2816 return (0); 2817 } 2818 2819 static void 2820 bpf_drvinit(void *unused) 2821 { 2822 struct cdev *dev; 2823 2824 sx_init(&bpf_sx, "bpf global lock"); 2825 dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf"); 2826 /* For compatibility */ 2827 make_dev_alias(dev, "bpf0"); 2828 } 2829 2830 /* 2831 * Zero out the various packet counters associated with all of the bpf 2832 * descriptors. At some point, we will probably want to get a bit more 2833 * granular and allow the user to specify descriptors to be zeroed. 2834 */ 2835 static void 2836 bpf_zero_counters(void) 2837 { 2838 struct bpf_if *bp; 2839 struct bpf_d *bd; 2840 2841 BPF_LOCK(); 2842 /* 2843 * We are protected by global lock here, interfaces and 2844 * descriptors can not be deleted while we hold it. 2845 */ 2846 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2847 CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 2848 counter_u64_zero(bd->bd_rcount); 2849 counter_u64_zero(bd->bd_dcount); 2850 counter_u64_zero(bd->bd_fcount); 2851 counter_u64_zero(bd->bd_wcount); 2852 counter_u64_zero(bd->bd_wfcount); 2853 counter_u64_zero(bd->bd_zcopy); 2854 } 2855 } 2856 BPF_UNLOCK(); 2857 } 2858 2859 /* 2860 * Fill filter statistics 2861 */ 2862 static void 2863 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) 2864 { 2865 2866 BPF_LOCK_ASSERT(); 2867 bzero(d, sizeof(*d)); 2868 d->bd_structsize = sizeof(*d); 2869 d->bd_immediate = bd->bd_flags & BPFD_IMMEDIATE ? 1 : 0; 2870 d->bd_promisc = bd->bd_promisc; 2871 d->bd_hdrcmplt = bd->bd_flags & BPFD_HDRCMPLT ? 1 : 0; 2872 d->bd_direction = bd->bd_direction; 2873 d->bd_feedback = bd->bd_flags & BPFD_FEEDBACK ? 1 : 0; 2874 d->bd_async = bd->bd_flags & BPFD_ASYNC ? 1 : 0; 2875 d->bd_rcount = counter_u64_fetch(bd->bd_rcount); 2876 d->bd_dcount = counter_u64_fetch(bd->bd_dcount); 2877 d->bd_fcount = counter_u64_fetch(bd->bd_fcount); 2878 d->bd_sig = bd->bd_sig; 2879 d->bd_slen = bd->bd_slen; 2880 d->bd_hlen = bd->bd_hlen; 2881 d->bd_bufsize = bd->bd_bufsize; 2882 d->bd_pid = bd->bd_pid; 2883 strlcpy(d->bd_ifname, bd->bd_bif->bif_name, sizeof(d->bd_ifname)); 2884 d->bd_locked = bd->bd_flags & BPFD_LOCKED ? 
1 : 0; 2885 d->bd_wcount = counter_u64_fetch(bd->bd_wcount); 2886 d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount); 2887 d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount); 2888 d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy); 2889 d->bd_bufmode = bd->bd_bufmode; 2890 } 2891 2892 /* 2893 * Handle `netstat -B' stats request 2894 */ 2895 static int 2896 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) 2897 { 2898 static const struct xbpf_d zerostats; 2899 struct xbpf_d *xbdbuf, *xbd, tempstats; 2900 u_int bpfd_cnt, index; 2901 int error; 2902 struct bpf_if *bp; 2903 struct bpf_d *bd; 2904 2905 /* 2906 * XXX This is not technically correct. It is possible for non 2907 * privileged users to open bpf devices. It would make sense 2908 * if the users who opened the devices were able to retrieve 2909 * the statistics for them, too. 2910 */ 2911 error = priv_check(req->td, PRIV_NET_BPF); 2912 if (error) 2913 return (error); 2914 /* 2915 * Check to see if the user is requesting that the counters be 2916 * zeroed out. Explicitly check that the supplied data is zeroed, 2917 * as we aren't allowing the user to set the counters currently. 2918 */ 2919 if (req->newptr != NULL) { 2920 if (req->newlen != sizeof(tempstats)) 2921 return (EINVAL); 2922 memset(&tempstats, 0, sizeof(tempstats)); 2923 error = SYSCTL_IN(req, &tempstats, sizeof(tempstats)); 2924 if (error) 2925 return (error); 2926 if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0) 2927 return (EINVAL); 2928 bpf_zero_counters(); 2929 return (0); 2930 } 2931 bpfd_cnt = 0; 2932 BPF_LOCK(); 2933 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2934 CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) 2935 bpfd_cnt++; 2936 CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) 2937 bpfd_cnt++; 2938 } 2939 if (bpfd_cnt == 0 || req->oldptr == NULL) { 2940 BPF_UNLOCK(); 2941 return (SYSCTL_OUT(req, 0, bpfd_cnt * sizeof(*xbd))); 2942 } 2943 if (req->oldlen < bpfd_cnt * sizeof(*xbd)) { 2944 BPF_UNLOCK(); 2945 return (ENOMEM); 2946 } 2947 xbdbuf = malloc(bpfd_cnt * sizeof(*xbd), M_BPF, M_WAITOK); 2948 index = 0; 2949 LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2950 /* Send writers-only first */ 2951 CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) { 2952 MPASS(index <= bpfd_cnt); 2953 xbd = &xbdbuf[index++]; 2954 bpfstats_fill_xbpf(xbd, bd); 2955 } 2956 CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 2957 MPASS(index <= bpfd_cnt); 2958 xbd = &xbdbuf[index++]; 2959 bpfstats_fill_xbpf(xbd, bd); 2960 } 2961 } 2962 BPF_UNLOCK(); 2963 error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); 2964 free(xbdbuf, M_BPF); 2965 return (error); 2966 } 2967 2968 SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL); 2969 2970 #else /* !DEV_BPF && !NETGRAPH_BPF */ 2971 2972 /* 2973 * NOP stubs to allow bpf-using drivers to load and function. 2974 * 2975 * A 'better' implementation would allow the core bpf functionality 2976 * to be loaded at runtime. 
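 * These stubs are built when the kernel is configured without BPF support
 * (neither DEV_BPF nor NETGRAPH_BPF is defined), so drivers may call the
 * bpf_*() hooks unconditionally.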
2977 */ 2978 2979 void 2980 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 2981 { 2982 } 2983 2984 void 2985 bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen) 2986 { 2987 } 2988 2989 void 2990 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2991 { 2992 } 2993 2994 void 2995 bpf_mtap_if(if_t ifp, struct mbuf *m) 2996 { 2997 } 2998 2999 void 3000 bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) 3001 { 3002 } 3003 3004 void 3005 bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m) 3006 { 3007 } 3008 3009 void 3010 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 3011 { 3012 static const struct bpfd_list dead_bpf_if = CK_LIST_HEAD_INITIALIZER(); 3013 3014 ifp->if_bpf = __DECONST(struct bpf_if *, &dead_bpf_if); 3015 } 3016 3017 void 3018 bpfdetach(struct ifnet *ifp) 3019 { 3020 } 3021 3022 bool 3023 bpf_peers_present_if(struct ifnet *ifp) 3024 { 3025 return (false); 3026 } 3027 3028 u_int 3029 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) 3030 { 3031 return (-1); /* "no filter" behaviour */ 3032 } 3033 3034 int 3035 bpf_validate(const struct bpf_insn *f, int len) 3036 { 3037 return (0); /* false */ 3038 } 3039 3040 #endif /* !DEV_BPF && !NETGRAPH_BPF */ 3041