/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
#include "opt_bpf.h"
#include "opt_ddb.h"
#include "opt_netgraph.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/sysent.h>
#include <sys/systm.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_vlan_var.h>
#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

static struct bpf_if_ext dead_bpf_if = {
	.bif_dlist = CK_LIST_HEAD_INITIALIZER()
};

struct bpf_if {
#define	bif_next	bif_ext.bif_next
#define	bif_dlist	bif_ext.bif_dlist
	struct bpf_if_ext bif_ext;	/* public members */
	u_int		bif_dlt;	/* link layer type */
	u_int		bif_hdrlen;	/* length of link header */
	struct bpfd_list bif_wlist;	/* writer-only list */
	struct ifnet	*bif_ifp;	/* corresponding interface */
	struct bpf_if	**bif_bpf;	/* Pointer to pointer to us */
	volatile u_int	bif_refcnt;
	struct epoch_context epoch_ctx;
};

CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);

struct bpf_program_buffer {
	struct epoch_context	epoch_ctx;
#ifdef BPF_JITTER
	bpf_jit_filter		*func;
#endif
	void			*buffer[0];
};

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define	PRINET		26	/* interruptible */
#define	BPF_PRIO_MAX	7

#define	SIZEOF_BPF_HDR(type)	\
    (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))

#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define	BPF_ALIGNMENT32		sizeof(int32_t)
#define	BPF_WORDALIGN32(x)	roundup2(x, BPF_ALIGNMENT32)

#ifndef BURN_BRIDGES
/*
 * 32-bit version of structure prepended to each packet.  We use this header
 * instead of the standard one for 32-bit streams.  We mark a stream as
 * 32-bit the first time we see a 32-bit compat ioctl request.
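 *
 * A reader of a 32-bit stream walks the returned buffer record by record,
 * roughly as in the following sketch (illustrative only; the pointer names
 * are hypothetical, the macros are the ones defined above):
 *
 *	struct bpf_hdr32 *hp = (struct bpf_hdr32 *)p;
 *	... payload is hp->bh_caplen bytes at p + hp->bh_hdrlen ...
 *	p += BPF_WORDALIGN32(hp->bh_hdrlen + hp->bh_caplen);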
 */
struct bpf_hdr32 {
	struct timeval32 bh_tstamp;	/* time stamp */
	uint32_t	bh_caplen;	/* length of captured portion */
	uint32_t	bh_datalen;	/* original length of packet */
	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
					   plus alignment padding) */
};
#endif

struct bpf_program32 {
	u_int		bf_len;
	uint32_t	bf_insns;
};

struct bpf_dltlist32 {
	u_int	bfl_len;
	u_int	bfl_list;
};

#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
#endif

#define	BPF_LOCK()		sx_xlock(&bpf_sx)
#define	BPF_UNLOCK()		sx_xunlock(&bpf_sx)
#define	BPF_LOCK_ASSERT()	sx_assert(&bpf_sx, SA_XLOCKED)
/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (i.e., 802.11
 * frames, ethernet frames, etc).
 */
CK_LIST_HEAD(bpf_iflist, bpf_if);
static struct bpf_iflist bpf_iflist;
static struct sx	bpf_sx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpfif_ref(struct bpf_if *);
static void	bpfif_rele(struct bpf_if *);

static void	bpfd_ref(struct bpf_d *);
static void	bpfd_rele(struct bpf_d *);
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_detachd_locked(struct bpf_d *, bool);
static void	bpfd_free(epoch_context_t);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_d *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct bintime *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static int	filt_bpfwrite(struct knote *, long);
static void	bpf_drvinit(void *);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "bpf sysctl");
int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RWTUN,
    &VNET_NAME(bpf_optimize_writers), 0,
    "Do not send packets until BPF program is set");

static	d_open_t	bpfopen;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
};

static struct filterops bpfwrite_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfwrite,
};

/*
 * LOCKING MODEL USED BY BPF
 *
 * Locks:
 * 1) global lock (BPF_LOCK).  Sx, used to protect some global counters,
 *    every bpf_iflist change, and to serialize ioctl access to bpf
 *    descriptors.
 * 2) Descriptor lock.  Mutex, used to protect BPF buffers and various
 *    structure fields used by bpf_*tap* code.
 *
 * Lock order: global lock, then descriptor lock.
 *
 * There are several possible consumers:
 *
 * 1. The kernel registers an interface pointer with bpfattach().
 *    Each call allocates a new bpf_if structure, references the ifnet
 *    pointer and links the bpf_if into the bpf_iflist chain.  This is
 *    protected with the global lock.
 *
 * 2. A userland application uses ioctl() calls on a bpf_d descriptor.
 *    All such calls are serialized with the global lock.  BPF filters can
 *    be changed, but the pointer to the old filter will be freed using
 *    NET_EPOCH_CALL().  Thus it should be safe for the bpf_tap/bpf_mtap*
 *    code to access the filter pointers, even if a change happens during
 *    bpf_tap execution.  Destruction of a bpf_d descriptor is also done
 *    using NET_EPOCH_CALL().
 *
 * 3. A userland application can write packets into a bpf_d descriptor.
 *    There we need to be sure that the ifnet won't disappear during
 *    bpfwrite().
 *
 * 4. The kernel invokes the bpf_tap/bpf_mtap* functions.  Access to
 *    bif_dlist is protected by a net_epoch_preempt section, so it should
 *    be safe to access the bpf_d descriptor inside the section.
 *
 * 5. The kernel invokes bpfdetach() on interface destruction.  All lists
 *    are modified with the global lock held and the actual free() is done
 *    using NET_EPOCH_CALL().
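 *
 * A minimal sketch of the reader-side pattern this model permits in the
 * bpf_*tap* routines below (illustrative only; see bpf_tap() for the
 * real code):
 *
 *	NET_EPOCH_ENTER(et);
 *	CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
 *		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
 *		if (slen != 0) {
 *			BPFD_LOCK(d);		(per-descriptor mutex)
 *			catchpacket(d, pkt, pktlen, slen, ...);
 *			BPFD_UNLOCK(d);
 *		}
 *	}
 *	NET_EPOCH_EXIT(et);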
 */

static void
bpfif_free(epoch_context_t ctx)
{
	struct bpf_if *bp;

	bp = __containerof(ctx, struct bpf_if, epoch_ctx);
	if_rele(bp->bif_ifp);
	free(bp, M_BPF);
}

static void
bpfif_ref(struct bpf_if *bp)
{

	refcount_acquire(&bp->bif_refcnt);
}

static void
bpfif_rele(struct bpf_if *bp)
{

	if (!refcount_release(&bp->bif_refcnt))
		return;
	NET_EPOCH_CALL(bpfif_free, &bp->epoch_ctx);
}

static void
bpfd_ref(struct bpf_d *d)
{

	refcount_acquire(&d->bd_refcnt);
}

static void
bpfd_rele(struct bpf_d *d)
{

	if (!refcount_release(&d->bd_refcnt))
		return;
	NET_EPOCH_CALL(bpfd_free, &d->epoch_ctx);
}

static struct bpf_program_buffer*
bpf_program_buffer_alloc(size_t size, int flags)
{

	return (malloc(sizeof(struct bpf_program_buffer) + size,
	    M_BPF, flags));
}

static void
bpf_program_buffer_free(epoch_context_t ctx)
{
	struct bpf_program_buffer *ptr;

	ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
#ifdef BPF_JITTER
	if (ptr->func != NULL)
		bpf_destroy_jit_filter(ptr->func);
#endif
	free(ptr, M_BPF);
}

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, et al.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		counter_u64_add(d->bd_zcopy, 1);
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		counter_u64_add(d->bd_zcopy, 1);
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * This function gets called when the free buffer is re-assigned.
 */
static void
bpf_buf_reclaimed(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return;

	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buf_reclaimed(d);
		return;

	default:
		panic("bpf_buf_reclaimed");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

/*
 * Allow the buffer model to indicate that the current store buffer is
 * immutable, regardless of the appearance of space.  Return (1) if the
 * buffer is writable, and (0) if not.
 */
static int
bpf_canwritebuf(struct bpf_d *d)
{
	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canwritebuf(d));
	}
	return (1);
}

/*
 * Notify buffer model that an attempt to write to the store buffer has
 * resulted in a dropped packet, in which case the buffer may be considered
 * full.
 */
static void
bpf_buffull(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buffull(d);
		break;
	}
}

/*
 * Notify the buffer model that a buffer has moved into the hold position.
 */
void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}

/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {
	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	if (len < hlen || len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	/*
	 * Allocate an mbuf for our write; since m_get2() fails if
	 * len >= MJUMPAGESIZE, use m_get3() for bigger buffers.
	 */
	m = m_get3(len, M_WAITOK, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (EIO);
	m->m_pkthdr.len = m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		if (d->bd_hdrcmplt == 0) {
			memcpy(eh->ether_shost, IF_LLADDR(ifp),
			    sizeof(eh->ether_shost));
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(mtod(m, const void *), sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach descriptor to the bpf interface, i.e. make d listen on bp,
 * then reset its buffers and counters with reset_d().
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int op_w;

	BPF_LOCK_ASSERT();

	/*
	 * Save sysctl value to protect from sysctl change
	 * between reads
	 */
	op_w = V_bpf_optimize_writers || d->bd_writer;

	if (d->bd_bif != NULL)
		bpf_detachd_locked(d, false);
	/*
	 * Point d at bp, and add d to the interface's list.
	 * Since there are many applications using BPF for sending raw
	 * packets only (dhcpd and cdpd are good examples), we can delay
	 * adding d to the list of active listeners until some filter is
	 * configured.
	 */

	BPFD_LOCK(d);
	/*
	 * Hold reference to bpif while descriptor uses this interface.
	 */
	bpfif_ref(bp);
	d->bd_bif = bp;
	if (op_w != 0) {
		/* Add to writers-only list */
		CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
		/*
		 * We decrement bd_writer on every filter set operation.
		 * The first BIOCSETF is done by pcap_open_live() to set up
		 * the snap length.  After that the application usually sets
		 * its own filter.
		 */
		d->bd_writer = 2;
	} else
		CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	reset_d(d);

	/* Trigger EVFILT_WRITE events. */
	bpf_wakeup(d);

	BPFD_UNLOCK(d);
	bpf_bpfd_cnt++;

	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");

	if (op_w == 0)
		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}

/*
 * Check if we need to upgrade our descriptor @d from write-only mode.
 */
static int
bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
    int flen)
{
	int is_snap, need_upgrade;

	/*
	 * Check if we've already upgraded or the new filter is empty.
	 */
	if (d->bd_writer == 0 || fcode == NULL)
		return (0);

	need_upgrade = 0;

	/*
	 * Check if cmd looks like a snaplen setting from
	 * pcap_bpf.c:pcap_open_live().
	 * Note we're not checking the .k value here:
	 * while pcap_open_live() definitely sets it to a non-zero value,
	 * we'd prefer to treat the k=0 (deny ALL) case the same way, e.g.
	 * not consider upgrading immediately.
	 */
	if (cmd == BIOCSETF && flen == 1 &&
	    fcode[0].code == (BPF_RET | BPF_K))
		is_snap = 1;
	else
		is_snap = 0;

	if (is_snap == 0) {
		/*
		 * We're setting the first filter and it doesn't look like
		 * setting snaplen.  We're probably using bpf directly.
		 * Upgrade immediately.
		 */
		need_upgrade = 1;
	} else {
		/*
		 * Do not require an upgrade on the first BIOCSETF
		 * (used to set snaplen) issued by pcap_open_live().
		 */

		if (--d->bd_writer == 0) {
			/*
			 * The first snaplen filter has already
			 * been set.  This is probably a catch-all
			 * filter.
			 */
			need_upgrade = 1;
		}
	}

	CTR5(KTR_NET,
	    "%s: filter function set by pid %d, "
	    "bd_writer counter %d, snap %d upgrade %d",
	    __func__, d->bd_pid, d->bd_writer,
	    is_snap, need_upgrade);

	return (need_upgrade);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	BPF_LOCK();
	bpf_detachd_locked(d, false);
	BPF_UNLOCK();
}

static void
bpf_detachd_locked(struct bpf_d *d, bool detached_ifp)
{
	struct bpf_if *bp;
	struct ifnet *ifp;
	int error;

	BPF_LOCK_ASSERT();
	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);

	/* Check if descriptor is attached */
	if ((bp = d->bd_bif) == NULL)
		return;

	BPFD_LOCK(d);
	/* Remove d from the interface's descriptor list. */
	CK_LIST_REMOVE(d, bd_next);
	/* Save bd_writer value */
	error = d->bd_writer;
	ifp = bp->bif_ifp;
	d->bd_bif = NULL;
	if (detached_ifp) {
		/*
		 * Notify descriptor as it's detached, so that any
		 * sleepers wake up and get ENXIO.
		 */
		bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
	bpf_bpfd_cnt--;

	/* Call event handler iff d is attached */
	if (error == 0)
		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so and ifnet is not detached, turn it off.
	 */
	if (d->bd_promisc && !detached_ifp) {
		d->bd_promisc = 0;
		CURVNET_SET(ifp->if_vnet);
		error = ifpromisc(ifp, 0);
		CURVNET_RESTORE();
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
			    "bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
	bpfif_rele(bp);
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
static void
bpf_dtor(void *data)
{
	struct bpf_d *d = data;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	bpf_detachd(d);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	seldrain(&d->bd_sel);
	knlist_destroy(&d->bd_sel.si_note);
	callout_drain(&d->bd_callout);
	bpfd_rele(d);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;
	int error;

	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	error = devfs_set_cdevpriv(d, bpf_dtor);
	if (error != 0) {
		free(d, M_BPF);
		return (error);
	}

	/* Setup counters */
	d->bd_rcount = counter_u64_alloc(M_WAITOK);
	d->bd_dcount = counter_u64_alloc(M_WAITOK);
	d->bd_fcount = counter_u64_alloc(M_WAITOK);
	d->bd_wcount = counter_u64_alloc(M_WAITOK);
	d->bd_wfcount = counter_u64_alloc(M_WAITOK);
	d->bd_wdcount = counter_u64_alloc(M_WAITOK);
	d->bd_zcopy = counter_u64_alloc(M_WAITOK);

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	if ((flags & FREAD) == 0)
		d->bd_writer = 2;
	d->bd_hbuf_in_use = 0;
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	refcount_init(&d->bd_refcnt, 1);
	BPF_PID_REFRESH(d, td);
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);

	/* Disable VLAN pcp tagging. */
	d->bd_pcp = 0;

	return (0);
}

/*
 * bpfread - read next chunk of packets from buffers
 */
static int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int non_block;
	int timed_out;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	non_block = ((ioflag & O_NONBLOCK) != 0);

	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	while (d->bd_hbuf_in_use) {
		error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
		    PRINET|PCATCH, "bd_hbuf", 0);
		if (error != 0) {
			BPFD_UNLOCK(d);
			return (error);
		}
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 */
			if (d->bd_immediate || non_block || timed_out) {
				/*
				 * Rotate the buffers and return what's here
				 * if we are in immediate mode, non-blocking
				 * flag is set, or this descriptor timed out.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (non_block) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
		    "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	d->bd_hbuf_in_use = 1;
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * We do not have to worry about simultaneous reads because
	 * we waited for sole access to the hold buffer above.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	bpf_buf_reclaimed(d);
	d->bd_hbuf_in_use = 0;
	wakeup(&d->bd_hbuf_in_use);
	BPFD_UNLOCK(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK_ASSERT(d);

	if (callout_pending(&d->bd_callout) ||
	    !callout_active(&d->bd_callout))
		return;
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct route ro;
	struct sockaddr dst;
	struct epoch_tracker et;
	struct bpf_if *bp;
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	int error, hlen;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	NET_EPOCH_ENTER(et);
	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	counter_u64_add(d->bd_wcount, 1);
	if ((bp = d->bd_bif) == NULL) {
		error = ENXIO;
		goto out_locked;
	}

	ifp = bp->bif_ifp;
	if ((ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out_locked;
	}

	if (uio->uio_resid == 0)
		goto out_locked;

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;

	/*
	 * Take extra reference, unlock d and exit from epoch section,
	 * since bpf_movein() can sleep.
	 */
	bpfd_ref(d);
	NET_EPOCH_EXIT(et);
	BPFD_UNLOCK(d);

	error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
	    &m, &dst, &hlen, d);

	if (error != 0) {
		counter_u64_add(d->bd_wdcount, 1);
		bpfd_rele(d);
		return (error);
	}

	BPFD_LOCK(d);
	/*
	 * Check that descriptor is still attached to the interface.
	 * This can happen on bpfdetach().  To avoid access to detached
	 * ifnet, free mbuf and return ENXIO.
	 */
	if (d->bd_bif == NULL) {
		counter_u64_add(d->bd_wdcount, 1);
		BPFD_UNLOCK(d);
		bpfd_rele(d);
		m_freem(m);
		return (ENXIO);
	}
	counter_u64_add(d->bd_wfcount, 1);
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_NOWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

	CURVNET_SET(ifp->if_vnet);
#ifdef MAC
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
#endif

	bzero(&ro, sizeof(ro));
	if (hlen != 0) {
		ro.ro_prepend = (u_char *)&dst.sa_data;
		ro.ro_plen = hlen;
		ro.ro_flags = RT_HAS_HEADER;
	}

	if (d->bd_pcp != 0)
		vlan_set_pcp(m, d->bd_pcp);

	/* Avoid possible recursion on BPFD_LOCK(). */
	NET_EPOCH_ENTER(et);
	BPFD_UNLOCK(d);
	error = (*ifp->if_output)(ifp, m, &dst, &ro);
	if (error)
		counter_u64_add(d->bd_wdcount, 1);

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
	bpfd_rele(d);
	return (error);

out_locked:
	counter_u64_add(d->bd_wdcount, 1);
	NET_EPOCH_EXIT(et);
	BPFD_UNLOCK(d);
	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the receive
 * and drop counts.  This is doable for kernel-only buffers, but with
 * zero-copy buffers, we can't write to (or rotate) buffers that are
 * currently owned by userspace.  It would be nice if we could encapsulate
 * this logic in the buffer code rather than here.
 */
static void
reset_d(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	while (d->bd_hbuf_in_use)
		mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
		    "bd_hbuf", 0);
	if ((d->bd_hbuf != NULL) &&
	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}
	if (bpf_canwritebuf(d))
		d->bd_slen = 0;
	counter_u64_zero(d->bd_rcount);
	counter_u64_zero(d->bd_dcount);
	counter_u64_zero(d->bd_fcount);
	counter_u64_zero(d->bd_wcount);
	counter_u64_zero(d->bd_wfcount);
	counter_u64_zero(d->bd_wdcount);
	counter_u64_zero(d->bd_zcopy);
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set read filter.
 *  BIOCSETFNR		Set read filter without resetting descriptor.
 *  BIOCSETWF		Set write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGDIRECTION	Get packet direction flag
 *  BIOCSDIRECTION	Set packet direction flag
 *  BIOCGTSTAMP		Get time stamp format and resolution.
 *  BIOCSTSTAMP		Set time stamp format and resolution.
 *  BIOCLOCK		Set "locked" flag
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 *  BIOCSETVLANPCP	Set VLAN PCP tag.
 */
/* ARGSUSED */
static int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d;
	int error;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	BPF_PID_REFRESH(d, td);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
#ifdef COMPAT_FREEBSD32
		case BIOCGDLTLIST32:
#endif
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
		case BIOCGRTIMEOUT32:
#endif
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCSTSTAMP:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
		case BIOCSRTIMEOUT32:
#endif
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
#ifdef COMPAT_FREEBSD32
	/*
	 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
	 * that it will get 32-bit packet headers.
	 */
	switch (cmd) {
	case BIOCSETF32:
	case BIOCSETFNR32:
	case BIOCSETWF32:
	case BIOCGDLTLIST32:
	case BIOCGRTIMEOUT32:
	case BIOCSRTIMEOUT32:
		if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
			BPFD_LOCK(d);
			d->bd_compat32 = 1;
			BPFD_UNLOCK(d);
		}
	}
#endif

	CURVNET_SET(TD_TO_VNET(td));
	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			while (d->bd_hbuf_in_use)
				mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
				    PRINET, "bd_hbuf", 0);
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_bufsize;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETFNR:
	case BIOCSETWF:
#ifdef COMPAT_FREEBSD32
	case BIOCSETF32:
	case BIOCSETFNR32:
	case BIOCSETWF32:
#endif
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		BPF_LOCK();
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		BPF_UNLOCK();
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		BPF_UNLOCK();
		break;

	/*
	 * Get a list of supported data link types.
	 */
#ifdef COMPAT_FREEBSD32
	case BIOCGDLTLIST32:
		{
			struct bpf_dltlist32 *list32;
			struct bpf_dltlist dltlist;

			list32 = (struct bpf_dltlist32 *)addr;
			dltlist.bfl_len = list32->bfl_len;
			dltlist.bfl_list = PTRIN(list32->bfl_list);
			BPF_LOCK();
			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				error = bpf_getdltlist(d, &dltlist);
				if (error == 0)
					list32->bfl_len = dltlist.bfl_len;
			}
			BPF_UNLOCK();
			break;
		}
#endif

	case BIOCGDLTLIST:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		BPF_UNLOCK();
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		BPF_UNLOCK();
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		BPF_UNLOCK();
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		{
			int alloc_buf, size;

			/*
			 * Behavior here depends on the buffering model.  If
			 * we're using kernel memory buffers, then we can
			 * allocate them here.  If we're using zero-copy,
			 * then the user process must have registered buffers
			 * by the time we get here.
			 */
			alloc_buf = 0;
			BPFD_LOCK(d);
			if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
			    d->bd_sbuf == NULL)
				alloc_buf = 1;
			BPFD_UNLOCK(d);
			if (alloc_buf) {
				size = d->bd_bufsize;
				error = bpf_buffer_ioctl_sblen(d, &size);
				if (error != 0)
					break;
			}
			BPF_LOCK();
			error = bpf_setif(d, (struct ifreq *)addr);
			BPF_UNLOCK();
			break;
		}

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
	case BIOCSRTIMEOUT32:
#endif
		{
			struct timeval *tv = (struct timeval *)addr;
#if defined(COMPAT_FREEBSD32)
			struct timeval32 *tv32;
			struct timeval tv64;

			if (cmd == BIOCSRTIMEOUT32) {
				tv32 = (struct timeval32 *)addr;
				tv = &tv64;
				tv->tv_sec = tv32->tv_sec;
				tv->tv_usec = tv32->tv_usec;
			} else
#endif
				tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
	case BIOCGRTIMEOUT32:
#endif
		{
			struct timeval *tv;
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
			struct timeval32 *tv32;
			struct timeval tv64;

			if (cmd == BIOCGRTIMEOUT32)
				tv = &tv64;
			else
#endif
				tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
			if (cmd == BIOCGRTIMEOUT32) {
				tv32 = (struct timeval32 *)addr;
				tv32->tv_sec = tv->tv_sec;
				tv32->tv_usec = tv->tv_usec;
			}
#endif

			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount);
			bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount);
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		BPFD_LOCK(d);
		d->bd_immediate = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_hdrcmplt;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		BPFD_LOCK(d);
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Get packet direction flag
	 */
	case BIOCGDIRECTION:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_direction;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set packet direction flag
	 */
	case BIOCSDIRECTION:
		{
			u_int direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				BPFD_LOCK(d);
				d->bd_direction = direction;
				BPFD_UNLOCK(d);
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	/*
	 * Get packet timestamp format and resolution.
	 */
	case BIOCGTSTAMP:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_tstamp;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set packet timestamp format and resolution.
	 */
	case BIOCSTSTAMP:
		{
			u_int func;

			func = *(u_int *)addr;
			if (BPF_T_VALID(func))
				d->bd_tstamp = func;
			else
				error = EINVAL;
		}
		break;

	case BIOCFEEDBACK:
		BPFD_LOCK(d);
		d->bd_feedback = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCLOCK:
		BPFD_LOCK(d);
		d->bd_locked = 1;
		BPFD_UNLOCK(d);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		BPFD_LOCK(d);
		d->bd_async = *(int *)addr;
		BPFD_UNLOCK(d);
		break;

	case FIOSETOWN:
		/*
		 * XXX: Add some sort of locking here?
		 * fsetown() can sleep.
		 */
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		BPFD_LOCK(d);
		*(int *)addr = fgetown(&d->bd_sigio);
		BPFD_UNLOCK(d);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else {
				BPFD_LOCK(d);
				d->bd_sig = sig;
				BPFD_UNLOCK(d);
			}
			break;
		}
	case BIOCGRSIG:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_sig;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETBUFMODE:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_bufmode;
		BPFD_UNLOCK(d);
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLTHROUGH */

		default:
			CURVNET_RESTORE();
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			CURVNET_RESTORE();
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
		break;

	case BIOCSETZBUF:
		error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
		break;

	case BIOCROTZBUF:
		error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
		break;

	case BIOCSETVLANPCP:
		{
			u_int pcp;

			pcp = *(u_int *)addr;
			if (pcp > BPF_PRIO_MAX || pcp < 0) {
				error = EINVAL;
				break;
			}
			d->bd_pcp = pcp;
			break;
		}
	}
	CURVNET_RESTORE();
	return (error);
}

/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 *
 * Note we use the global lock here to serialize bpf_setf() and bpf_setif()
 * calls.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
#ifdef COMPAT_FREEBSD32
	struct bpf_program fp_swab;
	struct bpf_program32 *fp32;
#endif
	struct bpf_program_buffer *fcode;
	struct bpf_insn *filter;
#ifdef BPF_JITTER
	bpf_jit_filter *jfunc;
#endif
	size_t size;
	u_int flen;
	bool track_event;

#ifdef COMPAT_FREEBSD32
	switch (cmd) {
	case BIOCSETF32:
	case BIOCSETWF32:
	case BIOCSETFNR32:
		fp32 = (struct bpf_program32 *)fp;
		fp_swab.bf_len = fp32->bf_len;
		fp_swab.bf_insns =
		    (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
		fp = &fp_swab;
		switch (cmd) {
		case BIOCSETF32:
			cmd = BIOCSETF;
			break;
		case BIOCSETWF32:
			cmd = BIOCSETWF;
			break;
		}
		break;
	}
#endif

	filter = NULL;
#ifdef BPF_JITTER
	jfunc = NULL;
#endif
	/*
	 * Check the new filter's validity before acquiring any locks.
	 * Allocate memory for the new filter, if needed.
	 */
	flen = fp->bf_len;
	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
		return (EINVAL);
	size = flen * sizeof(*fp->bf_insns);
	if (size > 0) {
		/* We're setting up new filter.  Copy and check actual data. */
		fcode = bpf_program_buffer_alloc(size, M_WAITOK);
		filter = (struct bpf_insn *)fcode->buffer;
		if (copyin(fp->bf_insns, filter, size) != 0 ||
		    !bpf_validate(filter, flen)) {
			free(fcode, M_BPF);
			return (EINVAL);
		}
#ifdef BPF_JITTER
		if (cmd != BIOCSETWF) {
			/*
			 * Filter is copied inside fcode and is
			 * perfectly valid.
			 */
			jfunc = bpf_jitter(filter, flen);
		}
#endif
	}

	track_event = false;
	fcode = NULL;

	BPF_LOCK();
	BPFD_LOCK(d);
	/* Set up new filter. */
	if (cmd == BIOCSETWF) {
		if (d->bd_wfilter != NULL) {
			fcode = __containerof((void *)d->bd_wfilter,
			    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
			fcode->func = NULL;
#endif
		}
		d->bd_wfilter = filter;
	} else {
		if (d->bd_rfilter != NULL) {
			fcode = __containerof((void *)d->bd_rfilter,
			    struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
			fcode->func = d->bd_bfilter;
#endif
		}
		d->bd_rfilter = filter;
#ifdef BPF_JITTER
		d->bd_bfilter = jfunc;
#endif
		if (cmd == BIOCSETF)
			reset_d(d);

		if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
			/*
			 * Filter can be set several times without
			 * specifying interface.  In this case just mark d
			 * as reader.
			 */
			d->bd_writer = 0;
			if (d->bd_bif != NULL) {
				/*
				 * Remove descriptor from writers-only list
				 * and add it to active readers list.
				 */
				CK_LIST_REMOVE(d, bd_next);
				CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
				    d, bd_next);
				CTR2(KTR_NET,
				    "%s: upgrade required by pid %d",
				    __func__, d->bd_pid);
				track_event = true;
			}
		}
	}
	BPFD_UNLOCK(d);

	if (fcode != NULL)
		NET_EPOCH_CALL(bpf_program_buffer_free, &fcode->epoch_ctx);

	if (track_event)
		EVENTHANDLER_INVOKE(bpf_track,
		    d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);

	BPF_UNLOCK();
	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	BPF_LOCK_ASSERT();

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;
	/*
	 * At this point, we expect the buffer is already allocated.  If not,
	 * return an error.
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif)
		bpf_attachd(d, bp);
	else {
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
	}
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
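 *
 * Illustrative userland usage (a sketch, not part of this file; fd and
 * buffer names are hypothetical): a reader that has bound the descriptor
 * with BIOCSETIF typically does
 *
 *	struct pollfd pfd = { .fd = bpf_fd, .events = POLLIN };
 *	if (poll(&pfd, 1, timeout_ms) > 0 && (pfd.revents & POLLIN) != 0)
 *		n = read(bpf_fd, buf, buflen);	(buflen == BIOCGBLEN size)
 *
 * With a non-zero BIOCSRTIMEOUT, bpfpoll() below also arms bd_callout so
 * the select/poll wait terminates when the read timeout expires.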
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
		return (events &
		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	BPF_PID_REFRESH(d, td);
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ and EVFILT_WRITE
 * filters and reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d;

	if (devfs_get_cdevpriv((void **)&d) != 0)
		return (1);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &bpfread_filtops;
		break;

	case EVFILT_WRITE:
		kn->kn_fop = &bpfwrite_filtops;
		break;

	default:
		return (1);
	}

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		/*
		 * Ignore the hold buffer if it is being copied to user space.
*/ 2220 if (!d->bd_hbuf_in_use && d->bd_hbuf) 2221 kn->kn_data += d->bd_hlen; 2222 } else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 2223 callout_reset(&d->bd_callout, d->bd_rtout, 2224 bpf_timed_out, d); 2225 d->bd_state = BPF_WAITING; 2226 } 2227 2228 return (ready); 2229 } 2230 2231 static int 2232 filt_bpfwrite(struct knote *kn, long hint) 2233 { 2234 struct bpf_d *d = (struct bpf_d *)kn->kn_hook; 2235 2236 BPFD_LOCK_ASSERT(d); 2237 2238 if (d->bd_bif == NULL) { 2239 kn->kn_data = 0; 2240 return (0); 2241 } else { 2242 kn->kn_data = d->bd_bif->bif_ifp->if_mtu; 2243 return (1); 2244 } 2245 } 2246 2247 #define BPF_TSTAMP_NONE 0 2248 #define BPF_TSTAMP_FAST 1 2249 #define BPF_TSTAMP_NORMAL 2 2250 #define BPF_TSTAMP_EXTERN 3 2251 2252 static int 2253 bpf_ts_quality(int tstype) 2254 { 2255 2256 if (tstype == BPF_T_NONE) 2257 return (BPF_TSTAMP_NONE); 2258 if ((tstype & BPF_T_FAST) != 0) 2259 return (BPF_TSTAMP_FAST); 2260 2261 return (BPF_TSTAMP_NORMAL); 2262 } 2263 2264 static int 2265 bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m) 2266 { 2267 struct timespec ts; 2268 struct m_tag *tag; 2269 int quality; 2270 2271 quality = bpf_ts_quality(tstype); 2272 if (quality == BPF_TSTAMP_NONE) 2273 return (quality); 2274 2275 if (m != NULL) { 2276 if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | M_TSTMP)) { 2277 mbuf_tstmp2timespec(m, &ts); 2278 timespec2bintime(&ts, bt); 2279 return (BPF_TSTAMP_EXTERN); 2280 } 2281 tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL); 2282 if (tag != NULL) { 2283 *bt = *(struct bintime *)(tag + 1); 2284 return (BPF_TSTAMP_EXTERN); 2285 } 2286 } 2287 if (quality == BPF_TSTAMP_NORMAL) 2288 binuptime(bt); 2289 else 2290 getbinuptime(bt); 2291 2292 return (quality); 2293 } 2294 2295 /* 2296 * Incoming linkage from device drivers. Process the packet pkt, of length 2297 * pktlen, which is stored in a contiguous buffer. The packet is parsed 2298 * by each process' filter, and if accepted, stashed into the corresponding 2299 * buffer. 2300 */ 2301 void 2302 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 2303 { 2304 struct epoch_tracker et; 2305 struct bintime bt; 2306 struct bpf_d *d; 2307 #ifdef BPF_JITTER 2308 bpf_jit_filter *bf; 2309 #endif 2310 u_int slen; 2311 int gottime; 2312 2313 gottime = BPF_TSTAMP_NONE; 2314 NET_EPOCH_ENTER(et); 2315 CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2316 counter_u64_add(d->bd_rcount, 1); 2317 /* 2318 * NB: We don't call BPF_CHECK_DIRECTION() here since there 2319 * is no way for the caller to indicate to us whether this 2320 * packet is inbound or outbound. In the bpf_mtap() routines, 2321 * we use the interface pointers on the mbuf to figure it out. 2322 */ 2323 #ifdef BPF_JITTER 2324 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; 2325 if (bf != NULL) 2326 slen = (*(bf->func))(pkt, pktlen, pktlen); 2327 else 2328 #endif 2329 slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen); 2330 if (slen != 0) { 2331 /* 2332 * Filter matches. Let's acquire the write lock.
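The descriptor lock protects the capture buffers that catchpacket() is about to fill.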
2333 */ 2334 BPFD_LOCK(d); 2335 counter_u64_add(d->bd_fcount, 1); 2336 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2337 gottime = bpf_gettime(&bt, d->bd_tstamp, 2338 NULL); 2339 #ifdef MAC 2340 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2341 #endif 2342 catchpacket(d, pkt, pktlen, slen, 2343 bpf_append_bytes, &bt); 2344 BPFD_UNLOCK(d); 2345 } 2346 } 2347 NET_EPOCH_EXIT(et); 2348 } 2349 2350 void 2351 bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen) 2352 { 2353 if (bpf_peers_present(ifp->if_bpf)) 2354 bpf_tap(ifp->if_bpf, pkt, pktlen); 2355 } 2356 2357 #define BPF_CHECK_DIRECTION(d, r, i) \ 2358 (((d)->bd_direction == BPF_D_IN && (r) != (i)) || \ 2359 ((d)->bd_direction == BPF_D_OUT && (r) == (i))) 2360 2361 /* 2362 * Incoming linkage from device drivers, when packet is in an mbuf chain. 2363 * Locking model is explained in bpf_tap(). 2364 */ 2365 void 2366 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 2367 { 2368 struct epoch_tracker et; 2369 struct bintime bt; 2370 struct bpf_d *d; 2371 #ifdef BPF_JITTER 2372 bpf_jit_filter *bf; 2373 #endif 2374 u_int pktlen, slen; 2375 int gottime; 2376 2377 /* Skip outgoing duplicate packets. */ 2378 if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) { 2379 m->m_flags &= ~M_PROMISC; 2380 return; 2381 } 2382 2383 pktlen = m_length(m, NULL); 2384 gottime = BPF_TSTAMP_NONE; 2385 2386 NET_EPOCH_ENTER(et); 2387 CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2388 if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp)) 2389 continue; 2390 counter_u64_add(d->bd_rcount, 1); 2391 #ifdef BPF_JITTER 2392 bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL; 2393 /* XXX We cannot handle multiple mbufs. */ 2394 if (bf != NULL && m->m_next == NULL) 2395 slen = (*(bf->func))(mtod(m, u_char *), pktlen, 2396 pktlen); 2397 else 2398 #endif 2399 slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0); 2400 if (slen != 0) { 2401 BPFD_LOCK(d); 2402 2403 counter_u64_add(d->bd_fcount, 1); 2404 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2405 gottime = bpf_gettime(&bt, d->bd_tstamp, m); 2406 #ifdef MAC 2407 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2408 #endif 2409 catchpacket(d, (u_char *)m, pktlen, slen, 2410 bpf_append_mbuf, &bt); 2411 BPFD_UNLOCK(d); 2412 } 2413 } 2414 NET_EPOCH_EXIT(et); 2415 } 2416 2417 void 2418 bpf_mtap_if(if_t ifp, struct mbuf *m) 2419 { 2420 if (bpf_peers_present(ifp->if_bpf)) { 2421 M_ASSERTVALID(m); 2422 bpf_mtap(ifp->if_bpf, m); 2423 } 2424 } 2425 2426 /* 2427 * Incoming linkage from device drivers, when packet is in 2428 * an mbuf chain and to be prepended by a contiguous header. 2429 */ 2430 void 2431 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m) 2432 { 2433 struct epoch_tracker et; 2434 struct bintime bt; 2435 struct mbuf mb; 2436 struct bpf_d *d; 2437 u_int pktlen, slen; 2438 int gottime; 2439 2440 /* Skip outgoing duplicate packets. */ 2441 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) { 2442 m->m_flags &= ~M_PROMISC; 2443 return; 2444 } 2445 2446 pktlen = m_length(m, NULL); 2447 /* 2448 * Craft on-stack mbuf suitable for passing to bpf_filter. 2449 * Note that we cut corners here; we only setup what's 2450 * absolutely needed--this mbuf should never go anywhere else. 
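In particular it carries no packet header and is never queued or freed; it only lets bpf_filter() see the prepended header and the original chain as one packet.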
*/ 2452 mb.m_flags = 0; 2453 mb.m_next = m; 2454 mb.m_data = data; 2455 mb.m_len = dlen; 2456 pktlen += dlen; 2457 2458 gottime = BPF_TSTAMP_NONE; 2459 2460 NET_EPOCH_ENTER(et); 2461 CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) { 2462 if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp)) 2463 continue; 2464 counter_u64_add(d->bd_rcount, 1); 2465 slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0); 2466 if (slen != 0) { 2467 BPFD_LOCK(d); 2468 2469 counter_u64_add(d->bd_fcount, 1); 2470 if (gottime < bpf_ts_quality(d->bd_tstamp)) 2471 gottime = bpf_gettime(&bt, d->bd_tstamp, m); 2472 #ifdef MAC 2473 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0) 2474 #endif 2475 catchpacket(d, (u_char *)&mb, pktlen, slen, 2476 bpf_append_mbuf, &bt); 2477 BPFD_UNLOCK(d); 2478 } 2479 } 2480 NET_EPOCH_EXIT(et); 2481 } 2482 2483 void 2484 bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m) 2485 { 2486 if (bpf_peers_present(ifp->if_bpf)) { 2487 M_ASSERTVALID(m); 2488 bpf_mtap2(ifp->if_bpf, data, dlen, m); 2489 } 2490 } 2491 2492 #undef BPF_CHECK_DIRECTION 2493 #undef BPF_TSTAMP_NONE 2494 #undef BPF_TSTAMP_FAST 2495 #undef BPF_TSTAMP_NORMAL 2496 #undef BPF_TSTAMP_EXTERN 2497 2498 static int 2499 bpf_hdrlen(struct bpf_d *d) 2500 { 2501 int hdrlen; 2502 2503 hdrlen = d->bd_bif->bif_hdrlen; 2504 #ifndef BURN_BRIDGES 2505 if (d->bd_tstamp == BPF_T_NONE || 2506 BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME) 2507 #ifdef COMPAT_FREEBSD32 2508 if (d->bd_compat32) 2509 hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32); 2510 else 2511 #endif 2512 hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr); 2513 else 2514 #endif 2515 hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr); 2516 #ifdef COMPAT_FREEBSD32 2517 if (d->bd_compat32) 2518 hdrlen = BPF_WORDALIGN32(hdrlen); 2519 else 2520 #endif 2521 hdrlen = BPF_WORDALIGN(hdrlen); 2522 2523 return (hdrlen - d->bd_bif->bif_hdrlen); 2524 } 2525 2526 static void 2527 bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype) 2528 { 2529 struct bintime bt2, boottimebin; 2530 struct timeval tsm; 2531 struct timespec tsn; 2532 2533 if ((tstype & BPF_T_MONOTONIC) == 0) { 2534 bt2 = *bt; 2535 getboottimebin(&boottimebin); 2536 bintime_add(&bt2, &boottimebin); 2537 bt = &bt2; 2538 } 2539 switch (BPF_T_FORMAT(tstype)) { 2540 case BPF_T_MICROTIME: 2541 bintime2timeval(bt, &tsm); 2542 ts->bt_sec = tsm.tv_sec; 2543 ts->bt_frac = tsm.tv_usec; 2544 break; 2545 case BPF_T_NANOTIME: 2546 bintime2timespec(bt, &tsn); 2547 ts->bt_sec = tsn.tv_sec; 2548 ts->bt_frac = tsn.tv_nsec; 2549 break; 2550 case BPF_T_BINTIME: 2551 ts->bt_sec = bt->sec; 2552 ts->bt_frac = bt->frac; 2553 break; 2554 } 2555 } 2556 2557 /* 2558 * Move the packet data from interface memory (pkt) into the 2559 * store buffer. "cpfn" is the routine called to do the actual data 2560 * transfer. bpf_append_bytes is passed in to copy contiguous chunks, while 2561 * bpf_append_mbuf is passed in to copy mbuf chains. In the latter case, 2562 * pkt is really an mbuf.
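The descriptor lock must be held by the caller.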
*/ 2564 static void 2565 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, 2566 void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int), 2567 struct bintime *bt) 2568 { 2569 static char zeroes[BPF_ALIGNMENT]; 2570 struct bpf_xhdr hdr; 2571 #ifndef BURN_BRIDGES 2572 struct bpf_hdr hdr_old; 2573 #ifdef COMPAT_FREEBSD32 2574 struct bpf_hdr32 hdr32_old; 2575 #endif 2576 #endif 2577 int caplen, curlen, hdrlen, pad, totlen; 2578 int do_wakeup = 0; 2579 int do_timestamp; 2580 int tstype; 2581 2582 BPFD_LOCK_ASSERT(d); 2583 if (d->bd_bif == NULL) { 2584 /* Descriptor was detached in a concurrent thread */ 2585 counter_u64_add(d->bd_dcount, 1); 2586 return; 2587 } 2588 2589 /* 2590 * Detect whether user space has released a buffer back to us, and if 2591 * so, move it from being a hold buffer to a free buffer. This may 2592 * not be the best place to do it (for example, we might only want to 2593 * run this check if we need the space), but for now it's a reliable 2594 * spot to do it. 2595 */ 2596 if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) { 2597 d->bd_fbuf = d->bd_hbuf; 2598 d->bd_hbuf = NULL; 2599 d->bd_hlen = 0; 2600 bpf_buf_reclaimed(d); 2601 } 2602 2603 /* 2604 * Figure out how many bytes to move. If the packet is 2605 * greater than or equal to the snapshot length, transfer that 2606 * much. Otherwise, transfer the whole packet (unless 2607 * we hit the buffer size limit). 2608 */ 2609 hdrlen = bpf_hdrlen(d); 2610 totlen = hdrlen + min(snaplen, pktlen); 2611 if (totlen > d->bd_bufsize) 2612 totlen = d->bd_bufsize; 2613 2614 /* 2615 * Round up the end of the previous packet to the next longword. 2616 * 2617 * Drop the packet if there's no room and no hope of room. 2618 * If the packet would overflow the storage buffer or the storage 2619 * buffer is considered immutable by the buffer model, try to rotate 2620 * the buffer and wake up pending processes. 2621 */ 2622 #ifdef COMPAT_FREEBSD32 2623 if (d->bd_compat32) 2624 curlen = BPF_WORDALIGN32(d->bd_slen); 2625 else 2626 #endif 2627 curlen = BPF_WORDALIGN(d->bd_slen); 2628 if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { 2629 if (d->bd_fbuf == NULL) { 2630 /* 2631 * There's no room in the store buffer, and no 2632 * prospect of room, so drop the packet. Notify the 2633 * buffer model. 2634 */ 2635 bpf_buffull(d); 2636 counter_u64_add(d->bd_dcount, 1); 2637 return; 2638 } 2639 KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use")); 2640 ROTATE_BUFFERS(d); 2641 do_wakeup = 1; 2642 curlen = 0; 2643 } else { 2644 if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) { 2645 /* 2646 * Immediate mode is set, or the read timeout has 2647 * already expired during a select call. A packet 2648 * arrived, so the reader should be woken up. 2649 */ 2650 do_wakeup = 1; 2651 } 2652 pad = curlen - d->bd_slen; 2653 KASSERT(pad >= 0 && pad <= sizeof(zeroes), 2654 ("%s: invalid pad byte count %d", __func__, pad)); 2655 if (pad > 0) { 2656 /* Zero pad bytes.
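They fill the alignment gap between packets so stale buffer contents are not exposed to the reader.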
*/ 2657 bpf_append_bytes(d, d->bd_sbuf, d->bd_slen, zeroes, 2658 pad); 2659 } 2660 } 2661 2662 caplen = totlen - hdrlen; 2663 tstype = d->bd_tstamp; 2664 do_timestamp = tstype != BPF_T_NONE; 2665 #ifndef BURN_BRIDGES 2666 if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) { 2667 struct bpf_ts ts; 2668 if (do_timestamp) 2669 bpf_bintime2ts(bt, &ts, tstype); 2670 #ifdef COMPAT_FREEBSD32 2671 if (d->bd_compat32) { 2672 bzero(&hdr32_old, sizeof(hdr32_old)); 2673 if (do_timestamp) { 2674 hdr32_old.bh_tstamp.tv_sec = ts.bt_sec; 2675 hdr32_old.bh_tstamp.tv_usec = ts.bt_frac; 2676 } 2677 hdr32_old.bh_datalen = pktlen; 2678 hdr32_old.bh_hdrlen = hdrlen; 2679 hdr32_old.bh_caplen = caplen; 2680 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old, 2681 sizeof(hdr32_old)); 2682 goto copy; 2683 } 2684 #endif 2685 bzero(&hdr_old, sizeof(hdr_old)); 2686 if (do_timestamp) { 2687 hdr_old.bh_tstamp.tv_sec = ts.bt_sec; 2688 hdr_old.bh_tstamp.tv_usec = ts.bt_frac; 2689 } 2690 hdr_old.bh_datalen = pktlen; 2691 hdr_old.bh_hdrlen = hdrlen; 2692 hdr_old.bh_caplen = caplen; 2693 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old, 2694 sizeof(hdr_old)); 2695 goto copy; 2696 } 2697 #endif 2698 2699 /* 2700 * Append the bpf header. Note we append the actual header size, but 2701 * move forward the length of the header plus padding. 2702 */ 2703 bzero(&hdr, sizeof(hdr)); 2704 if (do_timestamp) 2705 bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype); 2706 hdr.bh_datalen = pktlen; 2707 hdr.bh_hdrlen = hdrlen; 2708 hdr.bh_caplen = caplen; 2709 bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr)); 2710 2711 /* 2712 * Copy the packet data into the store buffer and update its length. 2713 */ 2714 #ifndef BURN_BRIDGES 2715 copy: 2716 #endif 2717 (*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen); 2718 d->bd_slen = curlen + totlen; 2719 2720 if (do_wakeup) 2721 bpf_wakeup(d); 2722 } 2723 2724 /* 2725 * Free buffers currently in use by a descriptor. 2726 * Called on close. 2727 */ 2728 static void 2729 bpfd_free(epoch_context_t ctx) 2730 { 2731 struct bpf_d *d; 2732 struct bpf_program_buffer *p; 2733 2734 /* 2735 * We don't need to lock out interrupts since this descriptor has 2736 * been detached from its interface and it yet hasn't been marked 2737 * free. 2738 */ 2739 d = __containerof(ctx, struct bpf_d, epoch_ctx); 2740 bpf_free(d); 2741 if (d->bd_rfilter != NULL) { 2742 p = __containerof((void *)d->bd_rfilter, 2743 struct bpf_program_buffer, buffer); 2744 #ifdef BPF_JITTER 2745 p->func = d->bd_bfilter; 2746 #endif 2747 bpf_program_buffer_free(&p->epoch_ctx); 2748 } 2749 if (d->bd_wfilter != NULL) { 2750 p = __containerof((void *)d->bd_wfilter, 2751 struct bpf_program_buffer, buffer); 2752 #ifdef BPF_JITTER 2753 p->func = NULL; 2754 #endif 2755 bpf_program_buffer_free(&p->epoch_ctx); 2756 } 2757 2758 mtx_destroy(&d->bd_lock); 2759 counter_u64_free(d->bd_rcount); 2760 counter_u64_free(d->bd_dcount); 2761 counter_u64_free(d->bd_fcount); 2762 counter_u64_free(d->bd_wcount); 2763 counter_u64_free(d->bd_wfcount); 2764 counter_u64_free(d->bd_wdcount); 2765 counter_u64_free(d->bd_zcopy); 2766 free(d, M_BPF); 2767 } 2768 2769 /* 2770 * Attach an interface to bpf. dlt is the link layer type; hdrlen is the 2771 * fixed size of the link header (variable length headers not yet supported). 2772 */ 2773 void 2774 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 2775 { 2776 2777 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 2778 } 2779 2780 /* 2781 * Attach an interface to bpf. 
ifp is a pointer to the structure 2782 * defining the interface to be attached, dlt is the link layer type, 2783 * and hdrlen is the fixed size of the link header (variable length 2784 * headers are not yet supported). 2785 */ 2786 void 2787 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, 2788 struct bpf_if **driverp) 2789 { 2790 struct bpf_if *bp; 2791 2792 KASSERT(*driverp == NULL, 2793 ("bpfattach2: driverp already initialized")); 2794 2795 bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO); 2796 2797 CK_LIST_INIT(&bp->bif_dlist); 2798 CK_LIST_INIT(&bp->bif_wlist); 2799 bp->bif_ifp = ifp; 2800 bp->bif_dlt = dlt; 2801 bp->bif_hdrlen = hdrlen; 2802 bp->bif_bpf = driverp; 2803 refcount_init(&bp->bif_refcnt, 1); 2804 *driverp = bp; 2805 /* 2806 * Reference the ifnet pointer, so it won't be freed until 2807 * we release it. 2808 */ 2809 if_ref(ifp); 2810 BPF_LOCK(); 2811 CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next); 2812 BPF_UNLOCK(); 2813 2814 if (bootverbose && IS_DEFAULT_VNET(curvnet)) 2815 if_printf(ifp, "bpf attached\n"); 2816 } 2817 2818 #ifdef VIMAGE 2819 /* 2820 * When moving interfaces between vnet instances we need a way to 2821 * query the dlt and hdrlen before detach so we can re-attach the if_bpf 2822 * after the vmove. We unfortunately have no device driver infrastructure 2823 * to query the interface for these values after creation/attach, thus 2824 * add this as a workaround. 2825 */ 2826 int 2827 bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen) 2828 { 2829 2830 if (bp == NULL) 2831 return (ENXIO); 2832 if (bif_dlt == NULL && bif_hdrlen == NULL) 2833 return (0); 2834 2835 if (bif_dlt != NULL) 2836 *bif_dlt = bp->bif_dlt; 2837 if (bif_hdrlen != NULL) 2838 *bif_hdrlen = bp->bif_hdrlen; 2839 2840 return (0); 2841 } 2842 #endif 2843 2844 /* 2845 * Detach bpf from an interface. This involves detaching each descriptor 2846 * associated with the interface. Notify each descriptor as it's detached 2847 * so that any sleepers wake up and get ENXIO. 2848 */ 2849 void 2850 bpfdetach(struct ifnet *ifp) 2851 { 2852 struct bpf_if *bp, *bp_temp; 2853 struct bpf_d *d; 2854 2855 BPF_LOCK(); 2856 /* Find all bpf_if structs which reference ifp and detach them. */ 2857 CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) { 2858 if (ifp != bp->bif_ifp) 2859 continue; 2860 2861 CK_LIST_REMOVE(bp, bif_next); 2862 *bp->bif_bpf = (struct bpf_if *)&dead_bpf_if; 2863 2864 CTR4(KTR_NET, 2865 "%s: scheduling free for encap %d (%p) for if %p", 2866 __func__, bp->bif_dlt, bp, ifp); 2867 2868 /* Detach common descriptors */ 2869 while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) { 2870 bpf_detachd_locked(d, true); 2871 } 2872 2873 /* Detach writer-only descriptors */ 2874 while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) { 2875 bpf_detachd_locked(d, true); 2876 } 2877 bpfif_rele(bp); 2878 } 2879 BPF_UNLOCK(); 2880 } 2881 2882 /* 2883 * Get a list of the available data link types of the interface.
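If bfl->bfl_list is NULL, only the number of available DLTs is returned in bfl->bfl_len.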
2884 */ 2885 static int 2886 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 2887 { 2888 struct ifnet *ifp; 2889 struct bpf_if *bp; 2890 u_int *lst; 2891 int error, n, n1; 2892 2893 BPF_LOCK_ASSERT(); 2894 2895 ifp = d->bd_bif->bif_ifp; 2896 n1 = 0; 2897 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2898 if (bp->bif_ifp == ifp) 2899 n1++; 2900 } 2901 if (bfl->bfl_list == NULL) { 2902 bfl->bfl_len = n1; 2903 return (0); 2904 } 2905 if (n1 > bfl->bfl_len) 2906 return (ENOMEM); 2907 2908 lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK); 2909 n = 0; 2910 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2911 if (bp->bif_ifp != ifp) 2912 continue; 2913 lst[n++] = bp->bif_dlt; 2914 } 2915 error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n); 2916 free(lst, M_TEMP); 2917 bfl->bfl_len = n; 2918 return (error); 2919 } 2920 2921 /* 2922 * Set the data link type of a BPF instance. 2923 */ 2924 static int 2925 bpf_setdlt(struct bpf_d *d, u_int dlt) 2926 { 2927 int error, opromisc; 2928 struct ifnet *ifp; 2929 struct bpf_if *bp; 2930 2931 BPF_LOCK_ASSERT(); 2932 MPASS(d->bd_bif != NULL); 2933 2934 /* 2935 * It is safe to check bd_bif without BPFD_LOCK, it can not be 2936 * changed while we hold global lock. 2937 */ 2938 if (d->bd_bif->bif_dlt == dlt) 2939 return (0); 2940 2941 ifp = d->bd_bif->bif_ifp; 2942 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2943 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) 2944 break; 2945 } 2946 if (bp == NULL) 2947 return (EINVAL); 2948 2949 opromisc = d->bd_promisc; 2950 bpf_attachd(d, bp); 2951 if (opromisc) { 2952 error = ifpromisc(bp->bif_ifp, 1); 2953 if (error) 2954 if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n", 2955 __func__, error); 2956 else 2957 d->bd_promisc = 1; 2958 } 2959 return (0); 2960 } 2961 2962 static void 2963 bpf_drvinit(void *unused) 2964 { 2965 struct cdev *dev; 2966 2967 sx_init(&bpf_sx, "bpf global lock"); 2968 CK_LIST_INIT(&bpf_iflist); 2969 2970 dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf"); 2971 /* For compatibility */ 2972 make_dev_alias(dev, "bpf0"); 2973 } 2974 2975 /* 2976 * Zero out the various packet counters associated with all of the bpf 2977 * descriptors. At some point, we will probably want to get a bit more 2978 * granular and allow the user to specify descriptors to be zeroed. 2979 */ 2980 static void 2981 bpf_zero_counters(void) 2982 { 2983 struct bpf_if *bp; 2984 struct bpf_d *bd; 2985 2986 BPF_LOCK(); 2987 /* 2988 * We are protected by global lock here, interfaces and 2989 * descriptors can not be deleted while we hold it. 
2990 */ 2991 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 2992 CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 2993 counter_u64_zero(bd->bd_rcount); 2994 counter_u64_zero(bd->bd_dcount); 2995 counter_u64_zero(bd->bd_fcount); 2996 counter_u64_zero(bd->bd_wcount); 2997 counter_u64_zero(bd->bd_wfcount); 2998 counter_u64_zero(bd->bd_zcopy); 2999 } 3000 } 3001 BPF_UNLOCK(); 3002 } 3003 3004 /* 3005 * Fill filter statistics 3006 */ 3007 static void 3008 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd) 3009 { 3010 3011 BPF_LOCK_ASSERT(); 3012 bzero(d, sizeof(*d)); 3013 d->bd_structsize = sizeof(*d); 3014 d->bd_immediate = bd->bd_immediate; 3015 d->bd_promisc = bd->bd_promisc; 3016 d->bd_hdrcmplt = bd->bd_hdrcmplt; 3017 d->bd_direction = bd->bd_direction; 3018 d->bd_feedback = bd->bd_feedback; 3019 d->bd_async = bd->bd_async; 3020 d->bd_rcount = counter_u64_fetch(bd->bd_rcount); 3021 d->bd_dcount = counter_u64_fetch(bd->bd_dcount); 3022 d->bd_fcount = counter_u64_fetch(bd->bd_fcount); 3023 d->bd_sig = bd->bd_sig; 3024 d->bd_slen = bd->bd_slen; 3025 d->bd_hlen = bd->bd_hlen; 3026 d->bd_bufsize = bd->bd_bufsize; 3027 d->bd_pid = bd->bd_pid; 3028 strlcpy(d->bd_ifname, 3029 bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ); 3030 d->bd_locked = bd->bd_locked; 3031 d->bd_wcount = counter_u64_fetch(bd->bd_wcount); 3032 d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount); 3033 d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount); 3034 d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy); 3035 d->bd_bufmode = bd->bd_bufmode; 3036 } 3037 3038 /* 3039 * Handle `netstat -B' stats request 3040 */ 3041 static int 3042 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS) 3043 { 3044 static const struct xbpf_d zerostats; 3045 struct xbpf_d *xbdbuf, *xbd, tempstats; 3046 int index, error; 3047 struct bpf_if *bp; 3048 struct bpf_d *bd; 3049 3050 /* 3051 * XXX This is not technically correct. It is possible for non 3052 * privileged users to open bpf devices. It would make sense 3053 * if the users who opened the devices were able to retrieve 3054 * the statistics for them, too. 3055 */ 3056 error = priv_check(req->td, PRIV_NET_BPF); 3057 if (error) 3058 return (error); 3059 /* 3060 * Check to see if the user is requesting that the counters be 3061 * zeroed out. Explicitly check that the supplied data is zeroed, 3062 * as we aren't allowing the user to set the counters currently. 
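In practice, writing a zero-filled struct xbpf_d of the expected size zeroes the counters via bpf_zero_counters().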
3063 */ 3064 if (req->newptr != NULL) { 3065 if (req->newlen != sizeof(tempstats)) 3066 return (EINVAL); 3067 memset(&tempstats, 0, sizeof(tempstats)); 3068 error = SYSCTL_IN(req, &tempstats, sizeof(tempstats)); 3069 if (error) 3070 return (error); 3071 if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0) 3072 return (EINVAL); 3073 bpf_zero_counters(); 3074 return (0); 3075 } 3076 if (req->oldptr == NULL) 3077 return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd))); 3078 if (bpf_bpfd_cnt == 0) 3079 return (SYSCTL_OUT(req, 0, 0)); 3080 xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK); 3081 BPF_LOCK(); 3082 if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) { 3083 BPF_UNLOCK(); 3084 free(xbdbuf, M_BPF); 3085 return (ENOMEM); 3086 } 3087 index = 0; 3088 CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) { 3089 /* Send writers-only first */ 3090 CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) { 3091 xbd = &xbdbuf[index++]; 3092 bpfstats_fill_xbpf(xbd, bd); 3093 } 3094 CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) { 3095 xbd = &xbdbuf[index++]; 3096 bpfstats_fill_xbpf(xbd, bd); 3097 } 3098 } 3099 BPF_UNLOCK(); 3100 error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd)); 3101 free(xbdbuf, M_BPF); 3102 return (error); 3103 } 3104 3105 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL); 3106 3107 #else /* !DEV_BPF && !NETGRAPH_BPF */ 3108 3109 /* 3110 * NOP stubs to allow bpf-using drivers to load and function. 3111 * 3112 * A 'better' implementation would allow the core bpf functionality 3113 * to be loaded at runtime. 3114 */ 3115 3116 void 3117 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen) 3118 { 3119 } 3120 3121 void 3122 bpf_tap_if(if_t ifp, u_char *pkt, u_int pktlen) 3123 { 3124 } 3125 3126 void 3127 bpf_mtap(struct bpf_if *bp, struct mbuf *m) 3128 { 3129 } 3130 3131 void 3132 bpf_mtap_if(if_t ifp, struct mbuf *m) 3133 { 3134 } 3135 3136 void 3137 bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m) 3138 { 3139 } 3140 3141 void 3142 bpf_mtap2_if(if_t ifp, void *data, u_int dlen, struct mbuf *m) 3143 { 3144 } 3145 3146 void 3147 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) 3148 { 3149 3150 bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf); 3151 } 3152 3153 void 3154 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp) 3155 { 3156 3157 *driverp = (struct bpf_if *)&dead_bpf_if; 3158 } 3159 3160 void 3161 bpfdetach(struct ifnet *ifp) 3162 { 3163 } 3164 3165 u_int 3166 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) 3167 { 3168 return -1; /* "no filter" behaviour */ 3169 } 3170 3171 int 3172 bpf_validate(const struct bpf_insn *f, int len) 3173 { 3174 return 0; /* false */ 3175 } 3176 3177 #endif /* !DEV_BPF && !NETGRAPH_BPF */ 3178 3179 #ifdef DDB 3180 static void 3181 bpf_show_bpf_if(struct bpf_if *bpf_if) 3182 { 3183 3184 if (bpf_if == NULL) 3185 return; 3186 db_printf("%p:\n", bpf_if); 3187 #define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e); 3188 #define BPF_DB_PRINTF_RAW(f, e) db_printf(" %s = " f "\n", #e, e); 3189 /* bif_ext.bif_next */ 3190 /* bif_ext.bif_dlist */ 3191 BPF_DB_PRINTF("%#x", bif_dlt); 3192 BPF_DB_PRINTF("%u", bif_hdrlen); 3193 /* bif_wlist */ 3194 BPF_DB_PRINTF("%p", bif_ifp); 3195 BPF_DB_PRINTF("%p", bif_bpf); 3196 BPF_DB_PRINTF_RAW("%u", refcount_load(&bpf_if->bif_refcnt)); 3197 } 3198 3199 DB_SHOW_COMMAND(bpf_if, db_show_bpf_if) 3200 { 3201 3202 if (!have_addr) { 3203 db_printf("usage: show bpf_if <struct bpf_if *>\n"); 3204 return; 3205 } 3206 3207 bpf_show_bpf_if((struct 
bpf_if *)addr); 3208 } 3209 #endif 3210