/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_ddb.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>
#include <sys/sysent.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

struct bpf_if {
#define	bif_next	bif_ext.bif_next
#define	bif_dlist	bif_ext.bif_dlist
	struct bpf_if_ext bif_ext;	/* public members */
	u_int		bif_dlt;	/* link layer type */
	u_int		bif_hdrlen;	/* length of link header */
	struct ifnet	*bif_ifp;	/* corresponding interface */
	struct rwlock	bif_lock;	/* interface lock */
	LIST_HEAD(, bpf_d) bif_wlist;	/* writer-only list */
	int		bif_flags;	/* Interface flags */
	struct bpf_if	**bif_bpf;	/* Pointer to pointer to us */
};

CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define	PRINET	26			/* interruptible */

#define	SIZEOF_BPF_HDR(type)	\
    (offsetof(type, bh_hdrlen) + sizeof(((type *)0)->bh_hdrlen))

#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <compat/freebsd32/freebsd32.h>
#define	BPF_ALIGNMENT32 sizeof(int32_t)
#define	BPF_WORDALIGN32(x) roundup2(x, BPF_ALIGNMENT32)
#ifndef BURN_BRIDGES
/*
 * 32-bit version of structure prepended to each packet.  We use this header
 * instead of the standard one for 32-bit streams.  We mark a stream as
 * 32-bit the first time we see a 32-bit compat ioctl request.
 */
struct bpf_hdr32 {
	struct timeval32 bh_tstamp;	/* time stamp */
	uint32_t	bh_caplen;	/* length of captured portion */
	uint32_t	bh_datalen;	/* original length of packet */
	uint16_t	bh_hdrlen;	/* length of bpf header (this struct
					   plus alignment padding) */
};
#endif

struct bpf_program32 {
	u_int bf_len;
	uint32_t bf_insns;
};

struct bpf_dltlist32 {
	u_int	bfl_len;
	u_int	bfl_list;
};

#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
#endif

#define	BPF_LOCK()	sx_xlock(&bpf_sx)
#define	BPF_UNLOCK()	sx_xunlock(&bpf_sx)
#define	BPF_LOCK_ASSERT()	sx_assert(&bpf_sx, SA_XLOCKED)

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist, bpf_freelist;
static struct sx	bpf_sx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_detachd_locked(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_d *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void	bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct bintime *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

static VNET_DEFINE(int, bpf_optimize_writers) = 0;
#define	V_bpf_optimize_writers VNET(bpf_optimize_writers)
SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(bpf_optimize_writers), 0,
    "Do not send packets until BPF program is set");

static	d_open_t	bpfopen;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};
static struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
};

eventhandler_tag	bpf_ifdetach_cookie = NULL;

/*
 * LOCKING MODEL USED BY BPF:
 * Locks:
 * 1) global lock (BPF_LOCK).  Sx lock, used to protect interface
 *    addition/removal, some global counters and every bpf_if reference.
 * 2) Interface lock.  Rwlock, used to protect the list of BPF descriptors
 *    and their filters.
 * 3) Descriptor lock.  Mutex, used to protect BPF buffers and various
 *    structure fields used by bpf_mtap code.
 *
 * Lock order:
 *
 * Global lock, interface lock, descriptor lock
 *
 * We have to acquire the interface lock before the descriptor main lock
 * due to the BPF_MTAP[2] working model.  In many places (like bpf_detachd)
 * we start with a BPF descriptor (and we need to at least rlock it to get
 * a reliable interface pointer).  This gives us a potential LOR.  As a
 * result, we use the global lock to protect from bpf_if changes in every
 * such place.
 *
 * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
 * 3) descriptor main wlock.
 * Reading bd_bif can be protected by any of these locks, typically global lock.
 *
 * Changing read/write BPF filter is protected by the same three locks,
 * the same applies for reading.
 *
 * Sleeping in global lock is not allowed due to bpfdetach() using it.
 */

/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		counter_u64_add(d->bd_zcopy, 1);
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		counter_u64_add(d->bd_zcopy, 1);
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * This function gets called when the free buffer is re-assigned.
 */
static void
bpf_buf_reclaimed(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return;

	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buf_reclaimed(d);
		return;

	default:
		panic("bpf_buf_reclaimed");
	}
}
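/*
 * Illustrative sketch (not part of the build): a hypothetical third buffer
 * mode, say BPF_BUFMODE_FOO, would plug into each of the dispatch switches
 * above with its own case, roughly:
 *
 *	case BPF_BUFMODE_FOO:
 *		return (bpf_foo_append_bytes(d, buf, offset, src, len));
 *
 * together with matching append/reclaim/canfree/canwrite callbacks below;
 * a protosw-like ops table would replace these switches entirely.
 */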
/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

/*
 * Allow the buffer model to indicate that the current store buffer is
 * immutable, regardless of the appearance of space.  Return (1) if the
 * buffer is writable, and (0) if not.
 */
static int
bpf_canwritebuf(struct bpf_d *d)
{
	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canwritebuf(d));
	}
	return (1);
}

/*
 * Notify buffer model that an attempt to write to the store buffer has
 * resulted in a dropped packet, in which case the buffer may be considered
 * full.
 */
static void
bpf_buffull(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buffull(d);
		break;
	}
}

/*
 * Notify the buffer model that a buffer has moved into the hold position.
 */
void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}
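/*
 * Illustrative userland sequence for the zero-copy ioctls above (a sketch,
 * not part of this file; the buffer plumbing itself lives in
 * bpf_zerocopy.c, and the mmap() details are an assumption here):
 *
 *	u_int mode = BPF_BUFMODE_ZBUF;
 *	size_t zmax;
 *	struct bpf_zbuf zb;
 *
 *	ioctl(fd, BIOCSETBUFMODE, &mode);	// before BIOCSETIF
 *	ioctl(fd, BIOCGETZMAX, &zmax);		// largest allowed buffer
 *	zb.bz_bufa = ...;			// two process-provided buffers,
 *	zb.bz_bufb = ...;			// e.g. anonymous mmap() regions
 *	zb.bz_buflen = ...;			// <= zmax
 *	ioctl(fd, BIOCSETZBUF, &zb);
 *	ioctl(fd, BIOCSETIF, &ifr);
 */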
/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * Null interface types require a 4-byte pseudo header
		 * which corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The en ATM driver requires a 4-byte ATM pseudo header.
		 * Though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;
	if (len < hlen || len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (EIO);
	m->m_pkthdr.len = m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		if (d->bd_hdrcmplt == 0) {
			memcpy(eh->ether_shost, IF_LLADDR(ifp),
			    sizeof(eh->ether_shost));
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(mtod(m, const void *), sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}
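/*
 * Illustrative userland write path feeding bpf_movein() (a sketch; error
 * handling omitted, and the interface name "em0" is an example only):
 *
 *	int fd = open("/dev/bpf", O_RDWR);
 *	struct ifreq ifr = { .ifr_name = "em0" };
 *
 *	ioctl(fd, BIOCSETIF, &ifr);	// attach; Ethernet gives DLT_EN10MB
 *	write(fd, frame, framelen);	// full Ethernet frame, header first
 *
 * With BIOCSHDRCMPLT unset (the default), bpf_movein() overwrites the
 * frame's source MAC address with the interface's own, as seen above.
 */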
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	int op_w;

	BPF_LOCK_ASSERT();

	/*
	 * Save sysctl value to protect from sysctl change
	 * between reads
	 */
	op_w = V_bpf_optimize_writers || d->bd_writer;

	if (d->bd_bif != NULL)
		bpf_detachd_locked(d);
	/*
	 * Point d at bp, and add d to the interface's list.
	 * Since there are many applications using BPF for
	 * sending raw packets only (dhcpd and cdpd are good examples),
	 * we can delay adding d to the list of active listeners until
	 * some filter is configured.
	 */

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	d->bd_bif = bp;

	if (op_w != 0) {
		/* Add to writers-only list */
		LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
		/*
		 * We decrement bd_writer on every filter set operation.
		 * First BIOCSETF is done by pcap_open_live() to set up
		 * snap length.  After that the application usually sets
		 * its own filter.
		 */
		d->bd_writer = 2;
	} else
		LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	bpf_bpfd_cnt++;

	CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
	    __func__, d->bd_pid, d->bd_writer ? "writer" : "active");

	if (op_w == 0)
		EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}

/*
 * Check if we need to upgrade our descriptor @d from write-only mode.
 */
static int
bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
{
	int is_snap, need_upgrade;

	/*
	 * Check if we've already upgraded or new filter is empty.
	 */
	if (d->bd_writer == 0 || fcode == NULL)
		return (0);

	need_upgrade = 0;

	/*
	 * Check if cmd looks like snaplen setting from
	 * pcap_bpf.c:pcap_open_live().
	 * Note we're not checking .k value here:
	 * while pcap_open_live() definitely sets it to a non-zero value,
	 * we'd prefer to treat the k=0 (deny ALL) case the same way: e.g.
	 * do not consider upgrading immediately.
	 */
	if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K))
		is_snap = 1;
	else
		is_snap = 0;

	if (is_snap == 0) {
		/*
		 * We're setting the first filter and it doesn't look like
		 * setting snaplen.  We're probably using bpf directly.
		 * Upgrade immediately.
		 */
		need_upgrade = 1;
	} else {
		/*
		 * Do not require upgrade by first BIOCSETF
		 * (used to set snaplen) by pcap_open_live().
		 */

		if (--d->bd_writer == 0) {
			/*
			 * First snaplen filter has already
			 * been set.  This is probably a catch-all
			 * filter.
			 */
			need_upgrade = 1;
		}
	}

	CTR5(KTR_NET,
	    "%s: filter function set by pid %d, "
	    "bd_writer counter %d, snap %d upgrade %d",
	    __func__, d->bd_pid, d->bd_writer,
	    is_snap, need_upgrade);

	return (need_upgrade);
}

/*
 * Add d to the list of active bp filters.
 * Requires bpf_attachd() to be called before.
 */
static void
bpf_upgraded(struct bpf_d *d)
{
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();

	bp = d->bd_bif;

	/*
	 * Filter can be set several times without specifying interface.
	 * Mark d as reader and exit.
	 */
	if (bp == NULL) {
		BPFD_LOCK(d);
		d->bd_writer = 0;
		BPFD_UNLOCK(d);
		return;
	}

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	/* Remove from writers-only list */
	LIST_REMOVE(d, bd_next);
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
	/* Mark d as reader */
	d->bd_writer = 0;

	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);

	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}
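/*
 * For reference, the one-instruction program that bpf_check_upgrade()
 * treats as a snaplen setting looks like this from userland (a sketch;
 * pcap_open_live() installs the equivalent):
 *
 *	struct bpf_insn snap = BPF_STMT(BPF_RET | BPF_K, snaplen);
 *	struct bpf_program fp = { .bf_len = 1, .bf_insns = &snap };
 *
 *	ioctl(fd, BIOCSETF, &fp);
 *
 * Anything else (flen > 1, or a first opcode other than BPF_RET | BPF_K)
 * upgrades the descriptor to the active-readers list immediately.
 */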
/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	BPF_LOCK();
	bpf_detachd_locked(d);
	BPF_UNLOCK();
}

static void
bpf_detachd_locked(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);

	BPF_LOCK_ASSERT();

	/* Check if descriptor is attached */
	if ((bp = d->bd_bif) == NULL)
		return;

	BPFIF_WLOCK(bp);
	BPFD_LOCK(d);

	/* Save bd_writer value */
	error = d->bd_writer;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	ifp = bp->bif_ifp;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_WUNLOCK(bp);

	bpf_bpfd_cnt--;

	/* Call event handler iff d is attached */
	if (error == 0)
		EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		CURVNET_SET(ifp->if_vnet);
		error = ifpromisc(ifp, 0);
		CURVNET_RESTORE();
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged.
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
			    "bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
static void
bpf_dtor(void *data)
{
	struct bpf_d *d = data;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	bpf_detachd(d);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	seldrain(&d->bd_sel);
	knlist_destroy(&d->bd_sel.si_note);
	callout_drain(&d->bd_callout);
	bpf_freed(d);
	free(d, M_BPF);
}
/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;
	int error;

	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	error = devfs_set_cdevpriv(d, bpf_dtor);
	if (error != 0) {
		free(d, M_BPF);
		return (error);
	}

	/* Setup counters */
	d->bd_rcount = counter_u64_alloc(M_WAITOK);
	d->bd_dcount = counter_u64_alloc(M_WAITOK);
	d->bd_fcount = counter_u64_alloc(M_WAITOK);
	d->bd_wcount = counter_u64_alloc(M_WAITOK);
	d->bd_wfcount = counter_u64_alloc(M_WAITOK);
	d->bd_wdcount = counter_u64_alloc(M_WAITOK);
	d->bd_zcopy = counter_u64_alloc(M_WAITOK);

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	if ((flags & FREAD) == 0)
		d->bd_writer = 2;
	d->bd_hbuf_in_use = 0;
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	BPF_PID_REFRESH(d, td);
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);

	return (0);
}

/*
 * bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int non_block;
	int timed_out;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	non_block = ((ioflag & O_NONBLOCK) != 0);

	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	while (d->bd_hbuf_in_use) {
		error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
		    PRINET|PCATCH, "bd_hbuf", 0);
		if (error != 0) {
			BPFD_UNLOCK(d);
			return (error);
		}
	}
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 */
			if (d->bd_immediate || non_block || timed_out) {
				/*
				 * Rotate the buffers and return what's here
				 * if we are in immediate mode, non-blocking
				 * flag is set, or this descriptor timed out.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (non_block) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_lock, PRINET|PCATCH,
		    "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	d->bd_hbuf_in_use = 1;
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * We do not have to worry about simultaneous reads because
	 * we waited for sole access to the hold buffer above.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	bpf_buf_reclaimed(d);
	d->bd_hbuf_in_use = 0;
	wakeup(&d->bd_hbuf_in_use);
	BPFD_UNLOCK(d);

	return (error);
}
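/*
 * A read() on a bpf descriptor returns a buffer holding zero or more
 * packets, each prefixed with a bpf header.  Illustrative userland walk
 * (a sketch assuming the default struct bpf_hdr layout):
 *
 *	char *p = buf, *end = buf + nread;
 *
 *	while (p < end) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		u_char *pkt = p + bh->bh_hdrlen;
 *		// ... bh->bh_caplen bytes of packet data at pkt ...
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */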
/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK_ASSERT(d);

	if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
		return;
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	struct route ro;
	int error, hlen;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	BPF_PID_REFRESH_CUR(d);
	counter_u64_add(d->bd_wcount, 1);
	/* XXX: locking required */
	if (d->bd_bif == NULL) {
		counter_u64_add(d->bd_wdcount, 1);
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		counter_u64_add(d->bd_wdcount, 1);
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		counter_u64_add(d->bd_wdcount, 1);
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	/* XXX: bpf_movein() can sleep */
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d);
	if (error) {
		counter_u64_add(d->bd_wdcount, 1);
		return (error);
	}
	counter_u64_add(d->bd_wfcount, 1);
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_NOWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

	CURVNET_SET(ifp->if_vnet);
#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	bzero(&ro, sizeof(ro));
	if (hlen != 0) {
		ro.ro_prepend = (u_char *)&dst.sa_data;
		ro.ro_plen = hlen;
		ro.ro_flags = RT_HAS_HEADER;
	}

	error = (*ifp->if_output)(ifp, m, &dst, &ro);
	if (error)
		counter_u64_add(d->bd_wdcount, 1);

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}
	CURVNET_RESTORE();

	return (error);
}
/*
 * Reset a descriptor by flushing its packet buffer and clearing the receive
 * and drop counts.  This is doable for kernel-only buffers, but with
 * zero-copy buffers, we can't write to (or rotate) buffers that are
 * currently owned by userspace.  It would be nice if we could encapsulate
 * this logic in the buffer code rather than here.
 */
static void
reset_d(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	while (d->bd_hbuf_in_use)
		mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
		    "bd_hbuf", 0);
	if ((d->bd_hbuf != NULL) &&
	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}
	if (bpf_canwritebuf(d))
		d->bd_slen = 0;
	counter_u64_zero(d->bd_rcount);
	counter_u64_zero(d->bd_dcount);
	counter_u64_zero(d->bd_fcount);
	counter_u64_zero(d->bd_wcount);
	counter_u64_zero(d->bd_wfcount);
	counter_u64_zero(d->bd_wdcount);
	counter_u64_zero(d->bd_zcopy);
}

/*
 *  FIONREAD		Check for read packet available.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set read filter.
 *  BIOCSETFNR		Set read filter without resetting descriptor.
 *  BIOCSETWF		Set write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGDIRECTION	Get packet direction flag
 *  BIOCSDIRECTION	Set packet direction flag
 *  BIOCGTSTAMP		Get time stamp format and resolution.
 *  BIOCSTSTAMP		Set time stamp format and resolution.
 *  BIOCLOCK		Set "locked" flag
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 */
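/*
 * Illustrative note on BIOCLOCK (a sketch of the semantics implemented in
 * bpfioctl() below): once a descriptor is locked, only the read-mostly
 * ioctls whitelisted at the top of bpfioctl() are permitted; anything that
 * could change the filter, interface, or buffers fails with EPERM:
 *
 *	ioctl(fd, BIOCLOCK);		// irreversible for this descriptor
 *	ioctl(fd, BIOCGSTATS, &bs);	// still fine
 *	ioctl(fd, BIOCSETIF, &ifr);	// now fails with EPERM
 */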
/* ARGSUSED */
static	int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d;
	int error;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	BPF_PID_REFRESH(d, td);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
#ifdef COMPAT_FREEBSD32
		case BIOCGDLTLIST32:
#endif
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
		case BIOCGRTIMEOUT32:
#endif
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCSTSTAMP:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
		case BIOCSRTIMEOUT32:
#endif
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
#ifdef COMPAT_FREEBSD32
	/*
	 * If we see a 32-bit compat ioctl, mark the stream as 32-bit so
	 * that it will get 32-bit packet headers.
	 */
	switch (cmd) {
	case BIOCSETF32:
	case BIOCSETFNR32:
	case BIOCSETWF32:
	case BIOCGDLTLIST32:
	case BIOCGRTIMEOUT32:
	case BIOCSRTIMEOUT32:
		if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
			BPFD_LOCK(d);
			d->bd_compat32 = 1;
			BPFD_UNLOCK(d);
		}
	}
#endif

	CURVNET_SET(TD_TO_VNET(td));
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			while (d->bd_hbuf_in_use)
				mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
				    PRINET, "bd_hbuf", 0);
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_bufsize;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETFNR:
	case BIOCSETWF:
#ifdef COMPAT_FREEBSD32
	case BIOCSETF32:
	case BIOCSETFNR32:
	case BIOCSETWF32:
#endif
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;
	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		BPF_UNLOCK();
		break;

	/*
	 * Get a list of supported data link types.
	 */
#ifdef COMPAT_FREEBSD32
	case BIOCGDLTLIST32:
		{
			struct bpf_dltlist32 *list32;
			struct bpf_dltlist dltlist;

			list32 = (struct bpf_dltlist32 *)addr;
			dltlist.bfl_len = list32->bfl_len;
			dltlist.bfl_list = PTRIN(list32->bfl_list);
			BPF_LOCK();
			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				error = bpf_getdltlist(d, &dltlist);
				if (error == 0)
					list32->bfl_len = dltlist.bfl_len;
			}
			BPF_UNLOCK();
			break;
		}
#endif

	case BIOCGDLTLIST:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		BPF_UNLOCK();
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		BPF_UNLOCK();
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		BPF_LOCK();
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		BPF_UNLOCK();
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		{
			int alloc_buf, size;

			/*
			 * Behavior here depends on the buffering model.  If
			 * we're using kernel memory buffers, then we can
			 * allocate them here.  If we're using zero-copy,
			 * then the user process must have registered buffers
			 * by the time we get here.
			 */
			alloc_buf = 0;
			BPFD_LOCK(d);
			if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
			    d->bd_sbuf == NULL)
				alloc_buf = 1;
			BPFD_UNLOCK(d);
			if (alloc_buf) {
				size = d->bd_bufsize;
				error = bpf_buffer_ioctl_sblen(d, &size);
				if (error != 0)
					break;
			}
			BPF_LOCK();
			error = bpf_setif(d, (struct ifreq *)addr);
			BPF_UNLOCK();
			break;
		}

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
	case BIOCSRTIMEOUT32:
#endif
		{
			struct timeval *tv = (struct timeval *)addr;
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
			struct timeval32 *tv32;
			struct timeval tv64;

			if (cmd == BIOCSRTIMEOUT32) {
				tv32 = (struct timeval32 *)addr;
				tv = &tv64;
				tv->tv_sec = tv32->tv_sec;
				tv->tv_usec = tv32->tv_usec;
			} else
#endif
				tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}
	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
	case BIOCGRTIMEOUT32:
#endif
		{
			struct timeval *tv;
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
			struct timeval32 *tv32;
			struct timeval tv64;

			if (cmd == BIOCGRTIMEOUT32)
				tv = &tv64;
			else
#endif
				tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
			if (cmd == BIOCGRTIMEOUT32) {
				tv32 = (struct timeval32 *)addr;
				tv32->tv_sec = tv->tv_sec;
				tv32->tv_usec = tv->tv_usec;
			}
#endif

			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount);
			bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount);
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		BPFD_LOCK(d);
		d->bd_immediate = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_hdrcmplt;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		BPFD_LOCK(d);
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Get packet direction flag
	 */
	case BIOCGDIRECTION:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_direction;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set packet direction flag
	 */
	case BIOCSDIRECTION:
		{
			u_int direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				BPFD_LOCK(d);
				d->bd_direction = direction;
				BPFD_UNLOCK(d);
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	/*
	 * Get packet timestamp format and resolution.
	 */
	case BIOCGTSTAMP:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_tstamp;
		BPFD_UNLOCK(d);
		break;

	/*
	 * Set packet timestamp format and resolution.
	 */
	case BIOCSTSTAMP:
		{
			u_int func;

			func = *(u_int *)addr;
			if (BPF_T_VALID(func))
				d->bd_tstamp = func;
			else
				error = EINVAL;
		}
		break;

	case BIOCFEEDBACK:
		BPFD_LOCK(d);
		d->bd_feedback = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCLOCK:
		BPFD_LOCK(d);
		d->bd_locked = 1;
		BPFD_UNLOCK(d);
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		BPFD_LOCK(d);
		d->bd_async = *(int *)addr;
		BPFD_UNLOCK(d);
		break;

	case FIOSETOWN:
		/*
		 * XXX: Add some sort of locking here?
		 * fsetown() can sleep.
		 */
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		BPFD_LOCK(d);
		*(int *)addr = fgetown(&d->bd_sigio);
		BPFD_UNLOCK(d);
		break;
	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else {
				BPFD_LOCK(d);
				d->bd_sig = sig;
				BPFD_UNLOCK(d);
			}
			break;
		}
	case BIOCGRSIG:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_sig;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETBUFMODE:
		BPFD_LOCK(d);
		*(u_int *)addr = d->bd_bufmode;
		BPFD_UNLOCK(d);
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLTHROUGH */

		default:
			CURVNET_RESTORE();
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			CURVNET_RESTORE();
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
		break;

	case BIOCSETZBUF:
		error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
		break;

	case BIOCROTZBUF:
		error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
		break;
	}
	CURVNET_RESTORE();
	return (error);
}
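/*
 * Illustrative userland filter installation exercising bpf_setf() (a
 * sketch; this one-instruction program accepts every packet, truncated to
 * 96 bytes):
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 96),
 *	};
 *	struct bpf_program fp = {
 *		.bf_len = sizeof(insns) / sizeof(insns[0]),
 *		.bf_insns = insns,
 *	};
 *
 *	if (ioctl(fd, BIOCSETF, &fp) == -1)
 *		err(1, "BIOCSETF");
 *
 * BIOCSETFNR installs the same program without resetting the descriptor,
 * and BIOCSETWF installs it as the write filter checked in bpf_movein().
 */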
/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 *
 * Note we need the global lock here to serialize bpf_setf() and bpf_setif()
 * calls since reading d->bd_bif can't be protected by the descriptor or
 * interface lock due to lock order.
 *
 * Additionally, we have to acquire the interface write lock because
 * bpf_mtap() uses the interface read lock to read all filters.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
#ifdef COMPAT_FREEBSD32
	struct bpf_program fp_swab;
	struct bpf_program32 *fp32;
#endif
	struct bpf_insn *fcode, *old;
#ifdef BPF_JITTER
	bpf_jit_filter *jfunc, *ofunc;
#endif
	size_t size;
	u_int flen;
	int need_upgrade;

#ifdef COMPAT_FREEBSD32
	switch (cmd) {
	case BIOCSETF32:
	case BIOCSETWF32:
	case BIOCSETFNR32:
		fp32 = (struct bpf_program32 *)fp;
		fp_swab.bf_len = fp32->bf_len;
		fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
		fp = &fp_swab;
		switch (cmd) {
		case BIOCSETF32:
			cmd = BIOCSETF;
			break;
		case BIOCSETWF32:
			cmd = BIOCSETWF;
			break;
		}
		break;
	}
#endif

	fcode = NULL;
#ifdef BPF_JITTER
	jfunc = ofunc = NULL;
#endif
	need_upgrade = 0;

	/*
	 * Check new filter validity before acquiring any locks.
	 * Allocate memory for new filter, if needed.
	 */
	flen = fp->bf_len;
	if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
		return (EINVAL);
	size = flen * sizeof(*fp->bf_insns);
	if (size > 0) {
		/* We're setting up new filter.  Copy and check actual data. */
		fcode = malloc(size, M_BPF, M_WAITOK);
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    !bpf_validate(fcode, flen)) {
			free(fcode, M_BPF);
			return (EINVAL);
		}
#ifdef BPF_JITTER
		/* Filter is copied inside fcode and is perfectly valid. */
		jfunc = bpf_jitter(fcode, flen);
#endif
	}

	BPF_LOCK();

	/*
	 * Set up new filter.
	 * Protect filter change by interface lock.
	 * Additionally, we are protected by global lock here.
	 */
	if (d->bd_bif != NULL)
		BPFIF_WLOCK(d->bd_bif);
	BPFD_LOCK(d);
	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		d->bd_wfilter = fcode;
	} else {
		old = d->bd_rfilter;
		d->bd_rfilter = fcode;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
		d->bd_bfilter = jfunc;
#endif
		if (cmd == BIOCSETF)
			reset_d(d);

		need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen);
	}
	BPFD_UNLOCK(d);
	if (d->bd_bif != NULL)
		BPFIF_WUNLOCK(d->bd_bif);
	if (old != NULL)
		free(old, M_BPF);
#ifdef BPF_JITTER
	if (ofunc != NULL)
		bpf_destroy_jit_filter(ofunc);
#endif

	/* Move d to active readers list. */
	if (need_upgrade != 0)
		bpf_upgraded(d);

	BPF_UNLOCK();
	return (0);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	BPF_LOCK_ASSERT();

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/* Check if interface is not being detached from BPF */
	BPFIF_RLOCK(bp);
	if (bp->bif_flags & BPFIF_FLAG_DYING) {
		BPFIF_RUNLOCK(bp);
		return (ENXIO);
	}
	BPFIF_RUNLOCK(bp);

	/*
	 * At this point, we expect the buffer is already allocated.  If not,
	 * return an error.
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif)
		bpf_attachd(d, bp);
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}
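/*
 * Illustrative userland readiness wait (a sketch): the descriptor becomes
 * readable when a buffer rotates into the hold position, or when a read
 * timeout set via BIOCSRTIMEOUT fires while the store buffer is non-empty:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, INFTIM) > 0 && (pfd.revents & POLLIN))
 *		nread = read(fd, buf, buflen);
 */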
/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
		return (events &
		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	BPF_PID_REFRESH(d, td);
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d;

	if (devfs_get_cdevpriv((void **)&d) != 0 ||
	    kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	BPF_PID_REFRESH_CUR(d);
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		/*
		 * Ignore the hold buffer if it is being copied to user space.
		 */
		if (!d->bd_hbuf_in_use && d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

#define	BPF_TSTAMP_NONE		0
#define	BPF_TSTAMP_FAST		1
#define	BPF_TSTAMP_NORMAL	2
#define	BPF_TSTAMP_EXTERN	3

static int
bpf_ts_quality(int tstype)
{

	if (tstype == BPF_T_NONE)
		return (BPF_TSTAMP_NONE);
	if ((tstype & BPF_T_FAST) != 0)
		return (BPF_TSTAMP_FAST);

	return (BPF_TSTAMP_NORMAL);
}

static int
bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
{
	struct m_tag *tag;
	int quality;

	quality = bpf_ts_quality(tstype);
	if (quality == BPF_TSTAMP_NONE)
		return (quality);

	if (m != NULL) {
		tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
		if (tag != NULL) {
			*bt = *(struct bintime *)(tag + 1);
			return (BPF_TSTAMP_EXTERN);
		}
	}
	if (quality == BPF_TSTAMP_NORMAL)
		binuptime(bt);
	else
		getbinuptime(bt);

	return (quality);
}
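/*
 * Illustrative driver-side usage (a sketch): a driver holding a received
 * packet in a contiguous buffer taps it with something like
 *
 *	if (bpf_peers_present(ifp->if_bpf))
 *		bpf_tap(ifp->if_bpf, pkt, pktlen);
 *
 * while mbuf-based drivers typically use the BPF_MTAP(ifp, m) macro from
 * net/bpf.h, which wraps the same peers-present check around bpf_mtap().
 */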
/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int slen;
	int gottime;

	gottime = BPF_TSTAMP_NONE;

	BPFIF_RLOCK(bp);

	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		/*
		 * We are not using any locks for d here because:
		 * 1) any filter change is protected by interface
		 *    write lock
		 * 2) destroying/detaching d is protected by interface
		 *    write lock, too
		 */

		counter_u64_add(d->bd_rcount, 1);
		/*
		 * NB: We don't call BPF_CHECK_DIRECTION() here since there
		 * is no way for the caller to indicate to us whether this
		 * packet is inbound or outbound.  In the bpf_mtap() routines,
		 * we use the interface pointers on the mbuf to figure it out.
		 */
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		if (bf != NULL)
			slen = (*(bf->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			/*
			 * Filter matches.  Acquire the descriptor lock.
			 */
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &bt);
			BPFD_UNLOCK(d);
		}
	}
	BPFIF_RUNLOCK(bp);
}

#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 * Locking model is explained in bpf_tap().
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bintime bt;
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	gottime = BPF_TSTAMP_NONE;

	BPFIF_RLOCK(bp);

	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		/* XXX We cannot handle multiple mbufs. */
		if (bf != NULL && m->m_next == NULL)
			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	BPFIF_RUNLOCK(bp);
}
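/*
 * Illustrative usage of bpf_mtap2() (a sketch): net80211-style drivers
 * deliver radiotap headers this way, with the header in a separate
 * contiguous buffer and the frame itself still in the mbuf chain, roughly:
 *
 *	bpf_mtap2(sc->sc_drvbpf, &sc->sc_rxtap, sc->sc_rxtap_len, m);
 *
 * The names above are placeholders modeled on wireless drivers; only the
 * (bpf_if, header, header length, mbuf) contract comes from this file.
 */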
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct bintime bt;
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	pktlen = m_length(m, NULL);
	/*
	 * Craft an on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	gottime = BPF_TSTAMP_NONE;

	BPFIF_RLOCK(bp);

	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		counter_u64_add(d->bd_rcount, 1);
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			BPFD_LOCK(d);

			counter_u64_add(d->bd_fcount, 1);
			if (gottime < bpf_ts_quality(d->bd_tstamp))
				gottime = bpf_gettime(&bt, d->bd_tstamp, m);
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &bt);
			BPFD_UNLOCK(d);
		}
	}
	BPFIF_RUNLOCK(bp);
}

#undef	BPF_CHECK_DIRECTION

#undef	BPF_TSTAMP_NONE
#undef	BPF_TSTAMP_FAST
#undef	BPF_TSTAMP_NORMAL
#undef	BPF_TSTAMP_EXTERN

static int
bpf_hdrlen(struct bpf_d *d)
{
	int hdrlen;

	hdrlen = d->bd_bif->bif_hdrlen;
#ifndef BURN_BRIDGES
	if (d->bd_tstamp == BPF_T_NONE ||
	    BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32)
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
		else
#endif
			hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
	else
#endif
		hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		hdrlen = BPF_WORDALIGN32(hdrlen);
	else
#endif
		hdrlen = BPF_WORDALIGN(hdrlen);

	return (hdrlen - d->bd_bif->bif_hdrlen);
}

static void
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
{
	struct bintime bt2, boottimebin;
	struct timeval tsm;
	struct timespec tsn;

	if ((tstype & BPF_T_MONOTONIC) == 0) {
		bt2 = *bt;
		getboottimebin(&boottimebin);
		bintime_add(&bt2, &boottimebin);
		bt = &bt2;
	}
	switch (BPF_T_FORMAT(tstype)) {
	case BPF_T_MICROTIME:
		bintime2timeval(bt, &tsm);
		ts->bt_sec = tsm.tv_sec;
		ts->bt_frac = tsm.tv_usec;
		break;
	case BPF_T_NANOTIME:
		bintime2timespec(bt, &tsn);
		ts->bt_sec = tsn.tv_sec;
		ts->bt_frac = tsn.tv_nsec;
		break;
	case BPF_T_BINTIME:
		ts->bt_sec = bt->sec;
		ts->bt_frac = bt->frac;
		break;
	}
}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bpf_append_bytes is passed in to copy contiguous chunks,
 * while bpf_append_mbuf is passed in to copy mbuf chains.  In the
 * latter case, pkt is really an mbuf.
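 *
 * For example, the tap routines above queue a contiguous packet with
 *
 *	catchpacket(d, pkt, pktlen, slen, bpf_append_bytes, &bt);
 *
 * and an mbuf chain with
 *
 *	catchpacket(d, (u_char *)m, pktlen, slen, bpf_append_mbuf, &bt);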
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct bintime *bt)
{
	struct bpf_xhdr hdr;
#ifndef BURN_BRIDGES
	struct bpf_hdr hdr_old;
#ifdef COMPAT_FREEBSD32
	struct bpf_hdr32 hdr32_old;
#endif
#endif
	int caplen, curlen, hdrlen, totlen;
	int do_wakeup = 0;
	int do_timestamp;
	int tstype;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	hdrlen = bpf_hdrlen(d);
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room.
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wake up pending processes.
	 */
#ifdef COMPAT_FREEBSD32
	if (d->bd_compat32)
		curlen = BPF_WORDALIGN32(d->bd_slen);
	else
#endif
		curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			counter_u64_add(d->bd_dcount, 1);
			return;
		}
		KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
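		 *
		 * (A consumer typically enables immediate mode with the
		 * BIOCIMMEDIATE ioctl before reading, e.g.
		 *
		 *	int on = 1;
		 *
		 *	ioctl(fd, BIOCIMMEDIATE, &on);
		 *
		 * where "fd" is a descriptor for /dev/bpf, so that read(2)
		 * returns as soon as a packet is captured rather than when
		 * the store buffer fills.)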
		 */
		do_wakeup = 1;
	caplen = totlen - hdrlen;
	tstype = d->bd_tstamp;
	do_timestamp = tstype != BPF_T_NONE;
#ifndef BURN_BRIDGES
	if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
		struct bpf_ts ts;
		if (do_timestamp)
			bpf_bintime2ts(bt, &ts, tstype);
#ifdef COMPAT_FREEBSD32
		if (d->bd_compat32) {
			bzero(&hdr32_old, sizeof(hdr32_old));
			if (do_timestamp) {
				hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
				hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
			}
			hdr32_old.bh_datalen = pktlen;
			hdr32_old.bh_hdrlen = hdrlen;
			hdr32_old.bh_caplen = caplen;
			bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
			    sizeof(hdr32_old));
			goto copy;
		}
#endif
		bzero(&hdr_old, sizeof(hdr_old));
		if (do_timestamp) {
			hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
			hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
		}
		hdr_old.bh_datalen = pktlen;
		hdr_old.bh_hdrlen = hdrlen;
		hdr_old.bh_caplen = caplen;
		bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
		    sizeof(hdr_old));
		goto copy;
	}
#endif

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	if (do_timestamp)
		bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = caplen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
#ifndef BURN_BRIDGES
copy:
#endif
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it has not yet been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter != NULL) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		if (d->bd_bfilter != NULL)
			bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter != NULL)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_lock);

	counter_u64_free(d->bd_rcount);
	counter_u64_free(d->bd_dcount);
	counter_u64_free(d->bd_fcount);
	counter_u64_free(d->bd_wcount);
	counter_u64_free(d->bd_wfcount);
	counter_u64_free(d->bd_wdcount);
	counter_u64_free(d->bd_zcopy);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
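 *
 * As a point of reference (a sketch, not an exhaustive list of
 * callers): an Ethernet driver reaches here via ether_ifattach(),
 * which performs the equivalent of
 *
 *	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 *
 * while drivers exposing several data link types call bpfattach2()
 * with a separate bpf_if pointer for each DLT.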
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	LIST_INIT(&bp->bif_wlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	rw_init(&bp->bif_lock, "bpf interface lock");
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	bp->bif_bpf = driverp;
	*driverp = bp;

	BPF_LOCK();
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	BPF_UNLOCK();

	bp->bif_hdrlen = hdrlen;

	if (bootverbose && IS_DEFAULT_VNET(curvnet))
		if_printf(ifp, "bpf attached\n");
}

#ifdef VIMAGE
/*
 * When moving interfaces between vnet instances we need a way to
 * query the dlt and hdrlen before detach so we can re-attach the if_bpf
 * after the vmove.  We unfortunately have no device driver infrastructure
 * to query the interface for these values after creation/attach, thus
 * add this as a workaround.
 */
int
bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
{

	if (bp == NULL)
		return (ENXIO);
	if (bif_dlt == NULL && bif_hdrlen == NULL)
		return (0);

	if (bif_dlt != NULL)
		*bif_dlt = bp->bif_dlt;
	if (bif_hdrlen != NULL)
		*bif_hdrlen = bp->bif_hdrlen;

	return (0);
}
#endif

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface.  Notify each descriptor as it's detached
 * so that any sleepers wake up and get ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_temp;
	struct bpf_d *d;
	int ndetached;

	ndetached = 0;

	BPF_LOCK();
	/* Find all bpf_if struct's which reference ifp and detach them. */
	LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
		if (ifp != bp->bif_ifp)
			continue;

		LIST_REMOVE(bp, bif_next);
		/* Add to the to-be-freed list. */
		LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);

		ndetached++;
		/*
		 * Delay freeing bp till interface is detached
		 * and all routes through this interface are removed.
		 * Mark bp as detached to restrict new consumers.
		 */
		BPFIF_WLOCK(bp);
		bp->bif_flags |= BPFIF_FLAG_DYING;
		*bp->bif_bpf = NULL;
		BPFIF_WUNLOCK(bp);

		CTR4(KTR_NET, "%s: scheduling free for encap %d (%p) for if %p",
		    __func__, bp->bif_dlt, bp, ifp);

		/* Free common descriptors */
		while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
			bpf_detachd_locked(d);
			BPFD_LOCK(d);
			bpf_wakeup(d);
			BPFD_UNLOCK(d);
		}

		/* Free writer-only descriptors */
		while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
			bpf_detachd_locked(d);
			BPFD_LOCK(d);
			bpf_wakeup(d);
			BPFD_UNLOCK(d);
		}
	}
	BPF_UNLOCK();

#ifdef INVARIANTS
	if (ndetached == 0)
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
#endif
}

/*
 * Interface departure handler.
 * Note that a departure event does not guarantee the interface is going
 * down.  Interface renaming is currently done via a departure/arrival
 * event pair.
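 *
 * (Drivers never call this directly; it is registered on
 * ifnet_departure_event via EVENTHANDLER_REGISTER() in bpf_drvinit()
 * below.)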
 *
 * The departure handler is called after all routes pointing to the
 * given interface have been removed and the interface is in the down
 * state, so no packets can be sent or received.  We assume it is now
 * safe to free data allocated by BPF.
 */
static void
bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_temp;
	int nmatched = 0;

	/* Ignore ifnet renaming. */
	if (ifp->if_flags & IFF_RENAMING)
		return;

	BPF_LOCK();
	/*
	 * Find matching entries in free list.
	 * Nothing should be found if bpfdetach() was not called.
	 */
	LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
		if (ifp != bp->bif_ifp)
			continue;

		CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
		    __func__, bp, ifp);

		LIST_REMOVE(bp, bif_next);

		rw_destroy(&bp->bif_lock);
		free(bp, M_BPF);

		nmatched++;
	}
	BPF_UNLOCK();
}

/*
 * Get a list of available data link types of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	struct ifnet *ifp;
	struct bpf_if *bp;
	u_int *lst;
	int error, n, n1;

	BPF_LOCK_ASSERT();

	ifp = d->bd_bif->bif_ifp;
again:
	n1 = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp)
			n1++;
	}
	if (bfl->bfl_list == NULL) {
		bfl->bfl_len = n1;
		return (0);
	}
	if (n1 > bfl->bfl_len)
		return (ENOMEM);
	BPF_UNLOCK();
	lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
	n = 0;
	BPF_LOCK();
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (n >= n1) {
			free(lst, M_TEMP);
			goto again;
		}
		lst[n] = bp->bif_dlt;
		n++;
	}
	BPF_UNLOCK();
	error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
	free(lst, M_TEMP);
	BPF_LOCK();
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	BPF_LOCK_ASSERT();

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;

	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}

	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
				    "bpf_setdlt: ifpromisc failed (%d)\n",
				    error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_drvinit(void *unused)
{
	struct cdev *dev;

	sx_init(&bpf_sx, "bpf global lock");
	LIST_INIT(&bpf_iflist);
	LIST_INIT(&bpf_freelist);

	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
	/* For compatibility */
	make_dev_alias(dev, "bpf0");

	/* Register interface departure handler */
	bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, bpf_ifdetach, NULL,
	    EVENTHANDLER_PRI_ANY);
}

/*
 * Zero out the various packet counters associated with all of the bpf
 * descriptors.
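 *
 * From userland this is reached by writing an all-zero struct xbpf_d
 * to the net.bpf.stats sysctl serviced by bpf_stats_sysctl() below;
 * roughly (a sketch, assuming the usual libc sysctl interface):
 *
 *	struct xbpf_d zero;
 *
 *	memset(&zero, 0, sizeof(zero));
 *	sysctlbyname("net.bpf.stats", NULL, NULL, &zero, sizeof(zero));
 *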
 * At some point, we will probably want to get a bit more
 * granular and allow the user to specify descriptors to be zeroed.
 */
static void
bpf_zero_counters(void)
{
	struct bpf_if *bp;
	struct bpf_d *bd;

	BPF_LOCK();
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_RLOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			BPFD_LOCK(bd);
			counter_u64_zero(bd->bd_rcount);
			counter_u64_zero(bd->bd_dcount);
			counter_u64_zero(bd->bd_fcount);
			counter_u64_zero(bd->bd_wcount);
			counter_u64_zero(bd->bd_wdcount);
			counter_u64_zero(bd->bd_wfcount);
			counter_u64_zero(bd->bd_zcopy);
			BPFD_UNLOCK(bd);
		}
		BPFIF_RUNLOCK(bp);
	}
	BPF_UNLOCK();
}

/*
 * Fill filter statistics.
 */
static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_structsize = sizeof(*d);
	/* XXX: reading should be protected by the global lock. */
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = counter_u64_fetch(bd->bd_rcount);
	d->bd_dcount = counter_u64_fetch(bd->bd_dcount);
	d->bd_fcount = counter_u64_fetch(bd->bd_fcount);
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = counter_u64_fetch(bd->bd_wcount);
	d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount);
	d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount);
	d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy);
	d->bd_bufmode = bd->bd_bufmode;
}

/*
 * Handle `netstat -B' stats request.
 */
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	static const struct xbpf_d zerostats;
	struct xbpf_d *xbdbuf, *xbd, tempstats;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct.  It is possible for
	 * non-privileged users to open bpf devices.  It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/*
	 * Check to see if the user is requesting that the counters be
	 * zeroed out.  Explicitly check that the supplied data is zeroed,
	 * as we aren't allowing the user to set the counters currently.
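	 *
	 * (The read side of this handler is what `netstat -B' uses: the
	 * caller supplies only an old pointer, e.g.
	 *
	 *	sysctlbyname("net.bpf.stats", buf, &len, NULL, 0);
	 *
	 * and receives one struct xbpf_d per open descriptor.)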
	 */
	if (req->newptr != NULL) {
		if (req->newlen != sizeof(tempstats))
			return (EINVAL);
		memset(&tempstats, 0, sizeof(tempstats));
		error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
		if (error)
			return (error);
		if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
			return (EINVAL);
		bpf_zero_counters();
		return (0);
	}
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	BPF_LOCK();
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		BPF_UNLOCK();
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_RLOCK(bp);
		/* Send writers-only first */
		LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_RUNLOCK(bp);
	}
	BPF_UNLOCK();
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{

	return (-1);	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{

	return (0);	/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */

#ifdef DDB
static void
bpf_show_bpf_if(struct bpf_if *bpf_if)
{

	if (bpf_if == NULL)
		return;
	db_printf("%p:\n", bpf_if);
#define	BPF_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, bpf_if->e);
	/* bif_ext.bif_next */
	/* bif_ext.bif_dlist */
	BPF_DB_PRINTF("%#x", bif_dlt);
	BPF_DB_PRINTF("%u", bif_hdrlen);
	BPF_DB_PRINTF("%p", bif_ifp);
	/* bif_lock */
	/* bif_wlist */
	BPF_DB_PRINTF("%#x", bif_flags);
}

DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
{

	if (!have_addr) {
		db_printf("usage: show bpf_if <struct bpf_if *>\n");
		return;
	}

	bpf_show_bpf_if((struct bpf_if *)addr);
}
#endif