1 /* $NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $ */ 2 3 /* 4 * Copyright (c) 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from the Stanford/CMU enet packet filter, 8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 10 * Berkeley Laboratory. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 37 * static char rcsid[] = 38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp "; 39 */ 40 /* 41 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 42 * Use is subject to license terms. 43 * Copyright 2017 Joyent, Inc. 44 */ 45 46 /* 47 * The BPF implements the following access controls for zones attempting 48 * to read and write data. Writing of data requires that the net_rawaccess 49 * privilege is held whilst reading data requires either net_rawaccess or 50 * net_observerability. 51 * 52 * | Shared | Exclusive | Global 53 * -----------------------------+--------+------------+------------+ 54 * DLT_IPNET in local zone | Read | Read | Read | 55 * -----------------------------+--------+------------+------------+ 56 * Raw access to local zone NIC | None | Read/Write | Read/Write | 57 * -----------------------------+--------+------------+------------+ 58 * Raw access to all NICs | None | None | Read/Write | 59 * -----------------------------+--------+------------+------------+ 60 * 61 * The BPF driver is written as a cloning driver: each call to bpfopen() 62 * allocates a new minor number. This provides BPF with a 1:1 relationship 63 * between open's and close's. There is some amount of "descriptor state" 64 * that is kept per open. Pointers to this data are stored in a hash table 65 * (bpf_hash) that is index'd by the minor device number for each open file. 
66 */ 67 #include <sys/param.h> 68 #include <sys/systm.h> 69 #include <sys/time.h> 70 #include <sys/ioctl.h> 71 #include <sys/queue.h> 72 #include <sys/filio.h> 73 #include <sys/policy.h> 74 #include <sys/cmn_err.h> 75 #include <sys/uio.h> 76 #include <sys/file.h> 77 #include <sys/sysmacros.h> 78 #include <sys/zone.h> 79 80 #include <sys/socket.h> 81 #include <sys/errno.h> 82 #include <sys/poll.h> 83 #include <sys/dlpi.h> 84 #include <sys/neti.h> 85 86 #include <net/if.h> 87 88 #include <net/bpf.h> 89 #include <net/bpfdesc.h> 90 #include <net/dlt.h> 91 92 #include <netinet/in.h> 93 #include <sys/mac.h> 94 #include <sys/mac_client.h> 95 #include <sys/mac_impl.h> 96 #include <sys/time_std_impl.h> 97 #include <sys/hook.h> 98 #include <sys/hook_event.h> 99 100 101 #define mtod(_v, _t) (_t)((_v)->b_rptr) 102 #define M_LEN(_m) ((_m)->b_wptr - (_m)->b_rptr) 103 104 /* 105 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet 106 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k). 107 */ 108 #define BPF_BUFSIZE (32 * 1024) 109 110 typedef void *(*cp_fn_t)(void *, const void *, size_t); 111 112 /* 113 * The default read buffer size, and limit for BIOCSBLEN. 114 */ 115 int bpf_bufsize = BPF_BUFSIZE; 116 int bpf_maxbufsize = (16 * 1024 * 1024); 117 static mod_hash_t *bpf_hash = NULL; 118 119 /* 120 * Use a mutex to avoid a race condition between gathering the stats/peers 121 * and opening/closing the device. 
122 */ 123 static kcondvar_t bpf_dlt_waiter; 124 static kmutex_t bpf_mtx; 125 static bpf_kstats_t ks_stats; 126 static bpf_kstats_t bpf_kstats = { 127 { "readWait", KSTAT_DATA_UINT64 }, 128 { "writeOk", KSTAT_DATA_UINT64 }, 129 { "writeError", KSTAT_DATA_UINT64 }, 130 { "receive", KSTAT_DATA_UINT64 }, 131 { "captured", KSTAT_DATA_UINT64 }, 132 { "dropped", KSTAT_DATA_UINT64 }, 133 }; 134 static kstat_t *bpf_ksp; 135 136 /* 137 * bpf_list is a list of the BPF descriptors currently open 138 */ 139 LIST_HEAD(, bpf_d) bpf_list; 140 141 static int bpf_allocbufs(struct bpf_d *); 142 static void bpf_clear_timeout(struct bpf_d *); 143 static void bpf_deliver(struct bpf_d *, cp_fn_t, 144 void *, uint_t, uint_t, boolean_t); 145 static void bpf_freed(struct bpf_d *); 146 static int bpf_ifname(struct bpf_d *d, char *, int); 147 static void *bpf_mcpy(void *, const void *, size_t); 148 static int bpf_attachd(struct bpf_d *, const char *, int); 149 static void bpf_detachd(struct bpf_d *); 150 static int bpf_setif(struct bpf_d *, char *, int); 151 static void bpf_timed_out(void *); 152 static inline void 153 bpf_wakeup(struct bpf_d *); 154 static void catchpacket(struct bpf_d *, uchar_t *, uint_t, uint_t, 155 cp_fn_t, struct timeval *); 156 static void reset_d(struct bpf_d *); 157 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 158 static int bpf_setdlt(struct bpf_d *, void *); 159 static void bpf_dev_add(struct bpf_d *); 160 static struct bpf_d *bpf_dev_find(minor_t); 161 static struct bpf_d *bpf_dev_get(minor_t); 162 static void bpf_dev_remove(struct bpf_d *); 163 164 static int 165 bpf_movein(struct uio *uio, int linktype, int mtu, mblk_t **mp) 166 { 167 mblk_t *m; 168 int error; 169 int len; 170 int hlen; 171 int align; 172 173 /* 174 * Build a sockaddr based on the data link layer type. 175 * We do this at this level because the ethernet header 176 * is copied directly into the data field of the sockaddr. 
177 * In the case of SLIP, there is no header and the packet 178 * is forwarded as is. 179 * Also, we are careful to leave room at the front of the mbuf 180 * for the link level header. 181 */ 182 switch (linktype) { 183 184 case DLT_EN10MB: 185 hlen = sizeof (struct ether_header); 186 break; 187 188 case DLT_FDDI: 189 hlen = 16; 190 break; 191 192 case DLT_NULL: 193 hlen = 0; 194 break; 195 196 case DLT_IPOIB: 197 hlen = 44; 198 break; 199 200 default: 201 return (EIO); 202 } 203 204 align = 4 - (hlen & 3); 205 206 len = uio->uio_resid; 207 /* 208 * If there aren't enough bytes for a link level header or the 209 * packet length exceeds the interface mtu, return an error. 210 */ 211 if (len < hlen || len - hlen > mtu) 212 return (EMSGSIZE); 213 214 m = allocb(len + align, BPRI_MED); 215 if (m == NULL) { 216 error = ENOBUFS; 217 goto bad; 218 } 219 220 /* Insure the data is properly aligned */ 221 if (align > 0) 222 m->b_rptr += align; 223 m->b_wptr = m->b_rptr + len; 224 225 error = uiomove(mtod(m, void *), len, UIO_WRITE, uio); 226 if (error) 227 goto bad; 228 *mp = m; 229 return (0); 230 231 bad: 232 if (m != NULL) 233 freemsg(m); 234 return (error); 235 } 236 237 238 /* 239 * Attach file to the bpf interface, i.e. make d listen on bp. 
240 */ 241 static int 242 bpf_attachd(struct bpf_d *d, const char *ifname, int dlt) 243 { 244 bpf_provider_list_t *bp; 245 bpf_provider_t *bpr; 246 boolean_t zonematch; 247 zoneid_t niczone; 248 uintptr_t mcip; 249 zoneid_t zone; 250 uint_t nicdlt; 251 uintptr_t mh; 252 int hdrlen; 253 int error; 254 255 ASSERT(d->bd_bif == (uintptr_t)NULL); 256 ASSERT(d->bd_mcip == (uintptr_t)NULL); 257 zone = d->bd_zone; 258 zonematch = B_TRUE; 259 error = 0; 260 bpr = NULL; 261 again: 262 mh = 0; 263 mcip = 0; 264 LIST_FOREACH(bp, &bpf_providers, bpl_next) { 265 bpr = bp->bpl_what; 266 error = MBPF_OPEN(bpr, ifname, &mh, zone); 267 if (error != 0) 268 goto next; 269 error = MBPF_CLIENT_OPEN(bpr, mh, &mcip); 270 if (error != 0) 271 goto next; 272 error = MBPF_GET_DLT(bpr, mh, &nicdlt); 273 if (error != 0) 274 goto next; 275 276 nicdlt = bpf_dl_to_dlt(nicdlt); 277 if (dlt != -1 && dlt != nicdlt) { 278 error = ENOENT; 279 goto next; 280 } 281 282 error = MBPF_GET_ZONE(bpr, mh, &niczone); 283 if (error != 0) 284 goto next; 285 286 DTRACE_PROBE4(bpf__attach, struct bpf_provider_s *, bpr, 287 uintptr_t, mh, int, nicdlt, zoneid_t, niczone); 288 289 if (zonematch && niczone != zone) { 290 error = ENOENT; 291 goto next; 292 } 293 break; 294 next: 295 if (mcip != 0) { 296 MBPF_CLIENT_CLOSE(bpr, mcip); 297 mcip = 0; 298 } 299 if (mh != 0) { 300 MBPF_CLOSE(bpr, mh); 301 mh = 0; 302 } 303 } 304 if (error != 0) { 305 if (zonematch && (zone == GLOBAL_ZONEID)) { 306 /* 307 * If we failed to do an exact match for the global 308 * zone using the global zoneid, try again in case 309 * the network interface is owned by a local zone. 310 */ 311 zonematch = B_FALSE; 312 goto again; 313 } 314 return (error); 315 } 316 317 /* No providers? 
*/ 318 if (bpr == NULL) 319 return (ENOENT); 320 321 d->bd_mac = *bpr; 322 d->bd_mcip = mcip; 323 d->bd_bif = mh; 324 d->bd_dlt = nicdlt; 325 hdrlen = bpf_dl_hdrsize(nicdlt); 326 d->bd_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 327 328 (void) strlcpy(d->bd_ifname, MBPF_CLIENT_NAME(&d->bd_mac, mcip), 329 sizeof (d->bd_ifname)); 330 331 (void) MBPF_GET_LINKID(&d->bd_mac, d->bd_ifname, &d->bd_linkid, 332 zone); 333 (void) MBPF_PROMISC_ADD(&d->bd_mac, d->bd_mcip, 0, d, 334 &d->bd_promisc_handle, d->bd_promisc_flags); 335 return (0); 336 } 337 338 /* 339 * Detach a file from its interface. 340 */ 341 static void 342 bpf_detachd(struct bpf_d *d) 343 { 344 uintptr_t mph; 345 uintptr_t mch; 346 uintptr_t mh; 347 348 ASSERT(d->bd_inuse == -1); 349 mch = d->bd_mcip; 350 d->bd_mcip = 0; 351 mh = d->bd_bif; 352 d->bd_bif = 0; 353 354 /* 355 * Check if this descriptor had requested promiscuous mode. 356 * If so, turn it off. There's no need to take any action 357 * here, that is done when MBPF_PROMISC_REMOVE is used; 358 * bd_promisc is just a local flag to stop promiscuous mode 359 * from being set more than once. 360 */ 361 if (d->bd_promisc) 362 d->bd_promisc = 0; 363 364 /* 365 * Take device out of "promiscuous" mode. Since we were able to 366 * enter "promiscuous" mode, we should be able to turn it off. 367 * Note, this field stores a pointer used to support both 368 * promiscuous and non-promiscuous callbacks for packets. 369 */ 370 mph = d->bd_promisc_handle; 371 d->bd_promisc_handle = 0; 372 373 /* 374 * The lock has to be dropped here because mac_promisc_remove may 375 * need to wait for mac_promisc_dispatch, which has called into 376 * bpf and catchpacket is waiting for bd_lock... 377 * i.e mac_promisc_remove() needs to be called with none of the 378 * locks held that are part of the bpf_mtap() call path. 
379 */ 380 mutex_exit(&d->bd_lock); 381 if (mph != 0) 382 MBPF_PROMISC_REMOVE(&d->bd_mac, mph); 383 384 if (mch != 0) 385 MBPF_CLIENT_CLOSE(&d->bd_mac, mch); 386 387 if (mh != 0) 388 MBPF_CLOSE(&d->bd_mac, mh); 389 390 /* 391 * Because this function is called with bd_lock held, so it must 392 * exit with it held. 393 */ 394 mutex_enter(&d->bd_lock); 395 *d->bd_ifname = '\0'; 396 (void) memset(&d->bd_mac, 0, sizeof (d->bd_mac)); 397 } 398 399 400 /* 401 * bpfilterattach() is called at load time. 402 */ 403 int 404 bpfilterattach(void) 405 { 406 407 bpf_hash = mod_hash_create_idhash("bpf_dev_tab", 31, 408 mod_hash_null_keydtor); 409 if (bpf_hash == NULL) 410 return (ENOMEM); 411 412 (void) memcpy(&ks_stats, &bpf_kstats, sizeof (bpf_kstats)); 413 414 bpf_ksp = kstat_create("bpf", 0, "global", "misc", 415 KSTAT_TYPE_NAMED, sizeof (bpf_kstats) / sizeof (kstat_named_t), 416 KSTAT_FLAG_VIRTUAL); 417 if (bpf_ksp != NULL) { 418 bpf_ksp->ks_data = &ks_stats; 419 kstat_install(bpf_ksp); 420 } else { 421 mod_hash_destroy_idhash(bpf_hash); 422 bpf_hash = NULL; 423 return (EEXIST); 424 } 425 426 cv_init(&bpf_dlt_waiter, NULL, CV_DRIVER, NULL); 427 mutex_init(&bpf_mtx, NULL, MUTEX_DRIVER, NULL); 428 429 LIST_INIT(&bpf_list); 430 431 return (0); 432 } 433 434 435 /* 436 * bpfilterdetach() is called at unload time. 437 */ 438 int 439 bpfilterdetach(void) 440 { 441 442 if (bpf_ksp != NULL) { 443 kstat_delete(bpf_ksp); 444 bpf_ksp = NULL; 445 } 446 447 mod_hash_destroy_idhash(bpf_hash); 448 bpf_hash = NULL; 449 450 cv_destroy(&bpf_dlt_waiter); 451 mutex_destroy(&bpf_mtx); 452 453 return (0); 454 } 455 456 /* 457 * Open ethernet device. Clones. 458 */ 459 /* ARGSUSED */ 460 int 461 bpfopen(dev_t *devp, int flag, int mode, cred_t *cred) 462 { 463 struct bpf_d *d; 464 uint_t dmin; 465 466 /* 467 * The security policy described at the top of this file is 468 * enforced here. 
469 */ 470 if ((flag & FWRITE) != 0) { 471 if (secpolicy_net_rawaccess(cred) != 0) 472 return (EACCES); 473 } 474 475 if ((flag & FREAD) != 0) { 476 if ((secpolicy_net_observability(cred) != 0) && 477 (secpolicy_net_rawaccess(cred) != 0)) 478 return (EACCES); 479 } 480 481 if ((flag & (FWRITE|FREAD)) == 0) 482 return (ENXIO); 483 484 /* 485 * A structure is allocated per open file in BPF to store settings 486 * such as buffer capture size, provide private buffers, etc. 487 */ 488 d = (struct bpf_d *)kmem_zalloc(sizeof (*d), KM_SLEEP); 489 d->bd_bufsize = bpf_bufsize; 490 d->bd_fmode = flag; 491 d->bd_zone = crgetzoneid(cred); 492 d->bd_seesent = 1; 493 d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_PHYS| 494 MAC_PROMISC_FLAGS_NO_COPY; 495 mutex_init(&d->bd_lock, NULL, MUTEX_DRIVER, NULL); 496 cv_init(&d->bd_wait, NULL, CV_DRIVER, NULL); 497 498 mutex_enter(&bpf_mtx); 499 /* 500 * Find an unused minor number. Obviously this is an O(n) algorithm 501 * and doesn't scale particularly well, so if there are large numbers 502 * of open file descriptors happening in real use, this design may 503 * need to be revisited. 504 */ 505 for (dmin = 0; dmin < L_MAXMIN; dmin++) 506 if (bpf_dev_find(dmin) == NULL) 507 break; 508 if (dmin == L_MAXMIN) { 509 mutex_exit(&bpf_mtx); 510 kmem_free(d, sizeof (*d)); 511 return (ENXIO); 512 } 513 d->bd_dev = dmin; 514 LIST_INSERT_HEAD(&bpf_list, d, bd_list); 515 bpf_dev_add(d); 516 mutex_exit(&bpf_mtx); 517 518 *devp = makedevice(getmajor(*devp), dmin); 519 520 return (0); 521 } 522 523 /* 524 * Close the descriptor by detaching it from its interface, 525 * deallocating its buffers, and marking it free. 526 * 527 * Because we only allow a device to be opened once, there is always a 528 * 1 to 1 relationship between opens and closes supporting this function. 
529 */ 530 /* ARGSUSED */ 531 int 532 bpfclose(dev_t dev, int flag, int otyp, cred_t *cred_p) 533 { 534 struct bpf_d *d = bpf_dev_get(getminor(dev)); 535 536 mutex_enter(&d->bd_lock); 537 538 while (d->bd_inuse != 0) { 539 d->bd_waiting++; 540 if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 541 d->bd_waiting--; 542 mutex_exit(&d->bd_lock); 543 return (EINTR); 544 } 545 d->bd_waiting--; 546 } 547 548 d->bd_inuse = -1; 549 if (d->bd_state == BPF_WAITING) 550 bpf_clear_timeout(d); 551 d->bd_state = BPF_IDLE; 552 if (d->bd_bif) 553 bpf_detachd(d); 554 mutex_exit(&d->bd_lock); 555 556 mutex_enter(&bpf_mtx); 557 LIST_REMOVE(d, bd_list); 558 bpf_dev_remove(d); 559 mutex_exit(&bpf_mtx); 560 561 mutex_enter(&d->bd_lock); 562 mutex_destroy(&d->bd_lock); 563 cv_destroy(&d->bd_wait); 564 565 bpf_freed(d); 566 kmem_free(d, sizeof (*d)); 567 568 return (0); 569 } 570 571 /* 572 * Rotate the packet buffers in descriptor d. Move the store buffer 573 * into the hold slot, and the free buffer into the store slot. 574 * Zero the length of the new store buffer. 575 */ 576 #define ROTATE_BUFFERS(d) \ 577 (d)->bd_hbuf = (d)->bd_sbuf; \ 578 (d)->bd_hlen = (d)->bd_slen; \ 579 (d)->bd_sbuf = (d)->bd_fbuf; \ 580 (d)->bd_slen = 0; \ 581 (d)->bd_fbuf = 0; 582 /* 583 * bpfread - read next chunk of packets from buffers 584 */ 585 /* ARGSUSED */ 586 int 587 bpfread(dev_t dev, struct uio *uio, cred_t *cred) 588 { 589 struct bpf_d *d = bpf_dev_get(getminor(dev)); 590 int timed_out; 591 ulong_t delay; 592 int error; 593 594 if ((d->bd_fmode & FREAD) == 0) 595 return (EBADF); 596 597 /* 598 * Restrict application to use a buffer the same size as 599 * the kernel buffers. 
 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	mutex_enter(&d->bd_lock);
	if (d->bd_state == BPF_WAITING)
		bpf_clear_timeout(d);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == 0) {
		if (d->bd_nonblock) {
			/* Non-blocking mode: never sleep. */
			if (d->bd_slen == 0) {
				mutex_exit(&d->bd_lock);
				return (EWOULDBLOCK);
			}
			ROTATE_BUFFERS(d);
			break;
		}

		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		ks_stats.kp_read_wait.value.ui64++;
		delay = ddi_get_lbolt() + d->bd_rtout;
		/*
		 * cv_timedwait_sig() returns 0 if interrupted by a
		 * signal, -1 on timeout, and > 0 on a normal wakeup.
		 */
		error = cv_timedwait_sig(&d->bd_wait, &d->bd_lock, delay);
		if (error == 0) {
			mutex_exit(&d->bd_lock);
			return (EINTR);
		}
		if (error == -1) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing. If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				mutex_exit(&d->bd_lock);
				return (0);
			}
			ROTATE_BUFFERS(d);
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	mutex_exit(&d->bd_lock);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
670 */ 671 error = uiomove(d->bd_hbuf, d->bd_hlen, UIO_READ, uio); 672 673 mutex_enter(&d->bd_lock); 674 d->bd_fbuf = d->bd_hbuf; 675 d->bd_hbuf = 0; 676 d->bd_hlen = 0; 677 done: 678 mutex_exit(&d->bd_lock); 679 return (error); 680 } 681 682 683 /* 684 * If there are processes sleeping on this descriptor, wake them up. 685 * NOTE: the lock for bd_wait is bd_lock and is held by bpf_deliver, 686 * so there is no code here grabbing it. 687 */ 688 static inline void 689 bpf_wakeup(struct bpf_d *d) 690 { 691 cv_signal(&d->bd_wait); 692 } 693 694 static void 695 bpf_timed_out(void *arg) 696 { 697 struct bpf_d *d = arg; 698 699 mutex_enter(&d->bd_lock); 700 if (d->bd_state == BPF_WAITING) { 701 d->bd_state = BPF_TIMED_OUT; 702 if (d->bd_slen != 0) 703 cv_signal(&d->bd_wait); 704 } 705 mutex_exit(&d->bd_lock); 706 } 707 708 709 /* ARGSUSED */ 710 int 711 bpfwrite(dev_t dev, struct uio *uio, cred_t *cred) 712 { 713 struct bpf_d *d = bpf_dev_get(getminor(dev)); 714 uintptr_t mch; 715 uint_t mtu; 716 mblk_t *m; 717 int error; 718 int dlt; 719 720 if ((d->bd_fmode & FWRITE) == 0) 721 return (EBADF); 722 723 mutex_enter(&d->bd_lock); 724 if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif == 0) { 725 mutex_exit(&d->bd_lock); 726 return (EINTR); 727 } 728 729 if (uio->uio_resid == 0) { 730 mutex_exit(&d->bd_lock); 731 return (0); 732 } 733 734 while (d->bd_inuse < 0) { 735 d->bd_waiting++; 736 if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 737 d->bd_waiting--; 738 mutex_exit(&d->bd_lock); 739 return (EINTR); 740 } 741 d->bd_waiting--; 742 } 743 744 mutex_exit(&d->bd_lock); 745 746 dlt = d->bd_dlt; 747 mch = d->bd_mcip; 748 MBPF_SDU_GET(&d->bd_mac, d->bd_bif, &mtu); 749 d->bd_inuse++; 750 751 m = NULL; 752 if (dlt == DLT_IPNET) { 753 error = EIO; 754 goto done; 755 } 756 757 error = bpf_movein(uio, dlt, mtu, &m); 758 if (error) 759 goto done; 760 761 DTRACE_PROBE4(bpf__tx, struct bpf_d *, d, int, dlt, 762 uint_t, mtu, mblk_t *, m); 763 764 if (M_LEN(m) > mtu) { 765 error = 
EMSGSIZE;
		goto done;
	}

	error = MBPF_TX(&d->bd_mac, mch, m);
	/*
	 * The "tx" action here is required to consume the mblk_t.
	 */
	m = NULL;

done:
	if (error == 0)
		ks_stats.kp_write_ok.value.ui64++;
	else
		ks_stats.kp_write_error.value.ui64++;
	/* Only non-NULL if the message was never handed to MBPF_TX(). */
	if (m != NULL)
		freemsg(m);

	mutex_enter(&d->bd_lock);
	d->bd_inuse--;
	if ((d->bd_inuse == 0) && (d->bd_waiting != 0))
		cv_signal(&d->bd_wait);
	mutex_exit(&d->bd_lock);

	/*
	 * The driver frees the mbuf.
	 */
	return (error);
}


/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Callers in this file invoke it with
 * bd_lock held (the "splnet" wording in the original BSD sources no
 * longer applies here).
 */
static void
reset_d(struct bpf_d *d)
{
	if (d->bd_hbuf) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = 0;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_ccount = 0;
}

/*
 * FIONREAD		Check for read packet available.
 * BIOCGBLEN		Get buffer len [for read()].
 * BIOCSETF		Set ethernet read filter.
 * BIOCFLUSH		Flush read packet buffer.
 * BIOCPROMISC		Put interface into promiscuous mode.
 * BIOCGDLT		Get link layer type.
 * BIOCGETIF		Get interface name.
 * BIOCSETIF		Set interface.
 * BIOCSRTIMEOUT	Set read timeout.
 * BIOCGRTIMEOUT	Get read timeout.
 * BIOCGSTATS		Get packet stats.
 * BIOCIMMEDIATE	Set immediate mode.
 * BIOCVERSION		Get filter language version.
 * BIOCGHDRCMPLT	Get "header already complete" flag.
 * BIOCSHDRCMPLT	Set "header already complete" flag.
 */
/* ARGSUSED */
int
bpfioctl(dev_t dev, int cmd, intptr_t addr, int mode, cred_t *cred, int *rval)
{
	struct bpf_d *d = bpf_dev_get(getminor(dev));
	struct bpf_program prog;
	struct lifreq lifreq;
	struct ifreq ifreq;
	int error = 0;
	uint_t size;

	/*
	 * Refresh the PID associated with this bpf file.
845 */ 846 mutex_enter(&d->bd_lock); 847 if (d->bd_state == BPF_WAITING) 848 bpf_clear_timeout(d); 849 d->bd_state = BPF_IDLE; 850 mutex_exit(&d->bd_lock); 851 852 switch (cmd) { 853 854 default: 855 error = EINVAL; 856 break; 857 858 /* 859 * Check for read packet available. 860 */ 861 case FIONREAD: 862 { 863 int n; 864 865 mutex_enter(&d->bd_lock); 866 n = d->bd_slen; 867 if (d->bd_hbuf) 868 n += d->bd_hlen; 869 mutex_exit(&d->bd_lock); 870 871 *(int *)addr = n; 872 break; 873 } 874 875 /* 876 * Get buffer len [for read()]. 877 */ 878 case BIOCGBLEN: 879 error = copyout(&d->bd_bufsize, (void *)addr, 880 sizeof (d->bd_bufsize)); 881 break; 882 883 /* 884 * Set buffer length. 885 */ 886 case BIOCSBLEN: 887 if (copyin((void *)addr, &size, sizeof (size)) != 0) { 888 error = EFAULT; 889 break; 890 } 891 892 mutex_enter(&d->bd_lock); 893 if (d->bd_bif != 0) { 894 error = EINVAL; 895 } else { 896 if (size > bpf_maxbufsize) 897 size = bpf_maxbufsize; 898 else if (size < BPF_MINBUFSIZE) 899 size = BPF_MINBUFSIZE; 900 901 d->bd_bufsize = size; 902 } 903 mutex_exit(&d->bd_lock); 904 905 if (error == 0) 906 error = copyout(&size, (void *)addr, sizeof (size)); 907 break; 908 909 /* 910 * Set link layer read filter. 911 */ 912 case BIOCSETF: 913 if (ddi_copyin((void *)addr, &prog, sizeof (prog), mode)) { 914 error = EFAULT; 915 break; 916 } 917 error = bpf_setf(d, &prog); 918 break; 919 920 /* 921 * Flush read packet buffer. 922 */ 923 case BIOCFLUSH: 924 mutex_enter(&d->bd_lock); 925 reset_d(d); 926 mutex_exit(&d->bd_lock); 927 break; 928 929 /* 930 * Put interface into promiscuous mode. 931 * This is a one-way ioctl, it is not used to turn promiscuous 932 * mode off. 933 */ 934 case BIOCPROMISC: 935 if (d->bd_bif == 0) { 936 /* 937 * No interface attached yet. 
938 */ 939 error = EINVAL; 940 break; 941 } 942 mutex_enter(&d->bd_lock); 943 if (d->bd_promisc == 0) { 944 945 if (d->bd_promisc_handle) { 946 uintptr_t mph; 947 948 mph = d->bd_promisc_handle; 949 d->bd_promisc_handle = 0; 950 951 mutex_exit(&d->bd_lock); 952 MBPF_PROMISC_REMOVE(&d->bd_mac, mph); 953 mutex_enter(&d->bd_lock); 954 } 955 956 d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_COPY; 957 error = MBPF_PROMISC_ADD(&d->bd_mac, 958 d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d, 959 &d->bd_promisc_handle, d->bd_promisc_flags); 960 if (error == 0) 961 d->bd_promisc = 1; 962 } 963 mutex_exit(&d->bd_lock); 964 break; 965 966 /* 967 * Get device parameters. 968 */ 969 case BIOCGDLT: 970 if (d->bd_bif == 0) 971 error = EINVAL; 972 else 973 error = copyout(&d->bd_dlt, (void *)addr, 974 sizeof (d->bd_dlt)); 975 break; 976 977 /* 978 * Get a list of supported device parameters. 979 */ 980 case BIOCGDLTLIST: 981 if (d->bd_bif == 0) { 982 error = EINVAL; 983 } else { 984 struct bpf_dltlist list; 985 986 if (copyin((void *)addr, &list, sizeof (list)) != 0) { 987 error = EFAULT; 988 break; 989 } 990 error = bpf_getdltlist(d, &list); 991 if ((error == 0) && 992 copyout(&list, (void *)addr, sizeof (list)) != 0) 993 error = EFAULT; 994 } 995 break; 996 997 /* 998 * Set device parameters. 999 */ 1000 case BIOCSDLT: 1001 error = bpf_setdlt(d, (void *)addr); 1002 break; 1003 1004 /* 1005 * Get interface name. 1006 */ 1007 case BIOCGETIF: 1008 if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) { 1009 error = EFAULT; 1010 break; 1011 } 1012 error = bpf_ifname(d, ifreq.ifr_name, sizeof (ifreq.ifr_name)); 1013 if ((error == 0) && 1014 copyout(&ifreq, (void *)addr, sizeof (ifreq)) != 0) { 1015 error = EFAULT; 1016 break; 1017 } 1018 break; 1019 1020 /* 1021 * Set interface. 
1022 */ 1023 case BIOCSETIF: 1024 if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) { 1025 error = EFAULT; 1026 break; 1027 } 1028 error = bpf_setif(d, ifreq.ifr_name, sizeof (ifreq.ifr_name)); 1029 break; 1030 1031 /* 1032 * Get interface name. 1033 */ 1034 case BIOCGETLIF: 1035 if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) { 1036 error = EFAULT; 1037 break; 1038 } 1039 error = bpf_ifname(d, lifreq.lifr_name, 1040 sizeof (lifreq.lifr_name)); 1041 if ((error == 0) && 1042 copyout(&lifreq, (void *)addr, sizeof (lifreq)) != 0) { 1043 error = EFAULT; 1044 break; 1045 } 1046 break; 1047 1048 /* 1049 * Set interface. 1050 */ 1051 case BIOCSETLIF: 1052 if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) { 1053 error = EFAULT; 1054 break; 1055 } 1056 error = bpf_setif(d, lifreq.lifr_name, 1057 sizeof (lifreq.lifr_name)); 1058 break; 1059 1060 #ifdef _SYSCALL32_IMPL 1061 /* 1062 * Set read timeout. 1063 */ 1064 case BIOCSRTIMEOUT32: 1065 { 1066 struct timeval32 tv; 1067 1068 if (copyin((void *)addr, &tv, sizeof (tv)) != 0) { 1069 error = EFAULT; 1070 break; 1071 } 1072 1073 /* Convert the timeout in microseconds to ticks */ 1074 d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 + 1075 tv.tv_usec); 1076 if ((d->bd_rtout == 0) && (tv.tv_usec != 0)) 1077 d->bd_rtout = 1; 1078 break; 1079 } 1080 1081 /* 1082 * Get read timeout. 1083 */ 1084 case BIOCGRTIMEOUT32: 1085 { 1086 struct timeval32 tv; 1087 clock_t ticks; 1088 1089 ticks = drv_hztousec(d->bd_rtout); 1090 tv.tv_sec = ticks / 1000000; 1091 tv.tv_usec = ticks - (tv.tv_sec * 1000000); 1092 error = copyout(&tv, (void *)addr, sizeof (tv)); 1093 break; 1094 } 1095 1096 /* 1097 * Get a list of supported device parameters. 
1098 */ 1099 case BIOCGDLTLIST32: 1100 if (d->bd_bif == 0) { 1101 error = EINVAL; 1102 } else { 1103 struct bpf_dltlist32 lst32; 1104 struct bpf_dltlist list; 1105 1106 if (copyin((void *)addr, &lst32, sizeof (lst32)) != 0) { 1107 error = EFAULT; 1108 break; 1109 } 1110 1111 list.bfl_len = lst32.bfl_len; 1112 list.bfl_list = (void *)(uint64_t)lst32.bfl_list; 1113 error = bpf_getdltlist(d, &list); 1114 if (error == 0) { 1115 lst32.bfl_len = list.bfl_len; 1116 1117 if (copyout(&lst32, (void *)addr, 1118 sizeof (lst32)) != 0) 1119 error = EFAULT; 1120 } 1121 } 1122 break; 1123 1124 /* 1125 * Set link layer read filter. 1126 */ 1127 case BIOCSETF32: { 1128 struct bpf_program32 prog32; 1129 1130 if (ddi_copyin((void *)addr, &prog32, sizeof (prog), mode)) { 1131 error = EFAULT; 1132 break; 1133 } 1134 prog.bf_len = prog32.bf_len; 1135 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns; 1136 error = bpf_setf(d, &prog); 1137 break; 1138 } 1139 #endif 1140 1141 /* 1142 * Set read timeout. 1143 */ 1144 case BIOCSRTIMEOUT: 1145 { 1146 struct timeval tv; 1147 1148 if (copyin((void *)addr, &tv, sizeof (tv)) != 0) { 1149 error = EFAULT; 1150 break; 1151 } 1152 1153 /* Convert the timeout in microseconds to ticks */ 1154 d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 + 1155 tv.tv_usec); 1156 if ((d->bd_rtout == 0) && (tv.tv_usec != 0)) 1157 d->bd_rtout = 1; 1158 break; 1159 } 1160 1161 /* 1162 * Get read timeout. 1163 */ 1164 case BIOCGRTIMEOUT: 1165 { 1166 struct timeval tv; 1167 clock_t ticks; 1168 1169 ticks = drv_hztousec(d->bd_rtout); 1170 tv.tv_sec = ticks / 1000000; 1171 tv.tv_usec = ticks - (tv.tv_sec * 1000000); 1172 if (copyout(&tv, (void *)addr, sizeof (tv)) != 0) 1173 error = EFAULT; 1174 break; 1175 } 1176 1177 /* 1178 * Get packet stats. 
1179 */ 1180 case BIOCGSTATS: 1181 { 1182 struct bpf_stat bs; 1183 1184 bs.bs_recv = d->bd_rcount; 1185 bs.bs_drop = d->bd_dcount; 1186 bs.bs_capt = d->bd_ccount; 1187 if (copyout(&bs, (void *)addr, sizeof (bs)) != 0) 1188 error = EFAULT; 1189 break; 1190 } 1191 1192 /* 1193 * Set immediate mode. 1194 */ 1195 case BIOCIMMEDIATE: 1196 if (copyin((void *)addr, &d->bd_immediate, 1197 sizeof (d->bd_immediate)) != 0) 1198 error = EFAULT; 1199 break; 1200 1201 case BIOCVERSION: 1202 { 1203 struct bpf_version bv; 1204 1205 bv.bv_major = BPF_MAJOR_VERSION; 1206 bv.bv_minor = BPF_MINOR_VERSION; 1207 if (copyout(&bv, (void *)addr, sizeof (bv)) != 0) 1208 error = EFAULT; 1209 break; 1210 } 1211 1212 case BIOCGHDRCMPLT: /* get "header already complete" flag */ 1213 if (copyout(&d->bd_hdrcmplt, (void *)addr, 1214 sizeof (d->bd_hdrcmplt)) != 0) 1215 error = EFAULT; 1216 break; 1217 1218 case BIOCSHDRCMPLT: /* set "header already complete" flag */ 1219 if (copyin((void *)addr, &d->bd_hdrcmplt, 1220 sizeof (d->bd_hdrcmplt)) != 0) 1221 error = EFAULT; 1222 break; 1223 1224 /* 1225 * Get "see sent packets" flag 1226 */ 1227 case BIOCGSEESENT: 1228 if (copyout(&d->bd_seesent, (void *)addr, 1229 sizeof (d->bd_seesent)) != 0) 1230 error = EFAULT; 1231 break; 1232 1233 /* 1234 * Set "see sent" packets flag 1235 */ 1236 case BIOCSSEESENT: 1237 if (copyin((void *)addr, &d->bd_seesent, 1238 sizeof (d->bd_seesent)) != 0) 1239 error = EFAULT; 1240 break; 1241 1242 case FIONBIO: /* Non-blocking I/O */ 1243 if (copyin((void *)addr, &d->bd_nonblock, 1244 sizeof (d->bd_nonblock)) != 0) 1245 error = EFAULT; 1246 break; 1247 } 1248 return (error); 1249 } 1250 1251 /* 1252 * Set d's packet filter program to fp. If this file already has a filter, 1253 * free it and replace it. If the new filter is "empty" (has a 0 size), then 1254 * the result is to just remove and free the existing filter. 1255 * Returns EINVAL for bogus requests. 
1256 */ 1257 int 1258 bpf_setf(struct bpf_d *d, struct bpf_program *fp) 1259 { 1260 struct bpf_insn *fcode, *old; 1261 uint_t flen, size; 1262 size_t oldsize; 1263 1264 if (fp->bf_insns == 0) { 1265 if (fp->bf_len != 0) 1266 return (EINVAL); 1267 mutex_enter(&d->bd_lock); 1268 old = d->bd_filter; 1269 oldsize = d->bd_filter_size; 1270 d->bd_filter = 0; 1271 d->bd_filter_size = 0; 1272 reset_d(d); 1273 mutex_exit(&d->bd_lock); 1274 if (old != 0) 1275 kmem_free(old, oldsize); 1276 return (0); 1277 } 1278 flen = fp->bf_len; 1279 if (flen > BPF_MAXINSNS) 1280 return (EINVAL); 1281 1282 size = flen * sizeof (*fp->bf_insns); 1283 fcode = kmem_alloc(size, KM_SLEEP); 1284 if (copyin(fp->bf_insns, fcode, size) != 0) 1285 return (EFAULT); 1286 1287 if (bpf_validate(fcode, (int)flen)) { 1288 mutex_enter(&d->bd_lock); 1289 old = d->bd_filter; 1290 oldsize = d->bd_filter_size; 1291 d->bd_filter = fcode; 1292 d->bd_filter_size = size; 1293 reset_d(d); 1294 mutex_exit(&d->bd_lock); 1295 if (old != 0) 1296 kmem_free(old, oldsize); 1297 1298 return (0); 1299 } 1300 kmem_free(fcode, size); 1301 return (EINVAL); 1302 } 1303 1304 /* 1305 * Detach a file from its current interface (if attached at all) and attach 1306 * to the interface indicated by the name stored in ifname. 1307 * Return an errno or 0. 1308 */ 1309 static int 1310 bpf_setif(struct bpf_d *d, char *ifname, int namesize) 1311 { 1312 int unit_seen; 1313 int error = 0; 1314 char *cp; 1315 int i; 1316 1317 /* 1318 * Make sure the provided name has a unit number, and default 1319 * it to '0' if not specified. 1320 * XXX This is ugly ... do this differently? 1321 */ 1322 unit_seen = 0; 1323 cp = ifname; 1324 cp[namesize - 1] = '\0'; /* sanity */ 1325 while (*cp++) 1326 if (*cp >= '0' && *cp <= '9') 1327 unit_seen = 1; 1328 if (!unit_seen) { 1329 /* Make sure to leave room for the '\0'. 
*/ 1330 for (i = 0; i < (namesize - 1); ++i) { 1331 if ((ifname[i] >= 'a' && ifname[i] <= 'z') || 1332 (ifname[i] >= 'A' && ifname[i] <= 'Z')) 1333 continue; 1334 ifname[i] = '0'; 1335 } 1336 } 1337 1338 /* 1339 * Make sure that only one call to this function happens at a time 1340 * and that we're not interleaving a read/write 1341 */ 1342 mutex_enter(&d->bd_lock); 1343 while (d->bd_inuse != 0) { 1344 d->bd_waiting++; 1345 if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 1346 d->bd_waiting--; 1347 mutex_exit(&d->bd_lock); 1348 return (EINTR); 1349 } 1350 d->bd_waiting--; 1351 } 1352 d->bd_inuse = -1; 1353 mutex_exit(&d->bd_lock); 1354 1355 if (d->bd_sbuf == 0) 1356 error = bpf_allocbufs(d); 1357 1358 if (error == 0) { 1359 mutex_enter(&d->bd_lock); 1360 if (d->bd_bif) 1361 /* 1362 * Detach if attached to something else. 1363 */ 1364 bpf_detachd(d); 1365 1366 error = bpf_attachd(d, ifname, -1); 1367 reset_d(d); 1368 d->bd_inuse = 0; 1369 if (d->bd_waiting != 0) 1370 cv_signal(&d->bd_wait); 1371 mutex_exit(&d->bd_lock); 1372 return (error); 1373 } 1374 1375 mutex_enter(&d->bd_lock); 1376 d->bd_inuse = 0; 1377 if (d->bd_waiting != 0) 1378 cv_signal(&d->bd_wait); 1379 mutex_exit(&d->bd_lock); 1380 1381 /* 1382 * Try tickle the mac layer into attaching the device... 1383 */ 1384 return (bpf_provider_tickle(ifname, d->bd_zone)); 1385 } 1386 1387 /* 1388 * Copy the interface name to the ifreq. 
 */
static int
bpf_ifname(struct bpf_d *d, char *buffer, int bufsize)
{

	mutex_enter(&d->bd_lock);
	/* Not attached to any interface: nothing to copy. */
	if (d->bd_bif == 0) {
		mutex_exit(&d->bd_lock);
		return (EINVAL);
	}

	(void) strlcpy(buffer, d->bd_ifname, bufsize);
	mutex_exit(&d->bd_lock);

	return (0);
}

/*
 * chpoll(9E) entry point: report whether captured packet data is
 * available for reading on this descriptor.
 */
/* ARGSUSED */
int
bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	struct bpf_d *d = bpf_dev_get(getminor(dev));

	/*
	 * Until this driver is modified to issue proper pollwakeup() calls on
	 * its pollhead, edge-triggered polling is not allowed.
	 */
	if (events & POLLET) {
		return (EPERM);
	}

	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code.
		 */
		mutex_enter(&d->bd_lock);
		/*
		 * Readable when the hold buffer has data, or when the
		 * store buffer has data and we are either in immediate
		 * mode or the read timeout has already expired.
		 */
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		    d->bd_slen != 0)) {
			*reventsp |= events & (POLLIN | POLLRDNORM);
		} else {
			/*
			 * Until the bpf driver has been updated to include
			 * adequate pollwakeup() logic, no pollhead will be
			 * emitted here, preventing the resource from being
			 * cached by poll()/devpoll/epoll.
			 */
			*reventsp = 0;
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				bpf_clear_timeout(d);
				/*
				 * Only allow the timeout to be set once.
				 */
				if (d->bd_callout == 0)
					d->bd_callout = timeout(bpf_timed_out,
					    d, d->bd_rtout);
				d->bd_state = BPF_WAITING;
			}
		}
		mutex_exit(&d->bd_lock);
	}

	return (0);
}

/*
 * Copy data from an mblk_t chain into a buffer. This works for ipnet
 * because the dl_ipnetinfo_t is placed in an mblk_t that leads the
 * packet itself.
 */
static void *
bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
{
	const mblk_t *m;
	uint_t count;
	uchar_t *dst;

	m = src_arg;
	dst = dst_arg;
	while (len > 0) {
		/*
		 * Running off the end of the chain before len is
		 * exhausted means the caller passed a bad length.
		 */
		if (m == NULL)
			panic("bpf_mcpy");
		count = (uint_t)min(M_LEN(m), len);
		(void) memcpy(dst, mtod(m, const void *), count);
		m = m->b_cont;
		dst += count;
		len -= count;
	}
	return (dst_arg);
}

/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * marg    pointer to the packet, either a data buffer or an mbuf chain
 * buflen  buffer length, if marg is a data buffer
 * cpfn    a function that can copy marg into the listener's buffer
 * pktlen  length of the packet
 * issent  boolean indicating whether the packet was sent or receive
 */
static inline void
bpf_deliver(struct bpf_d *d, cp_fn_t cpfn, void *marg, uint_t pktlen,
    uint_t buflen, boolean_t issent)
{
	struct timeval tv;
	uint_t slen;

	/* Honour the BIOCSSEESENT setting: optionally skip sent packets. */
	if (!d->bd_seesent && issent)
		return;

	/*
	 * Accuracy of the packet counters in BPF is vital so it
	 * is important to protect even the outer ones.
	 */
	mutex_enter(&d->bd_lock);
	/*
	 * slen is the number of bytes the filter wants captured;
	 * zero means the filter rejected the packet.
	 */
	slen = bpf_filter(d->bd_filter, marg, pktlen, buflen);
	DTRACE_PROBE5(bpf__packet, struct bpf_if *, d->bd_bif,
	    struct bpf_d *, d, void *, marg, uint_t, pktlen, uint_t, slen);
	d->bd_rcount++;
	ks_stats.kp_receive.value.ui64++;
	if (slen != 0) {
		uniqtime(&tv);
		catchpacket(d, marg, pktlen, slen, cpfn, &tv);
	}
	mutex_exit(&d->bd_lock);
}

/*
 * Incoming linkage from device drivers.
 */
/* ARGSUSED */
void
bpf_mtap(void *arg, mac_resource_handle_t mrh, mblk_t *m, boolean_t issent)
{
	cp_fn_t cpfn;
	struct bpf_d *d = arg;
	uint_t pktlen, buflen;
	void *marg;

	pktlen = msgdsize(m);

	/*
	 * If the whole packet is in the leading mblk, hand memcpy() a
	 * flat buffer; otherwise pass the chain itself and let
	 * bpf_mcpy() walk b_cont.
	 */
	if (pktlen == M_LEN(m)) {
		cpfn = (cp_fn_t)memcpy;
		marg = mtod(m, void *);
		buflen = pktlen;
	} else {
		cpfn = bpf_mcpy;
		marg = m;
		buflen = 0;
	}

	bpf_deliver(d, cpfn, marg, pktlen, buflen, issent);
}

/*
 * Incoming linkage from ipnet.
 * In ipnet, there is only one event, NH_OBSERVE, that delivers packets
 * from all network interfaces. Thus the tap function needs to apply a
 * filter using the interface index/id to immitate snoop'ing on just the
 * specified interface.
 */
/* ARGSUSED */
void
bpf_itap(void *arg, mblk_t *m, boolean_t issent, uint_t length)
{
	hook_pkt_observe_t *hdr;
	struct bpf_d *d = arg;

	hdr = (hook_pkt_observe_t *)m->b_rptr;
	/* Only deliver packets from the link this descriptor is bound to. */
	if (ntohl(hdr->hpo_ifindex) != d->bd_linkid)
		return;
	bpf_deliver(d, bpf_mcpy, m, length, 0, issent);

}

/*
 * Move the packet data from interface memory (pkt) into the store
 * buffer, waking up any pending reader if the buffer filled or
 * immediate/timed-out delivery applies. "cpfn" is the routine called
 * to do the actual data transfer: memcpy is passed in to copy
 * contiguous chunks, while bpf_mcpy is passed in to copy mblk chains.
 * In the latter case, pkt is really an mblk.
 * Called with bd_lock held (see bpf_deliver()).
 */
static void
catchpacket(struct bpf_d *d, uchar_t *pkt, uint_t pktlen, uint_t snaplen,
    cp_fn_t cpfn, struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_hdrlen;
	int do_wakeup = 0;

	++d->bd_ccount;
	ks_stats.kp_capture.value.ui64++;
	/*
	 * Figure out how many bytes to move. If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much. Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			ks_stats.kp_dropped.value.ui64++;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call. A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header to the existing buffer before we add
	 * on the actual packet data.
	 */
	hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = (uint16_t)hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)((uchar_t *)hp + hdrlen, pkt,
	    (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
1651 */ 1652 static int 1653 bpf_allocbufs(struct bpf_d *d) 1654 { 1655 1656 d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP); 1657 if (!d->bd_fbuf) 1658 return (ENOBUFS); 1659 d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP); 1660 if (!d->bd_sbuf) { 1661 kmem_free(d->bd_fbuf, d->bd_bufsize); 1662 return (ENOBUFS); 1663 } 1664 d->bd_slen = 0; 1665 d->bd_hlen = 0; 1666 return (0); 1667 } 1668 1669 /* 1670 * Free buffers currently in use by a descriptor. 1671 * Called on close. 1672 */ 1673 static void 1674 bpf_freed(struct bpf_d *d) 1675 { 1676 /* 1677 * At this point the descriptor has been detached from its 1678 * interface and it yet hasn't been marked free. 1679 */ 1680 if (d->bd_sbuf != 0) { 1681 kmem_free(d->bd_sbuf, d->bd_bufsize); 1682 if (d->bd_hbuf != 0) 1683 kmem_free(d->bd_hbuf, d->bd_bufsize); 1684 if (d->bd_fbuf != 0) 1685 kmem_free(d->bd_fbuf, d->bd_bufsize); 1686 } 1687 if (d->bd_filter) 1688 kmem_free(d->bd_filter, d->bd_filter_size); 1689 } 1690 1691 /* 1692 * Get a list of available data link type of the interface. 
1693 */ 1694 static int 1695 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *listp) 1696 { 1697 bpf_provider_list_t *bp; 1698 bpf_provider_t *bpr; 1699 zoneid_t zoneid; 1700 uintptr_t mcip; 1701 uint_t nicdlt; 1702 uintptr_t mh; 1703 int error; 1704 int n; 1705 1706 n = 0; 1707 mh = 0; 1708 mcip = 0; 1709 error = 0; 1710 mutex_enter(&d->bd_lock); 1711 LIST_FOREACH(bp, &bpf_providers, bpl_next) { 1712 bpr = bp->bpl_what; 1713 error = MBPF_OPEN(bpr, d->bd_ifname, &mh, d->bd_zone); 1714 if (error != 0) 1715 goto next; 1716 error = MBPF_CLIENT_OPEN(bpr, mh, &mcip); 1717 if (error != 0) 1718 goto next; 1719 error = MBPF_GET_ZONE(bpr, mh, &zoneid); 1720 if (error != 0) 1721 goto next; 1722 if (d->bd_zone != GLOBAL_ZONEID && 1723 d->bd_zone != zoneid) 1724 goto next; 1725 error = MBPF_GET_DLT(bpr, mh, &nicdlt); 1726 if (error != 0) 1727 goto next; 1728 nicdlt = bpf_dl_to_dlt(nicdlt); 1729 if (listp->bfl_list != NULL) { 1730 if (n >= listp->bfl_len) { 1731 MBPF_CLIENT_CLOSE(bpr, mcip); 1732 MBPF_CLOSE(bpr, mh); 1733 break; 1734 } 1735 /* 1736 * Bumping of bd_inuse ensures the structure does not 1737 * disappear while the copyout runs and allows the for 1738 * loop to be continued. 1739 */ 1740 d->bd_inuse++; 1741 mutex_exit(&d->bd_lock); 1742 if (copyout(&nicdlt, 1743 listp->bfl_list + n, sizeof (uint_t)) != 0) 1744 error = EFAULT; 1745 mutex_enter(&d->bd_lock); 1746 if (error != 0) 1747 break; 1748 d->bd_inuse--; 1749 } 1750 n++; 1751 next: 1752 if (mcip != 0) { 1753 MBPF_CLIENT_CLOSE(bpr, mcip); 1754 mcip = 0; 1755 } 1756 if (mh != 0) { 1757 MBPF_CLOSE(bpr, mh); 1758 mh = 0; 1759 } 1760 } 1761 mutex_exit(&d->bd_lock); 1762 1763 /* 1764 * It is quite possible that one or more provider to BPF may not 1765 * know about a link name whlist others do. In that case, so long 1766 * as we have one success, do not declare an error unless it was 1767 * an EFAULT as this indicates a problem that needs to be reported. 
1768 */ 1769 if ((error != EFAULT) && (n > 0)) 1770 error = 0; 1771 1772 listp->bfl_len = n; 1773 return (error); 1774 } 1775 1776 /* 1777 * Set the data link type of a BPF instance. 1778 */ 1779 static int 1780 bpf_setdlt(struct bpf_d *d, void *addr) 1781 { 1782 char ifname[LIFNAMSIZ+1]; 1783 zoneid_t niczone; 1784 int error; 1785 int dlt; 1786 1787 if (copyin(addr, &dlt, sizeof (dlt)) != 0) 1788 return (EFAULT); 1789 1790 mutex_enter(&d->bd_lock); 1791 1792 if (d->bd_bif == 0) { /* Interface not set */ 1793 mutex_exit(&d->bd_lock); 1794 return (EINVAL); 1795 } 1796 if (d->bd_dlt == dlt) { /* NULL-op */ 1797 mutex_exit(&d->bd_lock); 1798 return (0); 1799 } 1800 1801 error = MBPF_GET_ZONE(&d->bd_mac, d->bd_bif, &niczone); 1802 if (error != 0) { 1803 mutex_exit(&d->bd_lock); 1804 return (error); 1805 } 1806 1807 /* 1808 * See the matrix at the top of the file for the permissions table 1809 * enforced by this driver. 1810 */ 1811 if ((d->bd_zone != GLOBAL_ZONEID) && (dlt != DLT_IPNET) && 1812 (niczone != d->bd_zone)) { 1813 mutex_exit(&d->bd_lock); 1814 return (EINVAL); 1815 } 1816 1817 (void) strlcpy(ifname, d->bd_ifname, sizeof (ifname)); 1818 d->bd_inuse = -1; 1819 bpf_detachd(d); 1820 error = bpf_attachd(d, ifname, dlt); 1821 reset_d(d); 1822 d->bd_inuse = 0; 1823 1824 mutex_exit(&d->bd_lock); 1825 return (error); 1826 } 1827 1828 /* 1829 * bpf_clear_timeout is called with the bd_lock mutex held, providing it 1830 * with the necessary protection to retrieve and modify bd_callout but it 1831 * does not hold the lock for its entire duration... see below... 1832 */ 1833 static void 1834 bpf_clear_timeout(struct bpf_d *d) 1835 { 1836 timeout_id_t tid = d->bd_callout; 1837 d->bd_callout = 0; 1838 d->bd_inuse++; 1839 1840 /* 1841 * If the timeout has fired and is waiting on bd_lock, we could 1842 * deadlock here because untimeout if bd_lock is held and would 1843 * wait for bpf_timed_out to finish and it never would. 
1844 */ 1845 if (tid != 0) { 1846 mutex_exit(&d->bd_lock); 1847 (void) untimeout(tid); 1848 mutex_enter(&d->bd_lock); 1849 } 1850 1851 d->bd_inuse--; 1852 } 1853 1854 /* 1855 * As a cloning device driver, BPF needs to keep track of which device 1856 * numbers are in use and which ones are not. A hash table, indexed by 1857 * the minor device number, is used to store the pointers to the 1858 * individual descriptors that are allocated in bpfopen(). 1859 * The functions below present the interface for that hash table to 1860 * the rest of the driver. 1861 */ 1862 static struct bpf_d * 1863 bpf_dev_find(minor_t minor) 1864 { 1865 struct bpf_d *d = NULL; 1866 1867 (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor, 1868 (mod_hash_val_t *)&d); 1869 1870 return (d); 1871 } 1872 1873 static void 1874 bpf_dev_add(struct bpf_d *d) 1875 { 1876 (void) mod_hash_insert(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev, 1877 (mod_hash_val_t)d); 1878 } 1879 1880 static void 1881 bpf_dev_remove(struct bpf_d *d) 1882 { 1883 struct bpf_d *stor; 1884 1885 (void) mod_hash_remove(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev, 1886 (mod_hash_val_t *)&stor); 1887 ASSERT(stor == d); 1888 } 1889 1890 /* 1891 * bpf_def_get should only ever be called for a minor number that exists, 1892 * thus there should always be a pointer in the hash table that corresponds 1893 * to it. 1894 */ 1895 static struct bpf_d * 1896 bpf_dev_get(minor_t minor) 1897 { 1898 struct bpf_d *d = NULL; 1899 1900 (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor, 1901 (mod_hash_val_t *)&d); 1902 ASSERT(d != NULL); 1903 1904 return (d); 1905 } 1906