1 /* $NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $ */ 2 3 /* 4 * Copyright (c) 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from the Stanford/CMU enet packet filter, 8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 10 * Berkeley Laboratory. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95 37 * static char rcsid[] = 38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp "; 39 */ 40 /* 41 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 42 * Use is subject to license terms. 43 */ 44 45 /* 46 * The BPF implements the following access controls for zones attempting 47 * to read and write data. Writing of data requires that the net_rawaccess 48 * privilege is held whilst reading data requires either net_rawaccess or 49 * net_observerability. 50 * 51 * | Shared | Exclusive | Global 52 * -----------------------------+--------+------------+------------+ 53 * DLT_IPNET in local zone | Read | Read | Read | 54 * -----------------------------+--------+------------+------------+ 55 * Raw access to local zone NIC | None | Read/Write | Read/Write | 56 * -----------------------------+--------+------------+------------+ 57 * Raw access to all NICs | None | None | Read/Write | 58 * -----------------------------+--------+------------+------------+ 59 * 60 * The BPF driver is written as a cloning driver: each call to bpfopen() 61 * allocates a new minor number. This provides BPF with a 1:1 relationship 62 * between open's and close's. There is some amount of "descriptor state" 63 * that is kept per open. Pointers to this data are stored in a hash table 64 * (bpf_hash) that is index'd by the minor device number for each open file. 
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#include <sys/filio.h>
#include <sys/policy.h>
#include <sys/cmn_err.h>
#include <sys/uio.h>
#include <sys/file.h>
#include <sys/sysmacros.h>
#include <sys/zone.h>

#include <sys/socket.h>
#include <sys/errno.h>
#include <sys/poll.h>
#include <sys/dlpi.h>
#include <sys/neti.h>

#include <net/if.h>

#include <net/bpf.h>
#include <net/bpfdesc.h>
#include <net/dlt.h>

#include <netinet/in.h>
#include <sys/mac.h>
#include <sys/mac_client.h>
#include <sys/mac_impl.h>
#include <sys/time_std_impl.h>
#include <sys/hook.h>
#include <sys/hook_event.h>


/*
 * BSD-compat helpers: the original BPF code operates on mbufs; here the
 * packet is an mblk_t, so mtod()/M_LEN() are redefined in STREAMS terms.
 */
#define	mtod(_v, _t)	(_t)((_v)->b_rptr)
#define	M_LEN(_m)	((_m)->b_wptr - (_m)->b_rptr)

/*
 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
 */
#define	BPF_BUFSIZE (32 * 1024)

/* Signature shared by bcopy-style and mblk-chain copy routines. */
typedef void *(*cp_fn_t)(void *, const void *, size_t);

/*
 * The default read buffer size, and limit for BIOCSBLEN.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = (16 * 1024 * 1024);
/* Hash of minor number -> struct bpf_d for all open descriptors. */
static mod_hash_t *bpf_hash = NULL;

/*
 * Use a mutex to avoid a race condition between gathering the stats/peers
 * and opening/closing the device.
 */
static kcondvar_t	bpf_dlt_waiter;
static kmutex_t		bpf_mtx;
/* Live copy of the kstat counters; bpf_kstats below is the template. */
static bpf_kstats_t	ks_stats;
static bpf_kstats_t	bpf_kstats = {
	{ "readWait",		KSTAT_DATA_UINT64 },
	{ "writeOk",		KSTAT_DATA_UINT64 },
	{ "writeError",		KSTAT_DATA_UINT64 },
	{ "receive",		KSTAT_DATA_UINT64 },
	{ "captured",		KSTAT_DATA_UINT64 },
	{ "dropped",		KSTAT_DATA_UINT64 },
};
static kstat_t		*bpf_ksp;

/*
 * bpf_list is a list of the BPF descriptors currently open
 */
LIST_HEAD(, bpf_d) bpf_list;

static int	bpf_allocbufs(struct bpf_d *);
static void	bpf_clear_timeout(struct bpf_d *);
static void	bpf_deliver(struct bpf_d *, cp_fn_t,
		    void *, uint_t, uint_t, boolean_t);
static void	bpf_freed(struct bpf_d *);
static int	bpf_ifname(struct bpf_d *d, char *, int);
static void	*bpf_mcpy(void *, const void *, size_t);
static int	bpf_attachd(struct bpf_d *, const char *, int);
static void	bpf_detachd(struct bpf_d *);
static int	bpf_setif(struct bpf_d *, char *, int);
static void	bpf_timed_out(void *);
static inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, uchar_t *, uint_t, uint_t,
		    cp_fn_t, struct timeval *);
static void	reset_d(struct bpf_d *);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, void *);
static void	bpf_dev_add(struct bpf_d *);
static struct bpf_d *bpf_dev_find(minor_t);
static struct bpf_d *bpf_dev_get(minor_t);
static void	bpf_dev_remove(struct bpf_d *);

/*
 * Copy a user-supplied packet (from write(2)) into a freshly allocated
 * mblk_t, validating it against the link type and the interface MTU.
 *
 * uio		source of the packet bytes (consumed on success)
 * linktype	DLT_* value of the attached interface
 * mtu		maximum payload allowed past the link-level header
 * mp		out: the allocated message on success
 *
 * Returns 0, EIO (unsupported link type), EMSGSIZE, or ENOBUFS.
 */
static int
bpf_movein(struct uio *uio, int linktype, int mtu, mblk_t **mp)
{
	mblk_t *m;
	int error;
	int len;
	int hlen;
	int align;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_EN10MB:
		hlen = sizeof (struct ether_header);
		break;

	case DLT_FDDI:
		hlen = 16;
		break;

	case DLT_NULL:
		hlen = 0;
		break;

	case DLT_IPOIB:
		hlen = 44;
		break;

	default:
		return (EIO);
	}

	/*
	 * Pad so that the payload following the link header lands on a
	 * 4-byte boundary.  NOTE(review): when hlen is already a multiple
	 * of 4 this yields align == 4 (an extra, harmless 4-byte offset).
	 */
	align = 4 - (hlen & 3);

	len = uio->uio_resid;
	/*
	 * If there aren't enough bytes for a link level header or the
	 * packet length exceeds the interface mtu, return an error.
	 */
	if (len < hlen || len - hlen > mtu)
		return (EMSGSIZE);

	m = allocb(len + align, BPRI_MED);
	if (m == NULL) {
		error = ENOBUFS;
		goto bad;
	}

	/* Insure the data is properly aligned */
	if (align > 0)
		m->b_rptr += align;
	m->b_wptr = m->b_rptr + len;

	error = uiomove(mtod(m, void *), len, UIO_WRITE, uio);
	if (error)
		goto bad;
	*mp = m;
	return (0);

bad:
	if (m != NULL)
		freemsg(m);
	return (error);
}


/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
239 */ 240 static int 241 bpf_attachd(struct bpf_d *d, const char *ifname, int dlt) 242 { 243 bpf_provider_list_t *bp; 244 bpf_provider_t *bpr; 245 boolean_t zonematch; 246 zoneid_t niczone; 247 uintptr_t mcip; 248 zoneid_t zone; 249 uint_t nicdlt; 250 uintptr_t mh; 251 int hdrlen; 252 int error; 253 254 ASSERT(d->bd_bif == NULL); 255 ASSERT(d->bd_mcip == NULL); 256 zone = d->bd_zone; 257 zonematch = B_TRUE; 258 again: 259 mh = 0; 260 mcip = 0; 261 LIST_FOREACH(bp, &bpf_providers, bpl_next) { 262 bpr = bp->bpl_what; 263 error = MBPF_OPEN(bpr, ifname, &mh, zone); 264 if (error != 0) 265 goto next; 266 error = MBPF_CLIENT_OPEN(bpr, mh, &mcip); 267 if (error != 0) 268 goto next; 269 error = MBPF_GET_DLT(bpr, mh, &nicdlt); 270 if (error != 0) 271 goto next; 272 273 nicdlt = bpf_dl_to_dlt(nicdlt); 274 if (dlt != -1 && dlt != nicdlt) { 275 error = ENOENT; 276 goto next; 277 } 278 279 error = MBPF_GET_ZONE(bpr, mh, &niczone); 280 if (error != 0) 281 goto next; 282 283 DTRACE_PROBE4(bpf__attach, struct bpf_provider_s *, bpr, 284 uintptr_t, mh, int, nicdlt, zoneid_t, niczone); 285 286 if (zonematch && niczone != zone) { 287 error = ENOENT; 288 goto next; 289 } 290 break; 291 next: 292 if (mcip != 0) { 293 MBPF_CLIENT_CLOSE(bpr, mcip); 294 mcip = 0; 295 } 296 if (mh != NULL) { 297 MBPF_CLOSE(bpr, mh); 298 mh = 0; 299 } 300 } 301 if (error != 0) { 302 if (zonematch && (zone == GLOBAL_ZONEID)) { 303 /* 304 * If we failed to do an exact match for the global 305 * zone using the global zoneid, try again in case 306 * the network interface is owned by a local zone. 
307 */ 308 zonematch = B_FALSE; 309 goto again; 310 } 311 return (error); 312 } 313 314 d->bd_mac = *bpr; 315 d->bd_mcip = mcip; 316 d->bd_bif = mh; 317 d->bd_dlt = nicdlt; 318 hdrlen = bpf_dl_hdrsize(nicdlt); 319 d->bd_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 320 321 (void) strlcpy(d->bd_ifname, MBPF_CLIENT_NAME(&d->bd_mac, mcip), 322 sizeof (d->bd_ifname)); 323 324 (void) MBPF_GET_LINKID(&d->bd_mac, d->bd_ifname, &d->bd_linkid, 325 zone); 326 (void) MBPF_PROMISC_ADD(&d->bd_mac, d->bd_mcip, 0, d, 327 &d->bd_promisc_handle, d->bd_promisc_flags); 328 return (0); 329 } 330 331 /* 332 * Detach a file from its interface. 333 */ 334 static void 335 bpf_detachd(struct bpf_d *d) 336 { 337 uintptr_t mph; 338 uintptr_t mch; 339 uintptr_t mh; 340 341 ASSERT(d->bd_inuse == -1); 342 mch = d->bd_mcip; 343 d->bd_mcip = 0; 344 mh = d->bd_bif; 345 d->bd_bif = 0; 346 347 /* 348 * Check if this descriptor had requested promiscuous mode. 349 * If so, turn it off. There's no need to take any action 350 * here, that is done when MBPF_PROMISC_REMOVE is used; 351 * bd_promisc is just a local flag to stop promiscuous mode 352 * from being set more than once. 353 */ 354 if (d->bd_promisc) 355 d->bd_promisc = 0; 356 357 /* 358 * Take device out of "promiscuous" mode. Since we were able to 359 * enter "promiscuous" mode, we should be able to turn it off. 360 * Note, this field stores a pointer used to support both 361 * promiscuous and non-promiscuous callbacks for packets. 362 */ 363 mph = d->bd_promisc_handle; 364 d->bd_promisc_handle = 0; 365 366 /* 367 * The lock has to be dropped here because mac_promisc_remove may 368 * need to wait for mac_promisc_dispatch, which has called into 369 * bpf and catchpacket is waiting for bd_lock... 370 * i.e mac_promisc_remove() needs to be called with none of the 371 * locks held that are part of the bpf_mtap() call path. 
372 */ 373 mutex_exit(&d->bd_lock); 374 if (mph != 0) 375 MBPF_PROMISC_REMOVE(&d->bd_mac, mph); 376 377 if (mch != 0) 378 MBPF_CLIENT_CLOSE(&d->bd_mac, mch); 379 380 if (mh != 0) 381 MBPF_CLOSE(&d->bd_mac, mh); 382 383 /* 384 * Because this function is called with bd_lock held, so it must 385 * exit with it held. 386 */ 387 mutex_enter(&d->bd_lock); 388 *d->bd_ifname = '\0'; 389 (void) memset(&d->bd_mac, 0, sizeof (d->bd_mac)); 390 } 391 392 393 /* 394 * bpfilterattach() is called at load time. 395 */ 396 int 397 bpfilterattach(void) 398 { 399 400 bpf_hash = mod_hash_create_idhash("bpf_dev_tab", 31, 401 mod_hash_null_keydtor); 402 if (bpf_hash == NULL) 403 return (ENOMEM); 404 405 (void) memcpy(&ks_stats, &bpf_kstats, sizeof (bpf_kstats)); 406 407 bpf_ksp = kstat_create("bpf", 0, "global", "misc", 408 KSTAT_TYPE_NAMED, sizeof (bpf_kstats) / sizeof (kstat_named_t), 409 KSTAT_FLAG_VIRTUAL); 410 if (bpf_ksp != NULL) { 411 bpf_ksp->ks_data = &ks_stats; 412 kstat_install(bpf_ksp); 413 } else { 414 mod_hash_destroy_idhash(bpf_hash); 415 bpf_hash = NULL; 416 return (EEXIST); 417 } 418 419 cv_init(&bpf_dlt_waiter, NULL, CV_DRIVER, NULL); 420 mutex_init(&bpf_mtx, NULL, MUTEX_DRIVER, NULL); 421 422 LIST_INIT(&bpf_list); 423 424 return (0); 425 } 426 427 428 /* 429 * bpfilterdetach() is called at unload time. 430 */ 431 int 432 bpfilterdetach(void) 433 { 434 435 if (bpf_ksp != NULL) { 436 kstat_delete(bpf_ksp); 437 bpf_ksp = NULL; 438 } 439 440 mod_hash_destroy_idhash(bpf_hash); 441 bpf_hash = NULL; 442 443 cv_destroy(&bpf_dlt_waiter); 444 mutex_destroy(&bpf_mtx); 445 446 return (0); 447 } 448 449 /* 450 * Open ethernet device. Clones. 451 */ 452 /* ARGSUSED */ 453 int 454 bpfopen(dev_t *devp, int flag, int mode, cred_t *cred) 455 { 456 struct bpf_d *d; 457 uint_t dmin; 458 459 /* 460 * The security policy described at the top of this file is 461 * enforced here. 
462 */ 463 if ((flag & FWRITE) != 0) { 464 if (secpolicy_net_rawaccess(cred) != 0) 465 return (EACCES); 466 } 467 468 if ((flag & FREAD) != 0) { 469 if ((secpolicy_net_observability(cred) != 0) && 470 (secpolicy_net_rawaccess(cred) != 0)) 471 return (EACCES); 472 } 473 474 if ((flag & (FWRITE|FREAD)) == 0) 475 return (ENXIO); 476 477 /* 478 * A structure is allocated per open file in BPF to store settings 479 * such as buffer capture size, provide private buffers, etc. 480 */ 481 d = (struct bpf_d *)kmem_zalloc(sizeof (*d), KM_SLEEP); 482 d->bd_bufsize = bpf_bufsize; 483 d->bd_fmode = flag; 484 d->bd_zone = crgetzoneid(cred); 485 d->bd_seesent = 1; 486 d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_PHYS| 487 MAC_PROMISC_FLAGS_NO_COPY; 488 mutex_init(&d->bd_lock, NULL, MUTEX_DRIVER, NULL); 489 cv_init(&d->bd_wait, NULL, CV_DRIVER, NULL); 490 491 mutex_enter(&bpf_mtx); 492 /* 493 * Find an unused minor number. Obviously this is an O(n) algorithm 494 * and doesn't scale particularly well, so if there are large numbers 495 * of open file descriptors happening in real use, this design may 496 * need to be revisited. 497 */ 498 for (dmin = 0; dmin < L_MAXMIN; dmin++) 499 if (bpf_dev_find(dmin) == NULL) 500 break; 501 if (dmin == L_MAXMIN) { 502 mutex_exit(&bpf_mtx); 503 kmem_free(d, sizeof (*d)); 504 return (ENXIO); 505 } 506 d->bd_dev = dmin; 507 LIST_INSERT_HEAD(&bpf_list, d, bd_list); 508 bpf_dev_add(d); 509 mutex_exit(&bpf_mtx); 510 511 *devp = makedevice(getmajor(*devp), dmin); 512 513 return (0); 514 } 515 516 /* 517 * Close the descriptor by detaching it from its interface, 518 * deallocating its buffers, and marking it free. 519 * 520 * Because we only allow a device to be opened once, there is always a 521 * 1 to 1 relationship between opens and closes supporting this function. 
522 */ 523 /* ARGSUSED */ 524 int 525 bpfclose(dev_t dev, int flag, int otyp, cred_t *cred_p) 526 { 527 struct bpf_d *d = bpf_dev_get(getminor(dev)); 528 529 mutex_enter(&d->bd_lock); 530 531 while (d->bd_inuse != 0) { 532 d->bd_waiting++; 533 if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 534 d->bd_waiting--; 535 mutex_exit(&d->bd_lock); 536 return (EINTR); 537 } 538 d->bd_waiting--; 539 } 540 541 d->bd_inuse = -1; 542 if (d->bd_state == BPF_WAITING) 543 bpf_clear_timeout(d); 544 d->bd_state = BPF_IDLE; 545 if (d->bd_bif) 546 bpf_detachd(d); 547 mutex_exit(&d->bd_lock); 548 549 mutex_enter(&bpf_mtx); 550 LIST_REMOVE(d, bd_list); 551 bpf_dev_remove(d); 552 mutex_exit(&bpf_mtx); 553 554 mutex_enter(&d->bd_lock); 555 mutex_destroy(&d->bd_lock); 556 cv_destroy(&d->bd_wait); 557 558 bpf_freed(d); 559 kmem_free(d, sizeof (*d)); 560 561 return (0); 562 } 563 564 /* 565 * Rotate the packet buffers in descriptor d. Move the store buffer 566 * into the hold slot, and the free buffer into the store slot. 567 * Zero the length of the new store buffer. 568 */ 569 #define ROTATE_BUFFERS(d) \ 570 (d)->bd_hbuf = (d)->bd_sbuf; \ 571 (d)->bd_hlen = (d)->bd_slen; \ 572 (d)->bd_sbuf = (d)->bd_fbuf; \ 573 (d)->bd_slen = 0; \ 574 (d)->bd_fbuf = 0; 575 /* 576 * bpfread - read next chunk of packets from buffers 577 */ 578 /* ARGSUSED */ 579 int 580 bpfread(dev_t dev, struct uio *uio, cred_t *cred) 581 { 582 struct bpf_d *d = bpf_dev_get(getminor(dev)); 583 int timed_out; 584 ulong_t delay; 585 int error; 586 587 if ((d->bd_fmode & FREAD) == 0) 588 return (EBADF); 589 590 /* 591 * Restrict application to use a buffer the same size as 592 * the kernel buffers. 
593 */ 594 if (uio->uio_resid != d->bd_bufsize) 595 return (EINVAL); 596 597 mutex_enter(&d->bd_lock); 598 if (d->bd_state == BPF_WAITING) 599 bpf_clear_timeout(d); 600 timed_out = (d->bd_state == BPF_TIMED_OUT); 601 d->bd_state = BPF_IDLE; 602 /* 603 * If the hold buffer is empty, then do a timed sleep, which 604 * ends when the timeout expires or when enough packets 605 * have arrived to fill the store buffer. 606 */ 607 while (d->bd_hbuf == 0) { 608 if (d->bd_nonblock) { 609 if (d->bd_slen == 0) { 610 mutex_exit(&d->bd_lock); 611 return (EWOULDBLOCK); 612 } 613 ROTATE_BUFFERS(d); 614 break; 615 } 616 617 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 618 /* 619 * A packet(s) either arrived since the previous 620 * read or arrived while we were asleep. 621 * Rotate the buffers and return what's here. 622 */ 623 ROTATE_BUFFERS(d); 624 break; 625 } 626 ks_stats.kp_read_wait.value.ui64++; 627 delay = ddi_get_lbolt() + d->bd_rtout; 628 error = cv_timedwait_sig(&d->bd_wait, &d->bd_lock, delay); 629 if (error == 0) { 630 mutex_exit(&d->bd_lock); 631 return (EINTR); 632 } 633 if (error == -1) { 634 /* 635 * On a timeout, return what's in the buffer, 636 * which may be nothing. If there is something 637 * in the store buffer, we can rotate the buffers. 638 */ 639 if (d->bd_hbuf) 640 /* 641 * We filled up the buffer in between 642 * getting the timeout and arriving 643 * here, so we don't need to rotate. 644 */ 645 break; 646 647 if (d->bd_slen == 0) { 648 mutex_exit(&d->bd_lock); 649 return (0); 650 } 651 ROTATE_BUFFERS(d); 652 } 653 } 654 /* 655 * At this point, we know we have something in the hold slot. 656 */ 657 mutex_exit(&d->bd_lock); 658 659 /* 660 * Move data from hold buffer into user space. 661 * We know the entire buffer is transferred since 662 * we checked above that the read buffer is bpf_bufsize bytes. 
663 */ 664 error = uiomove(d->bd_hbuf, d->bd_hlen, UIO_READ, uio); 665 666 mutex_enter(&d->bd_lock); 667 d->bd_fbuf = d->bd_hbuf; 668 d->bd_hbuf = 0; 669 d->bd_hlen = 0; 670 done: 671 mutex_exit(&d->bd_lock); 672 return (error); 673 } 674 675 676 /* 677 * If there are processes sleeping on this descriptor, wake them up. 678 * NOTE: the lock for bd_wait is bd_lock and is held by bpf_deliver, 679 * so there is no code here grabbing it. 680 */ 681 static inline void 682 bpf_wakeup(struct bpf_d *d) 683 { 684 cv_signal(&d->bd_wait); 685 } 686 687 static void 688 bpf_timed_out(void *arg) 689 { 690 struct bpf_d *d = arg; 691 692 mutex_enter(&d->bd_lock); 693 if (d->bd_state == BPF_WAITING) { 694 d->bd_state = BPF_TIMED_OUT; 695 if (d->bd_slen != 0) 696 cv_signal(&d->bd_wait); 697 } 698 mutex_exit(&d->bd_lock); 699 } 700 701 702 /* ARGSUSED */ 703 int 704 bpfwrite(dev_t dev, struct uio *uio, cred_t *cred) 705 { 706 struct bpf_d *d = bpf_dev_get(getminor(dev)); 707 uintptr_t mch; 708 uint_t mtu; 709 mblk_t *m; 710 int error; 711 int dlt; 712 713 if ((d->bd_fmode & FWRITE) == 0) 714 return (EBADF); 715 716 mutex_enter(&d->bd_lock); 717 if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif == 0) { 718 mutex_exit(&d->bd_lock); 719 return (EINTR); 720 } 721 722 if (uio->uio_resid == 0) { 723 mutex_exit(&d->bd_lock); 724 return (0); 725 } 726 727 while (d->bd_inuse < 0) { 728 d->bd_waiting++; 729 if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 730 d->bd_waiting--; 731 mutex_exit(&d->bd_lock); 732 return (EINTR); 733 } 734 d->bd_waiting--; 735 } 736 737 mutex_exit(&d->bd_lock); 738 739 dlt = d->bd_dlt; 740 mch = d->bd_mcip; 741 MBPF_SDU_GET(&d->bd_mac, d->bd_bif, &mtu); 742 d->bd_inuse++; 743 744 m = NULL; 745 if (dlt == DLT_IPNET) { 746 error = EIO; 747 goto done; 748 } 749 750 error = bpf_movein(uio, dlt, mtu, &m); 751 if (error) 752 goto done; 753 754 DTRACE_PROBE4(bpf__tx, struct bpf_d *, d, int, dlt, 755 uint_t, mtu, mblk_t *, m); 756 757 if (M_LEN(m) > mtu) { 758 error = 
EMSGSIZE; 759 goto done; 760 } 761 762 error = MBPF_TX(&d->bd_mac, mch, m); 763 /* 764 * The "tx" action here is required to consume the mblk_t. 765 */ 766 m = NULL; 767 768 done: 769 if (error == 0) 770 ks_stats.kp_write_ok.value.ui64++; 771 else 772 ks_stats.kp_write_error.value.ui64++; 773 if (m != NULL) 774 freemsg(m); 775 776 mutex_enter(&d->bd_lock); 777 d->bd_inuse--; 778 if ((d->bd_inuse == 0) && (d->bd_waiting != 0)) 779 cv_signal(&d->bd_wait); 780 mutex_exit(&d->bd_lock); 781 782 /* 783 * The driver frees the mbuf. 784 */ 785 return (error); 786 } 787 788 789 /* 790 * Reset a descriptor by flushing its packet buffer and clearing the 791 * receive and drop counts. Should be called at splnet. 792 */ 793 static void 794 reset_d(struct bpf_d *d) 795 { 796 if (d->bd_hbuf) { 797 /* Free the hold buffer. */ 798 d->bd_fbuf = d->bd_hbuf; 799 d->bd_hbuf = 0; 800 } 801 d->bd_slen = 0; 802 d->bd_hlen = 0; 803 d->bd_rcount = 0; 804 d->bd_dcount = 0; 805 d->bd_ccount = 0; 806 } 807 808 /* 809 * FIONREAD Check for read packet available. 810 * BIOCGBLEN Get buffer len [for read()]. 811 * BIOCSETF Set ethernet read filter. 812 * BIOCFLUSH Flush read packet buffer. 813 * BIOCPROMISC Put interface into promiscuous mode. 814 * BIOCGDLT Get link layer type. 815 * BIOCGETIF Get interface name. 816 * BIOCSETIF Set interface. 817 * BIOCSRTIMEOUT Set read timeout. 818 * BIOCGRTIMEOUT Get read timeout. 819 * BIOCGSTATS Get packet stats. 820 * BIOCIMMEDIATE Set immediate mode. 821 * BIOCVERSION Get filter language version. 822 * BIOCGHDRCMPLT Get "header already complete" flag. 823 * BIOCSHDRCMPLT Set "header already complete" flag. 824 */ 825 /* ARGSUSED */ 826 int 827 bpfioctl(dev_t dev, int cmd, intptr_t addr, int mode, cred_t *cred, int *rval) 828 { 829 struct bpf_d *d = bpf_dev_get(getminor(dev)); 830 struct bpf_program prog; 831 struct lifreq lifreq; 832 struct ifreq ifreq; 833 int error = 0; 834 uint_t size; 835 836 /* 837 * Refresh the PID associated with this bpf file. 
838 */ 839 mutex_enter(&d->bd_lock); 840 if (d->bd_state == BPF_WAITING) 841 bpf_clear_timeout(d); 842 d->bd_state = BPF_IDLE; 843 mutex_exit(&d->bd_lock); 844 845 switch (cmd) { 846 847 default: 848 error = EINVAL; 849 break; 850 851 /* 852 * Check for read packet available. 853 */ 854 case FIONREAD: 855 { 856 int n; 857 858 mutex_enter(&d->bd_lock); 859 n = d->bd_slen; 860 if (d->bd_hbuf) 861 n += d->bd_hlen; 862 mutex_exit(&d->bd_lock); 863 864 *(int *)addr = n; 865 break; 866 } 867 868 /* 869 * Get buffer len [for read()]. 870 */ 871 case BIOCGBLEN: 872 error = copyout(&d->bd_bufsize, (void *)addr, 873 sizeof (d->bd_bufsize)); 874 break; 875 876 /* 877 * Set buffer length. 878 */ 879 case BIOCSBLEN: 880 if (copyin((void *)addr, &size, sizeof (size)) != 0) { 881 error = EFAULT; 882 break; 883 } 884 885 mutex_enter(&d->bd_lock); 886 if (d->bd_bif != 0) { 887 error = EINVAL; 888 } else { 889 if (size > bpf_maxbufsize) 890 size = bpf_maxbufsize; 891 else if (size < BPF_MINBUFSIZE) 892 size = BPF_MINBUFSIZE; 893 894 d->bd_bufsize = size; 895 } 896 mutex_exit(&d->bd_lock); 897 898 if (error == 0) 899 error = copyout(&size, (void *)addr, sizeof (size)); 900 break; 901 902 /* 903 * Set link layer read filter. 904 */ 905 case BIOCSETF: 906 if (ddi_copyin((void *)addr, &prog, sizeof (prog), mode)) { 907 error = EFAULT; 908 break; 909 } 910 error = bpf_setf(d, &prog); 911 break; 912 913 /* 914 * Flush read packet buffer. 915 */ 916 case BIOCFLUSH: 917 mutex_enter(&d->bd_lock); 918 reset_d(d); 919 mutex_exit(&d->bd_lock); 920 break; 921 922 /* 923 * Put interface into promiscuous mode. 924 * This is a one-way ioctl, it is not used to turn promiscuous 925 * mode off. 926 */ 927 case BIOCPROMISC: 928 if (d->bd_bif == 0) { 929 /* 930 * No interface attached yet. 
931 */ 932 error = EINVAL; 933 break; 934 } 935 mutex_enter(&d->bd_lock); 936 if (d->bd_promisc == 0) { 937 938 if (d->bd_promisc_handle) { 939 uintptr_t mph; 940 941 mph = d->bd_promisc_handle; 942 d->bd_promisc_handle = 0; 943 944 mutex_exit(&d->bd_lock); 945 MBPF_PROMISC_REMOVE(&d->bd_mac, mph); 946 mutex_enter(&d->bd_lock); 947 } 948 949 d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_COPY; 950 error = MBPF_PROMISC_ADD(&d->bd_mac, 951 d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d, 952 &d->bd_promisc_handle, d->bd_promisc_flags); 953 if (error == 0) 954 d->bd_promisc = 1; 955 } 956 mutex_exit(&d->bd_lock); 957 break; 958 959 /* 960 * Get device parameters. 961 */ 962 case BIOCGDLT: 963 if (d->bd_bif == 0) 964 error = EINVAL; 965 else 966 error = copyout(&d->bd_dlt, (void *)addr, 967 sizeof (d->bd_dlt)); 968 break; 969 970 /* 971 * Get a list of supported device parameters. 972 */ 973 case BIOCGDLTLIST: 974 if (d->bd_bif == 0) { 975 error = EINVAL; 976 } else { 977 struct bpf_dltlist list; 978 979 if (copyin((void *)addr, &list, sizeof (list)) != 0) { 980 error = EFAULT; 981 break; 982 } 983 error = bpf_getdltlist(d, &list); 984 if ((error == 0) && 985 copyout(&list, (void *)addr, sizeof (list)) != 0) 986 error = EFAULT; 987 } 988 break; 989 990 /* 991 * Set device parameters. 992 */ 993 case BIOCSDLT: 994 error = bpf_setdlt(d, (void *)addr); 995 break; 996 997 /* 998 * Get interface name. 999 */ 1000 case BIOCGETIF: 1001 if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) { 1002 error = EFAULT; 1003 break; 1004 } 1005 error = bpf_ifname(d, ifreq.ifr_name, sizeof (ifreq.ifr_name)); 1006 if ((error == 0) && 1007 copyout(&ifreq, (void *)addr, sizeof (ifreq)) != 0) { 1008 error = EFAULT; 1009 break; 1010 } 1011 break; 1012 1013 /* 1014 * Set interface. 
1015 */ 1016 case BIOCSETIF: 1017 if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) { 1018 error = EFAULT; 1019 break; 1020 } 1021 error = bpf_setif(d, ifreq.ifr_name, sizeof (ifreq.ifr_name)); 1022 break; 1023 1024 /* 1025 * Get interface name. 1026 */ 1027 case BIOCGETLIF: 1028 if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) { 1029 error = EFAULT; 1030 break; 1031 } 1032 error = bpf_ifname(d, lifreq.lifr_name, 1033 sizeof (lifreq.lifr_name)); 1034 if ((error == 0) && 1035 copyout(&lifreq, (void *)addr, sizeof (lifreq)) != 0) { 1036 error = EFAULT; 1037 break; 1038 } 1039 break; 1040 1041 /* 1042 * Set interface. 1043 */ 1044 case BIOCSETLIF: 1045 if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) { 1046 error = EFAULT; 1047 break; 1048 } 1049 error = bpf_setif(d, lifreq.lifr_name, 1050 sizeof (lifreq.lifr_name)); 1051 break; 1052 1053 #ifdef _SYSCALL32_IMPL 1054 /* 1055 * Set read timeout. 1056 */ 1057 case BIOCSRTIMEOUT32: 1058 { 1059 struct timeval32 tv; 1060 1061 if (copyin((void *)addr, &tv, sizeof (tv)) != 0) { 1062 error = EFAULT; 1063 break; 1064 } 1065 1066 /* Convert the timeout in microseconds to ticks */ 1067 d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 + 1068 tv.tv_usec); 1069 if ((d->bd_rtout == 0) && (tv.tv_usec != 0)) 1070 d->bd_rtout = 1; 1071 break; 1072 } 1073 1074 /* 1075 * Get read timeout. 1076 */ 1077 case BIOCGRTIMEOUT32: 1078 { 1079 struct timeval32 tv; 1080 clock_t ticks; 1081 1082 ticks = drv_hztousec(d->bd_rtout); 1083 tv.tv_sec = ticks / 1000000; 1084 tv.tv_usec = ticks - (tv.tv_sec * 1000000); 1085 error = copyout(&tv, (void *)addr, sizeof (tv)); 1086 break; 1087 } 1088 1089 /* 1090 * Get a list of supported device parameters. 
1091 */ 1092 case BIOCGDLTLIST32: 1093 if (d->bd_bif == 0) { 1094 error = EINVAL; 1095 } else { 1096 struct bpf_dltlist32 lst32; 1097 struct bpf_dltlist list; 1098 1099 if (copyin((void *)addr, &lst32, sizeof (lst32)) != 0) { 1100 error = EFAULT; 1101 break; 1102 } 1103 1104 list.bfl_len = lst32.bfl_len; 1105 list.bfl_list = (void *)(uint64_t)lst32.bfl_list; 1106 error = bpf_getdltlist(d, &list); 1107 if (error == 0) { 1108 lst32.bfl_len = list.bfl_len; 1109 1110 if (copyout(&lst32, (void *)addr, 1111 sizeof (lst32)) != 0) 1112 error = EFAULT; 1113 } 1114 } 1115 break; 1116 1117 /* 1118 * Set link layer read filter. 1119 */ 1120 case BIOCSETF32: { 1121 struct bpf_program32 prog32; 1122 1123 if (ddi_copyin((void *)addr, &prog32, sizeof (prog), mode)) { 1124 error = EFAULT; 1125 break; 1126 } 1127 prog.bf_len = prog32.bf_len; 1128 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns; 1129 error = bpf_setf(d, &prog); 1130 break; 1131 } 1132 #endif 1133 1134 /* 1135 * Set read timeout. 1136 */ 1137 case BIOCSRTIMEOUT: 1138 { 1139 struct timeval tv; 1140 1141 if (copyin((void *)addr, &tv, sizeof (tv)) != 0) { 1142 error = EFAULT; 1143 break; 1144 } 1145 1146 /* Convert the timeout in microseconds to ticks */ 1147 d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 + 1148 tv.tv_usec); 1149 if ((d->bd_rtout == 0) && (tv.tv_usec != 0)) 1150 d->bd_rtout = 1; 1151 break; 1152 } 1153 1154 /* 1155 * Get read timeout. 1156 */ 1157 case BIOCGRTIMEOUT: 1158 { 1159 struct timeval tv; 1160 clock_t ticks; 1161 1162 ticks = drv_hztousec(d->bd_rtout); 1163 tv.tv_sec = ticks / 1000000; 1164 tv.tv_usec = ticks - (tv.tv_sec * 1000000); 1165 if (copyout(&tv, (void *)addr, sizeof (tv)) != 0) 1166 error = EFAULT; 1167 break; 1168 } 1169 1170 /* 1171 * Get packet stats. 
1172 */ 1173 case BIOCGSTATS: 1174 { 1175 struct bpf_stat bs; 1176 1177 bs.bs_recv = d->bd_rcount; 1178 bs.bs_drop = d->bd_dcount; 1179 bs.bs_capt = d->bd_ccount; 1180 if (copyout(&bs, (void *)addr, sizeof (bs)) != 0) 1181 error = EFAULT; 1182 break; 1183 } 1184 1185 /* 1186 * Set immediate mode. 1187 */ 1188 case BIOCIMMEDIATE: 1189 if (copyin((void *)addr, &d->bd_immediate, 1190 sizeof (d->bd_immediate)) != 0) 1191 error = EFAULT; 1192 break; 1193 1194 case BIOCVERSION: 1195 { 1196 struct bpf_version bv; 1197 1198 bv.bv_major = BPF_MAJOR_VERSION; 1199 bv.bv_minor = BPF_MINOR_VERSION; 1200 if (copyout(&bv, (void *)addr, sizeof (bv)) != 0) 1201 error = EFAULT; 1202 break; 1203 } 1204 1205 case BIOCGHDRCMPLT: /* get "header already complete" flag */ 1206 if (copyout(&d->bd_hdrcmplt, (void *)addr, 1207 sizeof (d->bd_hdrcmplt)) != 0) 1208 error = EFAULT; 1209 break; 1210 1211 case BIOCSHDRCMPLT: /* set "header already complete" flag */ 1212 if (copyin((void *)addr, &d->bd_hdrcmplt, 1213 sizeof (d->bd_hdrcmplt)) != 0) 1214 error = EFAULT; 1215 break; 1216 1217 /* 1218 * Get "see sent packets" flag 1219 */ 1220 case BIOCGSEESENT: 1221 if (copyout(&d->bd_seesent, (void *)addr, 1222 sizeof (d->bd_seesent)) != 0) 1223 error = EFAULT; 1224 break; 1225 1226 /* 1227 * Set "see sent" packets flag 1228 */ 1229 case BIOCSSEESENT: 1230 if (copyin((void *)addr, &d->bd_seesent, 1231 sizeof (d->bd_seesent)) != 0) 1232 error = EFAULT; 1233 break; 1234 1235 case FIONBIO: /* Non-blocking I/O */ 1236 if (copyin((void *)addr, &d->bd_nonblock, 1237 sizeof (d->bd_nonblock)) != 0) 1238 error = EFAULT; 1239 break; 1240 } 1241 return (error); 1242 } 1243 1244 /* 1245 * Set d's packet filter program to fp. If this file already has a filter, 1246 * free it and replace it. If the new filter is "empty" (has a 0 size), then 1247 * the result is to just remove and free the existing filter. 1248 * Returns EINVAL for bogus requests. 
1249 */ 1250 int 1251 bpf_setf(struct bpf_d *d, struct bpf_program *fp) 1252 { 1253 struct bpf_insn *fcode, *old; 1254 uint_t flen, size; 1255 size_t oldsize; 1256 1257 if (fp->bf_insns == 0) { 1258 if (fp->bf_len != 0) 1259 return (EINVAL); 1260 mutex_enter(&d->bd_lock); 1261 old = d->bd_filter; 1262 oldsize = d->bd_filter_size; 1263 d->bd_filter = 0; 1264 d->bd_filter_size = 0; 1265 reset_d(d); 1266 mutex_exit(&d->bd_lock); 1267 if (old != 0) 1268 kmem_free(old, oldsize); 1269 return (0); 1270 } 1271 flen = fp->bf_len; 1272 if (flen > BPF_MAXINSNS) 1273 return (EINVAL); 1274 1275 size = flen * sizeof (*fp->bf_insns); 1276 fcode = kmem_alloc(size, KM_SLEEP); 1277 if (copyin(fp->bf_insns, fcode, size) != 0) 1278 return (EFAULT); 1279 1280 if (bpf_validate(fcode, (int)flen)) { 1281 mutex_enter(&d->bd_lock); 1282 old = d->bd_filter; 1283 oldsize = d->bd_filter_size; 1284 d->bd_filter = fcode; 1285 d->bd_filter_size = size; 1286 reset_d(d); 1287 mutex_exit(&d->bd_lock); 1288 if (old != 0) 1289 kmem_free(old, oldsize); 1290 1291 return (0); 1292 } 1293 kmem_free(fcode, size); 1294 return (EINVAL); 1295 } 1296 1297 /* 1298 * Detach a file from its current interface (if attached at all) and attach 1299 * to the interface indicated by the name stored in ifname. 1300 * Return an errno or 0. 1301 */ 1302 static int 1303 bpf_setif(struct bpf_d *d, char *ifname, int namesize) 1304 { 1305 int unit_seen; 1306 int error = 0; 1307 char *cp; 1308 int i; 1309 1310 /* 1311 * Make sure the provided name has a unit number, and default 1312 * it to '0' if not specified. 1313 * XXX This is ugly ... do this differently? 1314 */ 1315 unit_seen = 0; 1316 cp = ifname; 1317 cp[namesize - 1] = '\0'; /* sanity */ 1318 while (*cp++) 1319 if (*cp >= '0' && *cp <= '9') 1320 unit_seen = 1; 1321 if (!unit_seen) { 1322 /* Make sure to leave room for the '\0'. 
*/ 1323 for (i = 0; i < (namesize - 1); ++i) { 1324 if ((ifname[i] >= 'a' && ifname[i] <= 'z') || 1325 (ifname[i] >= 'A' && ifname[i] <= 'Z')) 1326 continue; 1327 ifname[i] = '0'; 1328 } 1329 } 1330 1331 /* 1332 * Make sure that only one call to this function happens at a time 1333 * and that we're not interleaving a read/write 1334 */ 1335 mutex_enter(&d->bd_lock); 1336 while (d->bd_inuse != 0) { 1337 d->bd_waiting++; 1338 if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 1339 d->bd_waiting--; 1340 mutex_exit(&d->bd_lock); 1341 return (EINTR); 1342 } 1343 d->bd_waiting--; 1344 } 1345 d->bd_inuse = -1; 1346 mutex_exit(&d->bd_lock); 1347 1348 if (d->bd_sbuf == 0) 1349 error = bpf_allocbufs(d); 1350 1351 if (error == 0) { 1352 mutex_enter(&d->bd_lock); 1353 if (d->bd_bif) 1354 /* 1355 * Detach if attached to something else. 1356 */ 1357 bpf_detachd(d); 1358 1359 error = bpf_attachd(d, ifname, -1); 1360 reset_d(d); 1361 d->bd_inuse = 0; 1362 if (d->bd_waiting != 0) 1363 cv_signal(&d->bd_wait); 1364 mutex_exit(&d->bd_lock); 1365 return (error); 1366 } 1367 1368 mutex_enter(&d->bd_lock); 1369 d->bd_inuse = 0; 1370 if (d->bd_waiting != 0) 1371 cv_signal(&d->bd_wait); 1372 mutex_exit(&d->bd_lock); 1373 1374 /* 1375 * Try tickle the mac layer into attaching the device... 1376 */ 1377 return (bpf_provider_tickle(ifname, d->bd_zone)); 1378 } 1379 1380 /* 1381 * Copy the interface name to the ifreq. 
1382 */ 1383 static int 1384 bpf_ifname(struct bpf_d *d, char *buffer, int bufsize) 1385 { 1386 1387 mutex_enter(&d->bd_lock); 1388 if (d->bd_bif == NULL) { 1389 mutex_exit(&d->bd_lock); 1390 return (EINVAL); 1391 } 1392 1393 (void) strlcpy(buffer, d->bd_ifname, bufsize); 1394 mutex_exit(&d->bd_lock); 1395 1396 return (0); 1397 } 1398 1399 /* 1400 * Support for poll() system call 1401 * 1402 * Return true iff the specific operation will not block indefinitely - with 1403 * the assumption that it is safe to positively acknowledge a request for the 1404 * ability to write to the BPF device. 1405 * Otherwise, return false but make a note that a selnotify() must be done. 1406 */ 1407 int 1408 bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp, 1409 struct pollhead **phpp) 1410 { 1411 struct bpf_d *d = bpf_dev_get(getminor(dev)); 1412 1413 if (events & (POLLIN | POLLRDNORM)) { 1414 /* 1415 * An imitation of the FIONREAD ioctl code. 1416 */ 1417 mutex_enter(&d->bd_lock); 1418 if (d->bd_hlen != 0 || 1419 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 1420 d->bd_slen != 0)) { 1421 *reventsp |= events & (POLLIN | POLLRDNORM); 1422 } else { 1423 *reventsp = 0; 1424 if (!anyyet) 1425 *phpp = &d->bd_poll; 1426 /* Start the read timeout if necessary */ 1427 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 1428 bpf_clear_timeout(d); 1429 /* 1430 * Only allow the timeout to be set once. 1431 */ 1432 if (d->bd_callout == 0) 1433 d->bd_callout = timeout(bpf_timed_out, 1434 d, d->bd_rtout); 1435 d->bd_state = BPF_WAITING; 1436 } 1437 } 1438 mutex_exit(&d->bd_lock); 1439 } 1440 1441 return (0); 1442 } 1443 1444 /* 1445 * Copy data from an mblk_t chain into a buffer. This works for ipnet 1446 * because the dl_ipnetinfo_t is placed in an mblk_t that leads the 1447 * packet itself. 
1448 */ 1449 static void * 1450 bpf_mcpy(void *dst_arg, const void *src_arg, size_t len) 1451 { 1452 const mblk_t *m; 1453 uint_t count; 1454 uchar_t *dst; 1455 1456 m = src_arg; 1457 dst = dst_arg; 1458 while (len > 0) { 1459 if (m == NULL) 1460 panic("bpf_mcpy"); 1461 count = (uint_t)min(M_LEN(m), len); 1462 (void) memcpy(dst, mtod(m, const void *), count); 1463 m = m->b_cont; 1464 dst += count; 1465 len -= count; 1466 } 1467 return (dst_arg); 1468 } 1469 1470 /* 1471 * Dispatch a packet to all the listeners on interface bp. 1472 * 1473 * marg pointer to the packet, either a data buffer or an mbuf chain 1474 * buflen buffer length, if marg is a data buffer 1475 * cpfn a function that can copy marg into the listener's buffer 1476 * pktlen length of the packet 1477 * issent boolean indicating whether the packet was sent or receive 1478 */ 1479 static inline void 1480 bpf_deliver(struct bpf_d *d, cp_fn_t cpfn, void *marg, uint_t pktlen, 1481 uint_t buflen, boolean_t issent) 1482 { 1483 struct timeval tv; 1484 uint_t slen; 1485 1486 if (!d->bd_seesent && issent) 1487 return; 1488 1489 /* 1490 * Accuracy of the packet counters in BPF is vital so it 1491 * is important to protect even the outer ones. 1492 */ 1493 mutex_enter(&d->bd_lock); 1494 slen = bpf_filter(d->bd_filter, marg, pktlen, buflen); 1495 DTRACE_PROBE5(bpf__packet, struct bpf_if *, d->bd_bif, 1496 struct bpf_d *, d, void *, marg, uint_t, pktlen, uint_t, slen); 1497 d->bd_rcount++; 1498 ks_stats.kp_receive.value.ui64++; 1499 if (slen != 0) { 1500 uniqtime(&tv); 1501 catchpacket(d, marg, pktlen, slen, cpfn, &tv); 1502 } 1503 mutex_exit(&d->bd_lock); 1504 } 1505 1506 /* 1507 * Incoming linkage from device drivers. 
 */
/* ARGSUSED */
void
bpf_mtap(void *arg, mac_resource_handle_t mrh, mblk_t *m, boolean_t issent)
{
	cp_fn_t cpfn;
	struct bpf_d *d = arg;
	uint_t pktlen, buflen;
	void *marg;

	pktlen = msgdsize(m);

	if (pktlen == M_LEN(m)) {
		/*
		 * The whole packet lives in the first mblk, so it can
		 * be copied with a plain memcpy from contiguous memory.
		 */
		cpfn = (cp_fn_t)memcpy;
		marg = mtod(m, void *);
		buflen = pktlen;
	} else {
		/*
		 * Multi-mblk chain: hand the chain itself to
		 * bpf_deliver and let bpf_mcpy walk b_cont.
		 * buflen of 0 tells bpf_filter marg is not a flat
		 * buffer.
		 */
		cpfn = bpf_mcpy;
		marg = m;
		buflen = 0;
	}

	bpf_deliver(d, cpfn, marg, pktlen, buflen, issent);
}

/*
 * Incoming linkage from ipnet.
 * In ipnet, there is only one event, NH_OBSERVE, that delivers packets
 * from all network interfaces. Thus the tap function needs to apply a
 * filter using the interface index/id to immitate snoop'ing on just the
 * specified interface.
 */
/* ARGSUSED */
void
bpf_itap(void *arg, mblk_t *m, boolean_t issent, uint_t length)
{
	hook_pkt_observe_t *hdr;
	struct bpf_d *d = arg;

	/*
	 * The leading mblk carries the hook_pkt_observe_t header;
	 * drop packets that are not for this descriptor's link.
	 */
	hdr = (hook_pkt_observe_t *)m->b_rptr;
	if (ntohl(hdr->hpo_ifindex) != d->bd_linkid)
		return;
	bpf_deliver(d, bpf_mcpy, m, length, 0, issent);

}

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer, waking up any sleeping reader itself (via bpf_wakeup)
 * when the buffer fills or immediate mode demands it.  "cpfn" is the
 * routine called to do the actual data transfer: memcpy is passed in
 * to copy contiguous chunks, while bpf_mcpy is passed in to copy mblk
 * chains; in the latter case, pkt is really an mblk_t.
 *
 * Called from bpf_deliver with d->bd_lock held.
 */
static void
catchpacket(struct bpf_d *d, uchar_t *pkt, uint_t pktlen, uint_t snaplen,
    cp_fn_t cpfn, struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_hdrlen;
	int do_wakeup = 0;

	++d->bd_ccount;
	ks_stats.kp_capture.value.ui64++;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			ks_stats.kp_dropped.value.ui64++;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header to the existing buffer before we add
	 * on the actual packet data.
	 */
	hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = (uint16_t)hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)((uchar_t *)hp + hdrlen, pkt,
	    (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Initialize all nonzero fields of a descriptor.
1639 */ 1640 static int 1641 bpf_allocbufs(struct bpf_d *d) 1642 { 1643 1644 d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP); 1645 if (!d->bd_fbuf) 1646 return (ENOBUFS); 1647 d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP); 1648 if (!d->bd_sbuf) { 1649 kmem_free(d->bd_fbuf, d->bd_bufsize); 1650 return (ENOBUFS); 1651 } 1652 d->bd_slen = 0; 1653 d->bd_hlen = 0; 1654 return (0); 1655 } 1656 1657 /* 1658 * Free buffers currently in use by a descriptor. 1659 * Called on close. 1660 */ 1661 static void 1662 bpf_freed(struct bpf_d *d) 1663 { 1664 /* 1665 * At this point the descriptor has been detached from its 1666 * interface and it yet hasn't been marked free. 1667 */ 1668 if (d->bd_sbuf != 0) { 1669 kmem_free(d->bd_sbuf, d->bd_bufsize); 1670 if (d->bd_hbuf != 0) 1671 kmem_free(d->bd_hbuf, d->bd_bufsize); 1672 if (d->bd_fbuf != 0) 1673 kmem_free(d->bd_fbuf, d->bd_bufsize); 1674 } 1675 if (d->bd_filter) 1676 kmem_free(d->bd_filter, d->bd_filter_size); 1677 } 1678 1679 /* 1680 * Get a list of available data link type of the interface. 
1681 */ 1682 static int 1683 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *listp) 1684 { 1685 bpf_provider_list_t *bp; 1686 bpf_provider_t *bpr; 1687 zoneid_t zoneid; 1688 uintptr_t mcip; 1689 uint_t nicdlt; 1690 uintptr_t mh; 1691 int error; 1692 int n; 1693 1694 n = 0; 1695 mh = 0; 1696 mcip = 0; 1697 error = 0; 1698 mutex_enter(&d->bd_lock); 1699 LIST_FOREACH(bp, &bpf_providers, bpl_next) { 1700 bpr = bp->bpl_what; 1701 error = MBPF_OPEN(bpr, d->bd_ifname, &mh, d->bd_zone); 1702 if (error != 0) 1703 goto next; 1704 error = MBPF_CLIENT_OPEN(bpr, mh, &mcip); 1705 if (error != 0) 1706 goto next; 1707 error = MBPF_GET_ZONE(bpr, mh, &zoneid); 1708 if (error != 0) 1709 goto next; 1710 if (d->bd_zone != GLOBAL_ZONEID && 1711 d->bd_zone != zoneid) 1712 goto next; 1713 error = MBPF_GET_DLT(bpr, mh, &nicdlt); 1714 if (error != 0) 1715 goto next; 1716 nicdlt = bpf_dl_to_dlt(nicdlt); 1717 if (listp->bfl_list != NULL) { 1718 if (n >= listp->bfl_len) { 1719 MBPF_CLIENT_CLOSE(bpr, mcip); 1720 MBPF_CLOSE(bpr, mh); 1721 break; 1722 } 1723 /* 1724 * Bumping of bd_inuse ensures the structure does not 1725 * disappear while the copyout runs and allows the for 1726 * loop to be continued. 1727 */ 1728 d->bd_inuse++; 1729 mutex_exit(&d->bd_lock); 1730 if (copyout(&nicdlt, 1731 listp->bfl_list + n, sizeof (uint_t)) != 0) 1732 error = EFAULT; 1733 mutex_enter(&d->bd_lock); 1734 if (error != 0) 1735 break; 1736 d->bd_inuse--; 1737 } 1738 n++; 1739 next: 1740 if (mcip != 0) { 1741 MBPF_CLIENT_CLOSE(bpr, mcip); 1742 mcip = 0; 1743 } 1744 if (mh != 0) { 1745 MBPF_CLOSE(bpr, mh); 1746 mh = 0; 1747 } 1748 } 1749 mutex_exit(&d->bd_lock); 1750 1751 /* 1752 * It is quite possible that one or more provider to BPF may not 1753 * know about a link name whlist others do. In that case, so long 1754 * as we have one success, do not declare an error unless it was 1755 * an EFAULT as this indicates a problem that needs to be reported. 
1756 */ 1757 if ((error != EFAULT) && (n > 0)) 1758 error = 0; 1759 1760 listp->bfl_len = n; 1761 return (error); 1762 } 1763 1764 /* 1765 * Set the data link type of a BPF instance. 1766 */ 1767 static int 1768 bpf_setdlt(struct bpf_d *d, void *addr) 1769 { 1770 char ifname[LIFNAMSIZ+1]; 1771 zoneid_t niczone; 1772 int error; 1773 int dlt; 1774 1775 if (copyin(addr, &dlt, sizeof (dlt)) != 0) 1776 return (EFAULT); 1777 1778 mutex_enter(&d->bd_lock); 1779 1780 if (d->bd_bif == 0) { /* Interface not set */ 1781 mutex_exit(&d->bd_lock); 1782 return (EINVAL); 1783 } 1784 if (d->bd_dlt == dlt) { /* NULL-op */ 1785 mutex_exit(&d->bd_lock); 1786 return (0); 1787 } 1788 1789 error = MBPF_GET_ZONE(&d->bd_mac, d->bd_bif, &niczone); 1790 if (error != 0) { 1791 mutex_exit(&d->bd_lock); 1792 return (error); 1793 } 1794 1795 /* 1796 * See the matrix at the top of the file for the permissions table 1797 * enforced by this driver. 1798 */ 1799 if ((d->bd_zone != GLOBAL_ZONEID) && (dlt != DLT_IPNET) && 1800 (niczone != d->bd_zone)) { 1801 mutex_exit(&d->bd_lock); 1802 return (EINVAL); 1803 } 1804 1805 (void) strlcpy(ifname, d->bd_ifname, sizeof (ifname)); 1806 d->bd_inuse = -1; 1807 bpf_detachd(d); 1808 error = bpf_attachd(d, ifname, dlt); 1809 reset_d(d); 1810 d->bd_inuse = 0; 1811 1812 mutex_exit(&d->bd_lock); 1813 return (error); 1814 } 1815 1816 /* 1817 * bpf_clear_timeout is called with the bd_lock mutex held, providing it 1818 * with the necessary protection to retrieve and modify bd_callout but it 1819 * does not hold the lock for its entire duration... see below... 1820 */ 1821 static void 1822 bpf_clear_timeout(struct bpf_d *d) 1823 { 1824 timeout_id_t tid = d->bd_callout; 1825 d->bd_callout = 0; 1826 d->bd_inuse++; 1827 1828 /* 1829 * If the timeout has fired and is waiting on bd_lock, we could 1830 * deadlock here because untimeout if bd_lock is held and would 1831 * wait for bpf_timed_out to finish and it never would. 
1832 */ 1833 if (tid != 0) { 1834 mutex_exit(&d->bd_lock); 1835 (void) untimeout(tid); 1836 mutex_enter(&d->bd_lock); 1837 } 1838 1839 d->bd_inuse--; 1840 } 1841 1842 /* 1843 * As a cloning device driver, BPF needs to keep track of which device 1844 * numbers are in use and which ones are not. A hash table, indexed by 1845 * the minor device number, is used to store the pointers to the 1846 * individual descriptors that are allocated in bpfopen(). 1847 * The functions below present the interface for that hash table to 1848 * the rest of the driver. 1849 */ 1850 static struct bpf_d * 1851 bpf_dev_find(minor_t minor) 1852 { 1853 struct bpf_d *d = NULL; 1854 1855 (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor, 1856 (mod_hash_val_t *)&d); 1857 1858 return (d); 1859 } 1860 1861 static void 1862 bpf_dev_add(struct bpf_d *d) 1863 { 1864 (void) mod_hash_insert(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev, 1865 (mod_hash_val_t)d); 1866 } 1867 1868 static void 1869 bpf_dev_remove(struct bpf_d *d) 1870 { 1871 struct bpf_d *stor; 1872 1873 (void) mod_hash_remove(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev, 1874 (mod_hash_val_t *)&stor); 1875 ASSERT(stor == d); 1876 } 1877 1878 /* 1879 * bpf_def_get should only ever be called for a minor number that exists, 1880 * thus there should always be a pointer in the hash table that corresponds 1881 * to it. 1882 */ 1883 static struct bpf_d * 1884 bpf_dev_get(minor_t minor) 1885 { 1886 struct bpf_d *d = NULL; 1887 1888 (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor, 1889 (mod_hash_val_t *)&d); 1890 ASSERT(d != NULL); 1891 1892 return (d); 1893 } 1894