10a0e9771SDarren Reed /* $NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $ */ 20a0e9771SDarren Reed 30a0e9771SDarren Reed /* 40a0e9771SDarren Reed * Copyright (c) 1990, 1991, 1993 50a0e9771SDarren Reed * The Regents of the University of California. All rights reserved. 60a0e9771SDarren Reed * 70a0e9771SDarren Reed * This code is derived from the Stanford/CMU enet packet filter, 80a0e9771SDarren Reed * (net/enet.c) distributed as part of 4.3BSD, and code contributed 90a0e9771SDarren Reed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 100a0e9771SDarren Reed * Berkeley Laboratory. 110a0e9771SDarren Reed * 120a0e9771SDarren Reed * Redistribution and use in source and binary forms, with or without 130a0e9771SDarren Reed * modification, are permitted provided that the following conditions 140a0e9771SDarren Reed * are met: 150a0e9771SDarren Reed * 1. Redistributions of source code must retain the above copyright 160a0e9771SDarren Reed * notice, this list of conditions and the following disclaimer. 170a0e9771SDarren Reed * 2. Redistributions in binary form must reproduce the above copyright 180a0e9771SDarren Reed * notice, this list of conditions and the following disclaimer in the 190a0e9771SDarren Reed * documentation and/or other materials provided with the distribution. 200a0e9771SDarren Reed * 3. Neither the name of the University nor the names of its contributors 210a0e9771SDarren Reed * may be used to endorse or promote products derived from this software 220a0e9771SDarren Reed * without specific prior written permission. 230a0e9771SDarren Reed * 240a0e9771SDarren Reed * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 250a0e9771SDarren Reed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 260a0e9771SDarren Reed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 270a0e9771SDarren Reed * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 280a0e9771SDarren Reed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 290a0e9771SDarren Reed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 300a0e9771SDarren Reed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 310a0e9771SDarren Reed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 320a0e9771SDarren Reed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 330a0e9771SDarren Reed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 340a0e9771SDarren Reed * SUCH DAMAGE. 350a0e9771SDarren Reed * 360a0e9771SDarren Reed * @(#)bpf.c 8.4 (Berkeley) 1/9/95 370a0e9771SDarren Reed * static char rcsid[] = 380a0e9771SDarren Reed * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp "; 390a0e9771SDarren Reed */ 400a0e9771SDarren Reed /* 410a0e9771SDarren Reed * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 420a0e9771SDarren Reed * Use is subject to license terms. 430a0e9771SDarren Reed */ 440a0e9771SDarren Reed 450a0e9771SDarren Reed /* 460a0e9771SDarren Reed * The BPF implements the following access controls for zones attempting 470a0e9771SDarren Reed * to read and write data. Writing of data requires that the net_rawaccess 480a0e9771SDarren Reed * privilege is held whilst reading data requires either net_rawaccess or 490a0e9771SDarren Reed * net_observability.
500a0e9771SDarren Reed * 510a0e9771SDarren Reed * | Shared | Exclusive | Global 520a0e9771SDarren Reed * -----------------------------+--------+------------+------------+ 530a0e9771SDarren Reed * DLT_IPNET in local zone | Read | Read | Read | 540a0e9771SDarren Reed * -----------------------------+--------+------------+------------+ 550a0e9771SDarren Reed * Raw access to local zone NIC | None | Read/Write | Read/Write | 560a0e9771SDarren Reed * -----------------------------+--------+------------+------------+ 570a0e9771SDarren Reed * Raw access to all NICs | None | None | Read/Write | 580a0e9771SDarren Reed * -----------------------------+--------+------------+------------+ 590a0e9771SDarren Reed * 600a0e9771SDarren Reed * The BPF driver is written as a cloning driver: each call to bpfopen() 610a0e9771SDarren Reed * allocates a new minor number. This provides BPF with a 1:1 relationship 620a0e9771SDarren Reed * between open's and close's. There is some amount of "descriptor state" 630a0e9771SDarren Reed * that is kept per open. Pointers to this data are stored in a hash table 640a0e9771SDarren Reed * (bpf_hash) that is index'd by the minor device number for each open file. 
650a0e9771SDarren Reed */ 660a0e9771SDarren Reed #include <sys/param.h> 670a0e9771SDarren Reed #include <sys/systm.h> 680a0e9771SDarren Reed #include <sys/time.h> 690a0e9771SDarren Reed #include <sys/ioctl.h> 700a0e9771SDarren Reed #include <sys/queue.h> 710a0e9771SDarren Reed #include <sys/filio.h> 720a0e9771SDarren Reed #include <sys/policy.h> 730a0e9771SDarren Reed #include <sys/cmn_err.h> 740a0e9771SDarren Reed #include <sys/uio.h> 750a0e9771SDarren Reed #include <sys/file.h> 760a0e9771SDarren Reed #include <sys/sysmacros.h> 770a0e9771SDarren Reed #include <sys/zone.h> 780a0e9771SDarren Reed 790a0e9771SDarren Reed #include <sys/socket.h> 800a0e9771SDarren Reed #include <sys/errno.h> 810a0e9771SDarren Reed #include <sys/poll.h> 820a0e9771SDarren Reed #include <sys/dlpi.h> 830a0e9771SDarren Reed #include <sys/neti.h> 840a0e9771SDarren Reed 850a0e9771SDarren Reed #include <net/if.h> 860a0e9771SDarren Reed 870a0e9771SDarren Reed #include <net/bpf.h> 880a0e9771SDarren Reed #include <net/bpfdesc.h> 890a0e9771SDarren Reed #include <net/dlt.h> 900a0e9771SDarren Reed 910a0e9771SDarren Reed #include <netinet/in.h> 920a0e9771SDarren Reed #include <sys/mac.h> 930a0e9771SDarren Reed #include <sys/mac_client.h> 940a0e9771SDarren Reed #include <sys/mac_impl.h> 950a0e9771SDarren Reed #include <sys/time_std_impl.h> 960a0e9771SDarren Reed #include <sys/hook.h> 970a0e9771SDarren Reed #include <sys/hook_event.h> 980a0e9771SDarren Reed 990a0e9771SDarren Reed 1000a0e9771SDarren Reed #define mtod(_v, _t) (_t)((_v)->b_rptr) 1010a0e9771SDarren Reed #define M_LEN(_m) ((_m)->b_wptr - (_m)->b_rptr) 1020a0e9771SDarren Reed 1030a0e9771SDarren Reed /* 1040a0e9771SDarren Reed * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet 1050a0e9771SDarren Reed * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k). 
1060a0e9771SDarren Reed */ 1070a0e9771SDarren Reed #define BPF_BUFSIZE (32 * 1024) 1080a0e9771SDarren Reed 1090a0e9771SDarren Reed typedef void *(*cp_fn_t)(void *, const void *, size_t); 1100a0e9771SDarren Reed 1110a0e9771SDarren Reed /* 1120a0e9771SDarren Reed * The default read buffer size, and limit for BIOCSBLEN. 1130a0e9771SDarren Reed */ 1140a0e9771SDarren Reed int bpf_bufsize = BPF_BUFSIZE; 1150a0e9771SDarren Reed int bpf_maxbufsize = (16 * 1024 * 1024); 116b7ea883bSDarren Reed static mod_hash_t *bpf_hash = NULL; 1170a0e9771SDarren Reed 1180a0e9771SDarren Reed /* 1190a0e9771SDarren Reed * Use a mutex to avoid a race condition between gathering the stats/peers 1200a0e9771SDarren Reed * and opening/closing the device. 1210a0e9771SDarren Reed */ 1220a0e9771SDarren Reed static kcondvar_t bpf_dlt_waiter; 1230a0e9771SDarren Reed static kmutex_t bpf_mtx; 1240a0e9771SDarren Reed static bpf_kstats_t ks_stats; 1250a0e9771SDarren Reed static bpf_kstats_t bpf_kstats = { 1260a0e9771SDarren Reed { "readWait", KSTAT_DATA_UINT64 }, 1270a0e9771SDarren Reed { "writeOk", KSTAT_DATA_UINT64 }, 1280a0e9771SDarren Reed { "writeError", KSTAT_DATA_UINT64 }, 1290a0e9771SDarren Reed { "receive", KSTAT_DATA_UINT64 }, 1300a0e9771SDarren Reed { "captured", KSTAT_DATA_UINT64 }, 1310a0e9771SDarren Reed { "dropped", KSTAT_DATA_UINT64 }, 1320a0e9771SDarren Reed }; 1330a0e9771SDarren Reed static kstat_t *bpf_ksp; 1340a0e9771SDarren Reed 1350a0e9771SDarren Reed /* 136b7ea883bSDarren Reed * bpf_list is a list of the BPF descriptors currently open 1370a0e9771SDarren Reed */ 1380a0e9771SDarren Reed LIST_HEAD(, bpf_d) bpf_list; 1390a0e9771SDarren Reed 1400a0e9771SDarren Reed static int bpf_allocbufs(struct bpf_d *); 1410a0e9771SDarren Reed static void bpf_clear_timeout(struct bpf_d *); 1420a0e9771SDarren Reed static void bpf_deliver(struct bpf_d *, cp_fn_t, 1430a0e9771SDarren Reed void *, uint_t, uint_t, boolean_t); 1440a0e9771SDarren Reed static void bpf_freed(struct bpf_d *); 1450a0e9771SDarren 
Reed static int bpf_ifname(struct bpf_d *d, char *, int); 1460a0e9771SDarren Reed static void *bpf_mcpy(void *, const void *, size_t); 147b7ea883bSDarren Reed static int bpf_attachd(struct bpf_d *, const char *, int); 1480a0e9771SDarren Reed static void bpf_detachd(struct bpf_d *); 1490a0e9771SDarren Reed static int bpf_setif(struct bpf_d *, char *, int); 1500a0e9771SDarren Reed static void bpf_timed_out(void *); 1510a0e9771SDarren Reed static inline void 1520a0e9771SDarren Reed bpf_wakeup(struct bpf_d *); 1530a0e9771SDarren Reed static void catchpacket(struct bpf_d *, uchar_t *, uint_t, uint_t, 1540a0e9771SDarren Reed cp_fn_t, struct timeval *); 1550a0e9771SDarren Reed static void reset_d(struct bpf_d *); 1560a0e9771SDarren Reed static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 1570a0e9771SDarren Reed static int bpf_setdlt(struct bpf_d *, void *); 1580a0e9771SDarren Reed static void bpf_dev_add(struct bpf_d *); 1590a0e9771SDarren Reed static struct bpf_d *bpf_dev_find(minor_t); 1600a0e9771SDarren Reed static struct bpf_d *bpf_dev_get(minor_t); 1610a0e9771SDarren Reed static void bpf_dev_remove(struct bpf_d *); 1620a0e9771SDarren Reed 1630a0e9771SDarren Reed static int 1640a0e9771SDarren Reed bpf_movein(struct uio *uio, int linktype, int mtu, mblk_t **mp) 1650a0e9771SDarren Reed { 1660a0e9771SDarren Reed mblk_t *m; 1670a0e9771SDarren Reed int error; 1680a0e9771SDarren Reed int len; 1690a0e9771SDarren Reed int hlen; 1700a0e9771SDarren Reed int align; 1710a0e9771SDarren Reed 1720a0e9771SDarren Reed /* 1730a0e9771SDarren Reed * Build a sockaddr based on the data link layer type. 1740a0e9771SDarren Reed * We do this at this level because the ethernet header 1750a0e9771SDarren Reed * is copied directly into the data field of the sockaddr. 1760a0e9771SDarren Reed * In the case of SLIP, there is no header and the packet 1770a0e9771SDarren Reed * is forwarded as is. 
1780a0e9771SDarren Reed * Also, we are careful to leave room at the front of the mbuf 1790a0e9771SDarren Reed * for the link level header. 1800a0e9771SDarren Reed */ 1810a0e9771SDarren Reed switch (linktype) { 1820a0e9771SDarren Reed 1830a0e9771SDarren Reed case DLT_EN10MB: 1840a0e9771SDarren Reed hlen = sizeof (struct ether_header); 1850a0e9771SDarren Reed break; 1860a0e9771SDarren Reed 1870a0e9771SDarren Reed case DLT_FDDI: 1880a0e9771SDarren Reed hlen = 16; 1890a0e9771SDarren Reed break; 1900a0e9771SDarren Reed 1910a0e9771SDarren Reed case DLT_NULL: 1920a0e9771SDarren Reed hlen = 0; 1930a0e9771SDarren Reed break; 1940a0e9771SDarren Reed 1950a0e9771SDarren Reed case DLT_IPOIB: 1960a0e9771SDarren Reed hlen = 44; 1970a0e9771SDarren Reed break; 1980a0e9771SDarren Reed 1990a0e9771SDarren Reed default: 2000a0e9771SDarren Reed return (EIO); 2010a0e9771SDarren Reed } 2020a0e9771SDarren Reed 2030a0e9771SDarren Reed align = 4 - (hlen & 3); 2040a0e9771SDarren Reed 2050a0e9771SDarren Reed len = uio->uio_resid; 2060a0e9771SDarren Reed /* 2070a0e9771SDarren Reed * If there aren't enough bytes for a link level header or the 2080a0e9771SDarren Reed * packet length exceeds the interface mtu, return an error. 
2090a0e9771SDarren Reed */ 2100a0e9771SDarren Reed if (len < hlen || len - hlen > mtu) 2110a0e9771SDarren Reed return (EMSGSIZE); 2120a0e9771SDarren Reed 2130a0e9771SDarren Reed m = allocb(len + align, BPRI_MED); 2140a0e9771SDarren Reed if (m == NULL) { 2150a0e9771SDarren Reed error = ENOBUFS; 2160a0e9771SDarren Reed goto bad; 2170a0e9771SDarren Reed } 2180a0e9771SDarren Reed 2190a0e9771SDarren Reed /* Insure the data is properly aligned */ 2200a0e9771SDarren Reed if (align > 0) 2210a0e9771SDarren Reed m->b_rptr += align; 2220a0e9771SDarren Reed m->b_wptr = m->b_rptr + len; 2230a0e9771SDarren Reed 2240a0e9771SDarren Reed error = uiomove(mtod(m, void *), len, UIO_WRITE, uio); 2250a0e9771SDarren Reed if (error) 2260a0e9771SDarren Reed goto bad; 2270a0e9771SDarren Reed *mp = m; 2280a0e9771SDarren Reed return (0); 2290a0e9771SDarren Reed 2300a0e9771SDarren Reed bad: 2310a0e9771SDarren Reed if (m != NULL) 2320a0e9771SDarren Reed freemsg(m); 2330a0e9771SDarren Reed return (error); 2340a0e9771SDarren Reed } 2350a0e9771SDarren Reed 2360a0e9771SDarren Reed 2370a0e9771SDarren Reed /* 2380a0e9771SDarren Reed * Attach file to the bpf interface, i.e. make d listen on bp. 
2390a0e9771SDarren Reed */ 240b7ea883bSDarren Reed static int 241b7ea883bSDarren Reed bpf_attachd(struct bpf_d *d, const char *ifname, int dlt) 2420a0e9771SDarren Reed { 243b7ea883bSDarren Reed bpf_provider_list_t *bp; 244b7ea883bSDarren Reed bpf_provider_t *bpr; 245b7ea883bSDarren Reed boolean_t zonematch; 246b7ea883bSDarren Reed zoneid_t niczone; 247b7ea883bSDarren Reed uintptr_t mcip; 248b7ea883bSDarren Reed zoneid_t zone; 249b7ea883bSDarren Reed uint_t nicdlt; 250b7ea883bSDarren Reed uintptr_t mh; 251b7ea883bSDarren Reed int hdrlen; 252b7ea883bSDarren Reed int error; 2530a0e9771SDarren Reed 2540a0e9771SDarren Reed ASSERT(d->bd_bif == NULL); 255b7ea883bSDarren Reed ASSERT(d->bd_mcip == NULL); 256b7ea883bSDarren Reed zone = d->bd_zone; 257b7ea883bSDarren Reed zonematch = B_TRUE; 258b7ea883bSDarren Reed again: 259b7ea883bSDarren Reed mh = 0; 260b7ea883bSDarren Reed mcip = 0; 261b7ea883bSDarren Reed LIST_FOREACH(bp, &bpf_providers, bpl_next) { 262b7ea883bSDarren Reed bpr = bp->bpl_what; 263b7ea883bSDarren Reed error = MBPF_OPEN(bpr, ifname, &mh, zone); 264b7ea883bSDarren Reed if (error != 0) 265b7ea883bSDarren Reed goto next; 266b7ea883bSDarren Reed error = MBPF_CLIENT_OPEN(bpr, mh, &mcip); 267b7ea883bSDarren Reed if (error != 0) 268b7ea883bSDarren Reed goto next; 269b7ea883bSDarren Reed error = MBPF_GET_DLT(bpr, mh, &nicdlt); 270b7ea883bSDarren Reed if (error != 0) 271b7ea883bSDarren Reed goto next; 2720a0e9771SDarren Reed 273b7ea883bSDarren Reed nicdlt = bpf_dl_to_dlt(nicdlt); 274b7ea883bSDarren Reed if (dlt != -1 && dlt != nicdlt) { 275b7ea883bSDarren Reed error = ENOENT; 276b7ea883bSDarren Reed goto next; 277b7ea883bSDarren Reed } 278b7ea883bSDarren Reed 279b7ea883bSDarren Reed error = MBPF_GET_ZONE(bpr, mh, &niczone); 280b7ea883bSDarren Reed if (error != 0) 281b7ea883bSDarren Reed goto next; 282b7ea883bSDarren Reed 283b7ea883bSDarren Reed DTRACE_PROBE4(bpf__attach, struct bpf_provider_s *, bpr, 284b7ea883bSDarren Reed uintptr_t, mh, int, nicdlt, zoneid_t, 
niczone); 285b7ea883bSDarren Reed 286b7ea883bSDarren Reed if (zonematch && niczone != zone) { 287b7ea883bSDarren Reed error = ENOENT; 288b7ea883bSDarren Reed goto next; 289b7ea883bSDarren Reed } 290b7ea883bSDarren Reed break; 291b7ea883bSDarren Reed next: 292b7ea883bSDarren Reed if (mcip != 0) { 293b7ea883bSDarren Reed MBPF_CLIENT_CLOSE(bpr, mcip); 294b7ea883bSDarren Reed mcip = 0; 295b7ea883bSDarren Reed } 296b7ea883bSDarren Reed if (mh != NULL) { 297b7ea883bSDarren Reed MBPF_CLOSE(bpr, mh); 298b7ea883bSDarren Reed mh = 0; 299b7ea883bSDarren Reed } 300b7ea883bSDarren Reed } 301b7ea883bSDarren Reed if (error != 0) { 302b7ea883bSDarren Reed if (zonematch && (zone == GLOBAL_ZONEID)) { 303b7ea883bSDarren Reed /* 304b7ea883bSDarren Reed * If we failed to do an exact match for the global 305b7ea883bSDarren Reed * zone using the global zoneid, try again in case 306b7ea883bSDarren Reed * the network interface is owned by a local zone. 307b7ea883bSDarren Reed */ 308b7ea883bSDarren Reed zonematch = B_FALSE; 309b7ea883bSDarren Reed goto again; 310b7ea883bSDarren Reed } 311b7ea883bSDarren Reed return (error); 312b7ea883bSDarren Reed } 313b7ea883bSDarren Reed 314b7ea883bSDarren Reed d->bd_mac = *bpr; 315b7ea883bSDarren Reed d->bd_mcip = mcip; 316b7ea883bSDarren Reed d->bd_bif = mh; 317b7ea883bSDarren Reed d->bd_dlt = nicdlt; 318b7ea883bSDarren Reed hdrlen = bpf_dl_hdrsize(nicdlt); 319b7ea883bSDarren Reed d->bd_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 320b7ea883bSDarren Reed 321b7ea883bSDarren Reed (void) strlcpy(d->bd_ifname, MBPF_CLIENT_NAME(&d->bd_mac, mcip), 322b7ea883bSDarren Reed sizeof (d->bd_ifname)); 323b7ea883bSDarren Reed 324b7ea883bSDarren Reed (void) MBPF_GET_LINKID(&d->bd_mac, d->bd_ifname, &d->bd_linkid, 325b7ea883bSDarren Reed zone); 326b7ea883bSDarren Reed (void) MBPF_PROMISC_ADD(&d->bd_mac, d->bd_mcip, 0, d, 3270a0e9771SDarren Reed &d->bd_promisc_handle, d->bd_promisc_flags); 328b7ea883bSDarren Reed return (0); 3290a0e9771SDarren Reed } 
3300a0e9771SDarren Reed 3310a0e9771SDarren Reed /* 3320a0e9771SDarren Reed * Detach a file from its interface. 3330a0e9771SDarren Reed */ 3340a0e9771SDarren Reed static void 3350a0e9771SDarren Reed bpf_detachd(struct bpf_d *d) 3360a0e9771SDarren Reed { 3370a0e9771SDarren Reed uintptr_t mph; 3380a0e9771SDarren Reed uintptr_t mch; 339b7ea883bSDarren Reed uintptr_t mh; 3400a0e9771SDarren Reed 341b7ea883bSDarren Reed ASSERT(d->bd_inuse == -1); 3420a0e9771SDarren Reed mch = d->bd_mcip; 3430a0e9771SDarren Reed d->bd_mcip = 0; 344b7ea883bSDarren Reed mh = d->bd_bif; 345b7ea883bSDarren Reed d->bd_bif = 0; 3460a0e9771SDarren Reed 3470a0e9771SDarren Reed /* 3480a0e9771SDarren Reed * Check if this descriptor had requested promiscuous mode. 3490a0e9771SDarren Reed * If so, turn it off. There's no need to take any action 3500a0e9771SDarren Reed * here, that is done when MBPF_PROMISC_REMOVE is used; 3510a0e9771SDarren Reed * bd_promisc is just a local flag to stop promiscuous mode 3520a0e9771SDarren Reed * from being set more than once. 3530a0e9771SDarren Reed */ 3540a0e9771SDarren Reed if (d->bd_promisc) 3550a0e9771SDarren Reed d->bd_promisc = 0; 3560a0e9771SDarren Reed 3570a0e9771SDarren Reed /* 3580a0e9771SDarren Reed * Take device out of "promiscuous" mode. Since we were able to 3590a0e9771SDarren Reed * enter "promiscuous" mode, we should be able to turn it off. 3600a0e9771SDarren Reed * Note, this field stores a pointer used to support both 3610a0e9771SDarren Reed * promiscuous and non-promiscuous callbacks for packets. 3620a0e9771SDarren Reed */ 3630a0e9771SDarren Reed mph = d->bd_promisc_handle; 3640a0e9771SDarren Reed d->bd_promisc_handle = 0; 3650a0e9771SDarren Reed 3660a0e9771SDarren Reed /* 3670a0e9771SDarren Reed * The lock has to be dropped here because mac_promisc_remove may 3680a0e9771SDarren Reed * need to wait for mac_promisc_dispatch, which has called into 3690a0e9771SDarren Reed * bpf and catchpacket is waiting for bd_lock... 
3700a0e9771SDarren Reed * i.e mac_promisc_remove() needs to be called with none of the 3710a0e9771SDarren Reed * locks held that are part of the bpf_mtap() call path. 3720a0e9771SDarren Reed */ 3730a0e9771SDarren Reed mutex_exit(&d->bd_lock); 3740a0e9771SDarren Reed if (mph != 0) 375b7ea883bSDarren Reed MBPF_PROMISC_REMOVE(&d->bd_mac, mph); 3760a0e9771SDarren Reed 3770a0e9771SDarren Reed if (mch != 0) 378b7ea883bSDarren Reed MBPF_CLIENT_CLOSE(&d->bd_mac, mch); 3790a0e9771SDarren Reed 380b7ea883bSDarren Reed if (mh != 0) 381b7ea883bSDarren Reed MBPF_CLOSE(&d->bd_mac, mh); 3820a0e9771SDarren Reed 3830a0e9771SDarren Reed /* 3840a0e9771SDarren Reed * Because this function is called with bd_lock held, so it must 3850a0e9771SDarren Reed * exit with it held. 3860a0e9771SDarren Reed */ 3870a0e9771SDarren Reed mutex_enter(&d->bd_lock); 388b7ea883bSDarren Reed *d->bd_ifname = '\0'; 389*b50f3686SDarren Reed (void) memset(&d->bd_mac, 0, sizeof (d->bd_mac)); 3900a0e9771SDarren Reed } 3910a0e9771SDarren Reed 3920a0e9771SDarren Reed 3930a0e9771SDarren Reed /* 3940a0e9771SDarren Reed * bpfilterattach() is called at load time. 
3950a0e9771SDarren Reed */ 3960a0e9771SDarren Reed int 3970a0e9771SDarren Reed bpfilterattach(void) 3980a0e9771SDarren Reed { 3990a0e9771SDarren Reed 4000a0e9771SDarren Reed bpf_hash = mod_hash_create_idhash("bpf_dev_tab", 31, 4010a0e9771SDarren Reed mod_hash_null_keydtor); 4020a0e9771SDarren Reed if (bpf_hash == NULL) 4030a0e9771SDarren Reed return (ENOMEM); 4040a0e9771SDarren Reed 4050a0e9771SDarren Reed (void) memcpy(&ks_stats, &bpf_kstats, sizeof (bpf_kstats)); 4060a0e9771SDarren Reed 4070a0e9771SDarren Reed bpf_ksp = kstat_create("bpf", 0, "global", "misc", 4080a0e9771SDarren Reed KSTAT_TYPE_NAMED, sizeof (bpf_kstats) / sizeof (kstat_named_t), 4090a0e9771SDarren Reed KSTAT_FLAG_VIRTUAL); 4100a0e9771SDarren Reed if (bpf_ksp != NULL) { 4110a0e9771SDarren Reed bpf_ksp->ks_data = &ks_stats; 4120a0e9771SDarren Reed kstat_install(bpf_ksp); 4130a0e9771SDarren Reed } else { 4140a0e9771SDarren Reed mod_hash_destroy_idhash(bpf_hash); 4150a0e9771SDarren Reed bpf_hash = NULL; 4160a0e9771SDarren Reed return (EEXIST); 4170a0e9771SDarren Reed } 4180a0e9771SDarren Reed 4190a0e9771SDarren Reed cv_init(&bpf_dlt_waiter, NULL, CV_DRIVER, NULL); 4200a0e9771SDarren Reed mutex_init(&bpf_mtx, NULL, MUTEX_DRIVER, NULL); 4210a0e9771SDarren Reed 4220a0e9771SDarren Reed LIST_INIT(&bpf_list); 4230a0e9771SDarren Reed 4240a0e9771SDarren Reed return (0); 4250a0e9771SDarren Reed } 4260a0e9771SDarren Reed 4270a0e9771SDarren Reed 4280a0e9771SDarren Reed /* 4290a0e9771SDarren Reed * bpfilterdetach() is called at unload time. 
4300a0e9771SDarren Reed */ 4310a0e9771SDarren Reed int 4320a0e9771SDarren Reed bpfilterdetach(void) 4330a0e9771SDarren Reed { 4340a0e9771SDarren Reed 4350a0e9771SDarren Reed if (bpf_ksp != NULL) { 4360a0e9771SDarren Reed kstat_delete(bpf_ksp); 4370a0e9771SDarren Reed bpf_ksp = NULL; 4380a0e9771SDarren Reed } 4390a0e9771SDarren Reed 4400a0e9771SDarren Reed mod_hash_destroy_idhash(bpf_hash); 4410a0e9771SDarren Reed bpf_hash = NULL; 4420a0e9771SDarren Reed 4430a0e9771SDarren Reed cv_destroy(&bpf_dlt_waiter); 4440a0e9771SDarren Reed mutex_destroy(&bpf_mtx); 4450a0e9771SDarren Reed 4460a0e9771SDarren Reed return (0); 4470a0e9771SDarren Reed } 4480a0e9771SDarren Reed 4490a0e9771SDarren Reed /* 4500a0e9771SDarren Reed * Open ethernet device. Clones. 4510a0e9771SDarren Reed */ 4520a0e9771SDarren Reed /* ARGSUSED */ 4530a0e9771SDarren Reed int 4540a0e9771SDarren Reed bpfopen(dev_t *devp, int flag, int mode, cred_t *cred) 4550a0e9771SDarren Reed { 4560a0e9771SDarren Reed struct bpf_d *d; 4570a0e9771SDarren Reed uint_t dmin; 4580a0e9771SDarren Reed 4590a0e9771SDarren Reed /* 4600a0e9771SDarren Reed * The security policy described at the top of this file is 4610a0e9771SDarren Reed * enforced here. 
4620a0e9771SDarren Reed */ 4630a0e9771SDarren Reed if ((flag & FWRITE) != 0) { 4640a0e9771SDarren Reed if (secpolicy_net_rawaccess(cred) != 0) 4650a0e9771SDarren Reed return (EACCES); 4660a0e9771SDarren Reed } 4670a0e9771SDarren Reed 4680a0e9771SDarren Reed if ((flag & FREAD) != 0) { 4690a0e9771SDarren Reed if ((secpolicy_net_observability(cred) != 0) && 4700a0e9771SDarren Reed (secpolicy_net_rawaccess(cred) != 0)) 4710a0e9771SDarren Reed return (EACCES); 4720a0e9771SDarren Reed } 4730a0e9771SDarren Reed 4740a0e9771SDarren Reed if ((flag & (FWRITE|FREAD)) == 0) 4750a0e9771SDarren Reed return (ENXIO); 4760a0e9771SDarren Reed 4770a0e9771SDarren Reed /* 4780a0e9771SDarren Reed * A structure is allocated per open file in BPF to store settings 4790a0e9771SDarren Reed * such as buffer capture size, provide private buffers, etc. 4800a0e9771SDarren Reed */ 4810a0e9771SDarren Reed d = (struct bpf_d *)kmem_zalloc(sizeof (*d), KM_SLEEP); 4820a0e9771SDarren Reed d->bd_bufsize = bpf_bufsize; 4830a0e9771SDarren Reed d->bd_fmode = flag; 4840a0e9771SDarren Reed d->bd_zone = crgetzoneid(cred); 4850a0e9771SDarren Reed d->bd_seesent = 1; 4860a0e9771SDarren Reed d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_PHYS| 4870a0e9771SDarren Reed MAC_PROMISC_FLAGS_NO_COPY; 4880a0e9771SDarren Reed mutex_init(&d->bd_lock, NULL, MUTEX_DRIVER, NULL); 4890a0e9771SDarren Reed cv_init(&d->bd_wait, NULL, CV_DRIVER, NULL); 4900a0e9771SDarren Reed 4910a0e9771SDarren Reed mutex_enter(&bpf_mtx); 4920a0e9771SDarren Reed /* 4930a0e9771SDarren Reed * Find an unused minor number. Obviously this is an O(n) algorithm 4940a0e9771SDarren Reed * and doesn't scale particularly well, so if there are large numbers 4950a0e9771SDarren Reed * of open file descriptors happening in real use, this design may 4960a0e9771SDarren Reed * need to be revisited. 
4970a0e9771SDarren Reed */ 4980a0e9771SDarren Reed for (dmin = 0; dmin < L_MAXMIN; dmin++) 4990a0e9771SDarren Reed if (bpf_dev_find(dmin) == NULL) 5000a0e9771SDarren Reed break; 5010a0e9771SDarren Reed if (dmin == L_MAXMIN) { 5020a0e9771SDarren Reed mutex_exit(&bpf_mtx); 5030a0e9771SDarren Reed kmem_free(d, sizeof (*d)); 5040a0e9771SDarren Reed return (ENXIO); 5050a0e9771SDarren Reed } 5060a0e9771SDarren Reed d->bd_dev = dmin; 5070a0e9771SDarren Reed LIST_INSERT_HEAD(&bpf_list, d, bd_list); 5080a0e9771SDarren Reed bpf_dev_add(d); 5090a0e9771SDarren Reed mutex_exit(&bpf_mtx); 5100a0e9771SDarren Reed 5110a0e9771SDarren Reed *devp = makedevice(getmajor(*devp), dmin); 5120a0e9771SDarren Reed 5130a0e9771SDarren Reed return (0); 5140a0e9771SDarren Reed } 5150a0e9771SDarren Reed 5160a0e9771SDarren Reed /* 5170a0e9771SDarren Reed * Close the descriptor by detaching it from its interface, 5180a0e9771SDarren Reed * deallocating its buffers, and marking it free. 5190a0e9771SDarren Reed * 5200a0e9771SDarren Reed * Because we only allow a device to be opened once, there is always a 5210a0e9771SDarren Reed * 1 to 1 relationship between opens and closes supporting this function. 
5220a0e9771SDarren Reed */ 5230a0e9771SDarren Reed /* ARGSUSED */ 5240a0e9771SDarren Reed int 5250a0e9771SDarren Reed bpfclose(dev_t dev, int flag, int otyp, cred_t *cred_p) 5260a0e9771SDarren Reed { 5270a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 5280a0e9771SDarren Reed 5290a0e9771SDarren Reed mutex_enter(&d->bd_lock); 530b7ea883bSDarren Reed 531b7ea883bSDarren Reed while (d->bd_inuse != 0) { 532b7ea883bSDarren Reed d->bd_waiting++; 533b7ea883bSDarren Reed if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 534b7ea883bSDarren Reed d->bd_waiting--; 535b7ea883bSDarren Reed mutex_exit(&d->bd_lock); 536b7ea883bSDarren Reed return (EINTR); 537b7ea883bSDarren Reed } 538b7ea883bSDarren Reed d->bd_waiting--; 539b7ea883bSDarren Reed } 540b7ea883bSDarren Reed 541b7ea883bSDarren Reed d->bd_inuse = -1; 5420a0e9771SDarren Reed if (d->bd_state == BPF_WAITING) 5430a0e9771SDarren Reed bpf_clear_timeout(d); 5440a0e9771SDarren Reed d->bd_state = BPF_IDLE; 5450a0e9771SDarren Reed if (d->bd_bif) 5460a0e9771SDarren Reed bpf_detachd(d); 5470a0e9771SDarren Reed mutex_exit(&d->bd_lock); 5480a0e9771SDarren Reed 5490a0e9771SDarren Reed mutex_enter(&bpf_mtx); 5500a0e9771SDarren Reed LIST_REMOVE(d, bd_list); 5510a0e9771SDarren Reed bpf_dev_remove(d); 5520a0e9771SDarren Reed mutex_exit(&bpf_mtx); 5530a0e9771SDarren Reed 5540a0e9771SDarren Reed mutex_enter(&d->bd_lock); 5550a0e9771SDarren Reed mutex_destroy(&d->bd_lock); 5560a0e9771SDarren Reed cv_destroy(&d->bd_wait); 5570a0e9771SDarren Reed 5580a0e9771SDarren Reed bpf_freed(d); 5590a0e9771SDarren Reed kmem_free(d, sizeof (*d)); 5600a0e9771SDarren Reed 5610a0e9771SDarren Reed return (0); 5620a0e9771SDarren Reed } 5630a0e9771SDarren Reed 5640a0e9771SDarren Reed /* 5650a0e9771SDarren Reed * Rotate the packet buffers in descriptor d. Move the store buffer 5660a0e9771SDarren Reed * into the hold slot, and the free buffer into the store slot. 5670a0e9771SDarren Reed * Zero the length of the new store buffer. 
5680a0e9771SDarren Reed */ 5690a0e9771SDarren Reed #define ROTATE_BUFFERS(d) \ 5700a0e9771SDarren Reed (d)->bd_hbuf = (d)->bd_sbuf; \ 5710a0e9771SDarren Reed (d)->bd_hlen = (d)->bd_slen; \ 5720a0e9771SDarren Reed (d)->bd_sbuf = (d)->bd_fbuf; \ 5730a0e9771SDarren Reed (d)->bd_slen = 0; \ 5740a0e9771SDarren Reed (d)->bd_fbuf = 0; 5750a0e9771SDarren Reed /* 5760a0e9771SDarren Reed * bpfread - read next chunk of packets from buffers 5770a0e9771SDarren Reed */ 5780a0e9771SDarren Reed /* ARGSUSED */ 5790a0e9771SDarren Reed int 5800a0e9771SDarren Reed bpfread(dev_t dev, struct uio *uio, cred_t *cred) 5810a0e9771SDarren Reed { 5820a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 5830a0e9771SDarren Reed int timed_out; 5840a0e9771SDarren Reed ulong_t delay; 5850a0e9771SDarren Reed int error; 5860a0e9771SDarren Reed 5870a0e9771SDarren Reed if ((d->bd_fmode & FREAD) == 0) 5880a0e9771SDarren Reed return (EBADF); 5890a0e9771SDarren Reed 5900a0e9771SDarren Reed /* 5910a0e9771SDarren Reed * Restrict application to use a buffer the same size as 5920a0e9771SDarren Reed * the kernel buffers. 5930a0e9771SDarren Reed */ 5940a0e9771SDarren Reed if (uio->uio_resid != d->bd_bufsize) 5950a0e9771SDarren Reed return (EINVAL); 5960a0e9771SDarren Reed 5970a0e9771SDarren Reed mutex_enter(&d->bd_lock); 5980a0e9771SDarren Reed if (d->bd_state == BPF_WAITING) 5990a0e9771SDarren Reed bpf_clear_timeout(d); 6000a0e9771SDarren Reed timed_out = (d->bd_state == BPF_TIMED_OUT); 6010a0e9771SDarren Reed d->bd_state = BPF_IDLE; 6020a0e9771SDarren Reed /* 6030a0e9771SDarren Reed * If the hold buffer is empty, then do a timed sleep, which 6040a0e9771SDarren Reed * ends when the timeout expires or when enough packets 6050a0e9771SDarren Reed * have arrived to fill the store buffer. 
6060a0e9771SDarren Reed */ 6070a0e9771SDarren Reed while (d->bd_hbuf == 0) { 6080a0e9771SDarren Reed if (d->bd_nonblock) { 6090a0e9771SDarren Reed if (d->bd_slen == 0) { 6100a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6110a0e9771SDarren Reed return (EWOULDBLOCK); 6120a0e9771SDarren Reed } 6130a0e9771SDarren Reed ROTATE_BUFFERS(d); 6140a0e9771SDarren Reed break; 6150a0e9771SDarren Reed } 6160a0e9771SDarren Reed 6170a0e9771SDarren Reed if ((d->bd_immediate || timed_out) && d->bd_slen != 0) { 6180a0e9771SDarren Reed /* 6190a0e9771SDarren Reed * A packet(s) either arrived since the previous 6200a0e9771SDarren Reed * read or arrived while we were asleep. 6210a0e9771SDarren Reed * Rotate the buffers and return what's here. 6220a0e9771SDarren Reed */ 6230a0e9771SDarren Reed ROTATE_BUFFERS(d); 6240a0e9771SDarren Reed break; 6250a0e9771SDarren Reed } 6260a0e9771SDarren Reed ks_stats.kp_read_wait.value.ui64++; 6270a0e9771SDarren Reed delay = ddi_get_lbolt() + d->bd_rtout; 6280a0e9771SDarren Reed error = cv_timedwait_sig(&d->bd_wait, &d->bd_lock, delay); 6290a0e9771SDarren Reed if (error == 0) { 6300a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6310a0e9771SDarren Reed return (EINTR); 6320a0e9771SDarren Reed } 6330a0e9771SDarren Reed if (error == -1) { 6340a0e9771SDarren Reed /* 6350a0e9771SDarren Reed * On a timeout, return what's in the buffer, 6360a0e9771SDarren Reed * which may be nothing. If there is something 6370a0e9771SDarren Reed * in the store buffer, we can rotate the buffers. 6380a0e9771SDarren Reed */ 6390a0e9771SDarren Reed if (d->bd_hbuf) 6400a0e9771SDarren Reed /* 6410a0e9771SDarren Reed * We filled up the buffer in between 6420a0e9771SDarren Reed * getting the timeout and arriving 6430a0e9771SDarren Reed * here, so we don't need to rotate. 
6440a0e9771SDarren Reed */ 6450a0e9771SDarren Reed break; 6460a0e9771SDarren Reed 6470a0e9771SDarren Reed if (d->bd_slen == 0) { 6480a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6490a0e9771SDarren Reed return (0); 6500a0e9771SDarren Reed } 6510a0e9771SDarren Reed ROTATE_BUFFERS(d); 6520a0e9771SDarren Reed } 6530a0e9771SDarren Reed } 6540a0e9771SDarren Reed /* 6550a0e9771SDarren Reed * At this point, we know we have something in the hold slot. 6560a0e9771SDarren Reed */ 6570a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6580a0e9771SDarren Reed 6590a0e9771SDarren Reed /* 6600a0e9771SDarren Reed * Move data from hold buffer into user space. 6610a0e9771SDarren Reed * We know the entire buffer is transferred since 6620a0e9771SDarren Reed * we checked above that the read buffer is bpf_bufsize bytes. 6630a0e9771SDarren Reed */ 6640a0e9771SDarren Reed error = uiomove(d->bd_hbuf, d->bd_hlen, UIO_READ, uio); 6650a0e9771SDarren Reed 6660a0e9771SDarren Reed mutex_enter(&d->bd_lock); 6670a0e9771SDarren Reed d->bd_fbuf = d->bd_hbuf; 6680a0e9771SDarren Reed d->bd_hbuf = 0; 6690a0e9771SDarren Reed d->bd_hlen = 0; 6700a0e9771SDarren Reed done: 6710a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6720a0e9771SDarren Reed return (error); 6730a0e9771SDarren Reed } 6740a0e9771SDarren Reed 6750a0e9771SDarren Reed 6760a0e9771SDarren Reed /* 6770a0e9771SDarren Reed * If there are processes sleeping on this descriptor, wake them up. 6780a0e9771SDarren Reed * NOTE: the lock for bd_wait is bd_lock and is held by bpf_deliver, 6790a0e9771SDarren Reed * so there is no code here grabbing it. 
6800a0e9771SDarren Reed */ 6810a0e9771SDarren Reed static inline void 6820a0e9771SDarren Reed bpf_wakeup(struct bpf_d *d) 6830a0e9771SDarren Reed { 6840a0e9771SDarren Reed cv_signal(&d->bd_wait); 6850a0e9771SDarren Reed } 6860a0e9771SDarren Reed 6870a0e9771SDarren Reed static void 6880a0e9771SDarren Reed bpf_timed_out(void *arg) 6890a0e9771SDarren Reed { 6900a0e9771SDarren Reed struct bpf_d *d = arg; 6910a0e9771SDarren Reed 6920a0e9771SDarren Reed mutex_enter(&d->bd_lock); 6930a0e9771SDarren Reed if (d->bd_state == BPF_WAITING) { 6940a0e9771SDarren Reed d->bd_state = BPF_TIMED_OUT; 6950a0e9771SDarren Reed if (d->bd_slen != 0) 6960a0e9771SDarren Reed cv_signal(&d->bd_wait); 6970a0e9771SDarren Reed } 6980a0e9771SDarren Reed mutex_exit(&d->bd_lock); 6990a0e9771SDarren Reed } 7000a0e9771SDarren Reed 7010a0e9771SDarren Reed 7020a0e9771SDarren Reed /* ARGSUSED */ 7030a0e9771SDarren Reed int 7040a0e9771SDarren Reed bpfwrite(dev_t dev, struct uio *uio, cred_t *cred) 7050a0e9771SDarren Reed { 7060a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 7070a0e9771SDarren Reed uintptr_t mch; 7080a0e9771SDarren Reed uint_t mtu; 7090a0e9771SDarren Reed mblk_t *m; 7100a0e9771SDarren Reed int error; 7110a0e9771SDarren Reed int dlt; 7120a0e9771SDarren Reed 7130a0e9771SDarren Reed if ((d->bd_fmode & FWRITE) == 0) 7140a0e9771SDarren Reed return (EBADF); 7150a0e9771SDarren Reed 7160a0e9771SDarren Reed mutex_enter(&d->bd_lock); 717b7ea883bSDarren Reed if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif == 0) { 7180a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7190a0e9771SDarren Reed return (EINTR); 7200a0e9771SDarren Reed } 7210a0e9771SDarren Reed 7220a0e9771SDarren Reed if (uio->uio_resid == 0) { 7230a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7240a0e9771SDarren Reed return (0); 7250a0e9771SDarren Reed } 7260a0e9771SDarren Reed 7270a0e9771SDarren Reed while (d->bd_inuse < 0) { 7280a0e9771SDarren Reed d->bd_waiting++; 7290a0e9771SDarren Reed if (cv_wait_sig(&d->bd_wait, 
&d->bd_lock) <= 0) { 7300a0e9771SDarren Reed d->bd_waiting--; 7310a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7320a0e9771SDarren Reed return (EINTR); 7330a0e9771SDarren Reed } 7340a0e9771SDarren Reed d->bd_waiting--; 7350a0e9771SDarren Reed } 7360a0e9771SDarren Reed 7370a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7380a0e9771SDarren Reed 739b7ea883bSDarren Reed dlt = d->bd_dlt; 7400a0e9771SDarren Reed mch = d->bd_mcip; 741b7ea883bSDarren Reed MBPF_SDU_GET(&d->bd_mac, d->bd_bif, &mtu); 7420a0e9771SDarren Reed d->bd_inuse++; 7430a0e9771SDarren Reed 7440a0e9771SDarren Reed m = NULL; 7450a0e9771SDarren Reed if (dlt == DLT_IPNET) { 7460a0e9771SDarren Reed error = EIO; 7470a0e9771SDarren Reed goto done; 7480a0e9771SDarren Reed } 7490a0e9771SDarren Reed 7500a0e9771SDarren Reed error = bpf_movein(uio, dlt, mtu, &m); 7510a0e9771SDarren Reed if (error) 7520a0e9771SDarren Reed goto done; 7530a0e9771SDarren Reed 754b7ea883bSDarren Reed DTRACE_PROBE4(bpf__tx, struct bpf_d *, d, int, dlt, 755b7ea883bSDarren Reed uint_t, mtu, mblk_t *, m); 7560a0e9771SDarren Reed 7570a0e9771SDarren Reed if (M_LEN(m) > mtu) { 7580a0e9771SDarren Reed error = EMSGSIZE; 7590a0e9771SDarren Reed goto done; 7600a0e9771SDarren Reed } 7610a0e9771SDarren Reed 762b7ea883bSDarren Reed error = MBPF_TX(&d->bd_mac, mch, m); 7630a0e9771SDarren Reed /* 7640a0e9771SDarren Reed * The "tx" action here is required to consume the mblk_t. 
7650a0e9771SDarren Reed */ 7660a0e9771SDarren Reed m = NULL; 7670a0e9771SDarren Reed 7680a0e9771SDarren Reed done: 7690a0e9771SDarren Reed if (error == 0) 7700a0e9771SDarren Reed ks_stats.kp_write_ok.value.ui64++; 7710a0e9771SDarren Reed else 7720a0e9771SDarren Reed ks_stats.kp_write_error.value.ui64++; 7730a0e9771SDarren Reed if (m != NULL) 7740a0e9771SDarren Reed freemsg(m); 7750a0e9771SDarren Reed 7760a0e9771SDarren Reed mutex_enter(&d->bd_lock); 7770a0e9771SDarren Reed d->bd_inuse--; 7780a0e9771SDarren Reed if ((d->bd_inuse == 0) && (d->bd_waiting != 0)) 7790a0e9771SDarren Reed cv_signal(&d->bd_wait); 7800a0e9771SDarren Reed mutex_exit(&d->bd_lock); 7810a0e9771SDarren Reed 7820a0e9771SDarren Reed /* 7830a0e9771SDarren Reed * The driver frees the mbuf. 7840a0e9771SDarren Reed */ 7850a0e9771SDarren Reed return (error); 7860a0e9771SDarren Reed } 7870a0e9771SDarren Reed 7880a0e9771SDarren Reed 7890a0e9771SDarren Reed /* 7900a0e9771SDarren Reed * Reset a descriptor by flushing its packet buffer and clearing the 7910a0e9771SDarren Reed * receive and drop counts. Should be called at splnet. 7920a0e9771SDarren Reed */ 7930a0e9771SDarren Reed static void 7940a0e9771SDarren Reed reset_d(struct bpf_d *d) 7950a0e9771SDarren Reed { 7960a0e9771SDarren Reed if (d->bd_hbuf) { 7970a0e9771SDarren Reed /* Free the hold buffer. */ 7980a0e9771SDarren Reed d->bd_fbuf = d->bd_hbuf; 7990a0e9771SDarren Reed d->bd_hbuf = 0; 8000a0e9771SDarren Reed } 8010a0e9771SDarren Reed d->bd_slen = 0; 8020a0e9771SDarren Reed d->bd_hlen = 0; 8030a0e9771SDarren Reed d->bd_rcount = 0; 8040a0e9771SDarren Reed d->bd_dcount = 0; 8050a0e9771SDarren Reed d->bd_ccount = 0; 8060a0e9771SDarren Reed } 8070a0e9771SDarren Reed 8080a0e9771SDarren Reed /* 8090a0e9771SDarren Reed * FIONREAD Check for read packet available. 8100a0e9771SDarren Reed * BIOCGBLEN Get buffer len [for read()]. 8110a0e9771SDarren Reed * BIOCSETF Set ethernet read filter. 8120a0e9771SDarren Reed * BIOCFLUSH Flush read packet buffer. 
8130a0e9771SDarren Reed * BIOCPROMISC Put interface into promiscuous mode. 8140a0e9771SDarren Reed * BIOCGDLT Get link layer type. 8150a0e9771SDarren Reed * BIOCGETIF Get interface name. 8160a0e9771SDarren Reed * BIOCSETIF Set interface. 8170a0e9771SDarren Reed * BIOCSRTIMEOUT Set read timeout. 8180a0e9771SDarren Reed * BIOCGRTIMEOUT Get read timeout. 8190a0e9771SDarren Reed * BIOCGSTATS Get packet stats. 8200a0e9771SDarren Reed * BIOCIMMEDIATE Set immediate mode. 8210a0e9771SDarren Reed * BIOCVERSION Get filter language version. 8220a0e9771SDarren Reed * BIOCGHDRCMPLT Get "header already complete" flag. 8230a0e9771SDarren Reed * BIOCSHDRCMPLT Set "header already complete" flag. 8240a0e9771SDarren Reed */ 8250a0e9771SDarren Reed /* ARGSUSED */ 8260a0e9771SDarren Reed int 8270a0e9771SDarren Reed bpfioctl(dev_t dev, int cmd, intptr_t addr, int mode, cred_t *cred, int *rval) 8280a0e9771SDarren Reed { 8290a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 8300a0e9771SDarren Reed struct bpf_program prog; 8310a0e9771SDarren Reed struct lifreq lifreq; 8320a0e9771SDarren Reed struct ifreq ifreq; 8330a0e9771SDarren Reed int error = 0; 8340a0e9771SDarren Reed uint_t size; 8350a0e9771SDarren Reed 8360a0e9771SDarren Reed /* 8370a0e9771SDarren Reed * Refresh the PID associated with this bpf file. 8380a0e9771SDarren Reed */ 8390a0e9771SDarren Reed mutex_enter(&d->bd_lock); 8400a0e9771SDarren Reed if (d->bd_state == BPF_WAITING) 8410a0e9771SDarren Reed bpf_clear_timeout(d); 8420a0e9771SDarren Reed d->bd_state = BPF_IDLE; 8430a0e9771SDarren Reed mutex_exit(&d->bd_lock); 8440a0e9771SDarren Reed 8450a0e9771SDarren Reed switch (cmd) { 8460a0e9771SDarren Reed 8470a0e9771SDarren Reed default: 8480a0e9771SDarren Reed error = EINVAL; 8490a0e9771SDarren Reed break; 8500a0e9771SDarren Reed 8510a0e9771SDarren Reed /* 8520a0e9771SDarren Reed * Check for read packet available. 
8530a0e9771SDarren Reed */ 8540a0e9771SDarren Reed case FIONREAD: 8550a0e9771SDarren Reed { 8560a0e9771SDarren Reed int n; 8570a0e9771SDarren Reed 8580a0e9771SDarren Reed mutex_enter(&d->bd_lock); 8590a0e9771SDarren Reed n = d->bd_slen; 8600a0e9771SDarren Reed if (d->bd_hbuf) 8610a0e9771SDarren Reed n += d->bd_hlen; 8620a0e9771SDarren Reed mutex_exit(&d->bd_lock); 8630a0e9771SDarren Reed 8640a0e9771SDarren Reed *(int *)addr = n; 8650a0e9771SDarren Reed break; 8660a0e9771SDarren Reed } 8670a0e9771SDarren Reed 8680a0e9771SDarren Reed /* 8690a0e9771SDarren Reed * Get buffer len [for read()]. 8700a0e9771SDarren Reed */ 8710a0e9771SDarren Reed case BIOCGBLEN: 8720a0e9771SDarren Reed error = copyout(&d->bd_bufsize, (void *)addr, 8730a0e9771SDarren Reed sizeof (d->bd_bufsize)); 8740a0e9771SDarren Reed break; 8750a0e9771SDarren Reed 8760a0e9771SDarren Reed /* 8770a0e9771SDarren Reed * Set buffer length. 8780a0e9771SDarren Reed */ 8790a0e9771SDarren Reed case BIOCSBLEN: 8800a0e9771SDarren Reed if (copyin((void *)addr, &size, sizeof (size)) != 0) { 8810a0e9771SDarren Reed error = EFAULT; 8820a0e9771SDarren Reed break; 8830a0e9771SDarren Reed } 8840a0e9771SDarren Reed 8850a0e9771SDarren Reed mutex_enter(&d->bd_lock); 8860a0e9771SDarren Reed if (d->bd_bif != 0) { 8870a0e9771SDarren Reed error = EINVAL; 8880a0e9771SDarren Reed } else { 8890a0e9771SDarren Reed if (size > bpf_maxbufsize) 8900a0e9771SDarren Reed size = bpf_maxbufsize; 8910a0e9771SDarren Reed else if (size < BPF_MINBUFSIZE) 8920a0e9771SDarren Reed size = BPF_MINBUFSIZE; 8930a0e9771SDarren Reed 8940a0e9771SDarren Reed d->bd_bufsize = size; 8950a0e9771SDarren Reed } 8960a0e9771SDarren Reed mutex_exit(&d->bd_lock); 8970a0e9771SDarren Reed 8980a0e9771SDarren Reed if (error == 0) 8990a0e9771SDarren Reed error = copyout(&size, (void *)addr, sizeof (size)); 9000a0e9771SDarren Reed break; 9010a0e9771SDarren Reed 9020a0e9771SDarren Reed /* 9030a0e9771SDarren Reed * Set link layer read filter. 
9040a0e9771SDarren Reed */ 9050a0e9771SDarren Reed case BIOCSETF: 9060a0e9771SDarren Reed if (ddi_copyin((void *)addr, &prog, sizeof (prog), mode)) { 9070a0e9771SDarren Reed error = EFAULT; 9080a0e9771SDarren Reed break; 9090a0e9771SDarren Reed } 9100a0e9771SDarren Reed error = bpf_setf(d, &prog); 9110a0e9771SDarren Reed break; 9120a0e9771SDarren Reed 9130a0e9771SDarren Reed /* 9140a0e9771SDarren Reed * Flush read packet buffer. 9150a0e9771SDarren Reed */ 9160a0e9771SDarren Reed case BIOCFLUSH: 9170a0e9771SDarren Reed mutex_enter(&d->bd_lock); 9180a0e9771SDarren Reed reset_d(d); 9190a0e9771SDarren Reed mutex_exit(&d->bd_lock); 9200a0e9771SDarren Reed break; 9210a0e9771SDarren Reed 9220a0e9771SDarren Reed /* 9230a0e9771SDarren Reed * Put interface into promiscuous mode. 9240a0e9771SDarren Reed * This is a one-way ioctl, it is not used to turn promiscuous 9250a0e9771SDarren Reed * mode off. 9260a0e9771SDarren Reed */ 9270a0e9771SDarren Reed case BIOCPROMISC: 9280a0e9771SDarren Reed if (d->bd_bif == 0) { 9290a0e9771SDarren Reed /* 9300a0e9771SDarren Reed * No interface attached yet. 
9310a0e9771SDarren Reed */ 9320a0e9771SDarren Reed error = EINVAL; 9330a0e9771SDarren Reed break; 9340a0e9771SDarren Reed } 9350a0e9771SDarren Reed mutex_enter(&d->bd_lock); 9360a0e9771SDarren Reed if (d->bd_promisc == 0) { 9370a0e9771SDarren Reed 9380a0e9771SDarren Reed if (d->bd_promisc_handle) { 9390a0e9771SDarren Reed uintptr_t mph; 9400a0e9771SDarren Reed 9410a0e9771SDarren Reed mph = d->bd_promisc_handle; 9420a0e9771SDarren Reed d->bd_promisc_handle = 0; 9430a0e9771SDarren Reed 9440a0e9771SDarren Reed mutex_exit(&d->bd_lock); 945b7ea883bSDarren Reed MBPF_PROMISC_REMOVE(&d->bd_mac, mph); 9460a0e9771SDarren Reed mutex_enter(&d->bd_lock); 9470a0e9771SDarren Reed } 9480a0e9771SDarren Reed 9490a0e9771SDarren Reed d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_COPY; 950b7ea883bSDarren Reed error = MBPF_PROMISC_ADD(&d->bd_mac, 9510a0e9771SDarren Reed d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d, 9520a0e9771SDarren Reed &d->bd_promisc_handle, d->bd_promisc_flags); 9530a0e9771SDarren Reed if (error == 0) 9540a0e9771SDarren Reed d->bd_promisc = 1; 9550a0e9771SDarren Reed } 9560a0e9771SDarren Reed mutex_exit(&d->bd_lock); 9570a0e9771SDarren Reed break; 9580a0e9771SDarren Reed 9590a0e9771SDarren Reed /* 9600a0e9771SDarren Reed * Get device parameters. 9610a0e9771SDarren Reed */ 9620a0e9771SDarren Reed case BIOCGDLT: 9630a0e9771SDarren Reed if (d->bd_bif == 0) 9640a0e9771SDarren Reed error = EINVAL; 9650a0e9771SDarren Reed else 966b7ea883bSDarren Reed error = copyout(&d->bd_dlt, (void *)addr, 967b7ea883bSDarren Reed sizeof (d->bd_dlt)); 9680a0e9771SDarren Reed break; 9690a0e9771SDarren Reed 9700a0e9771SDarren Reed /* 9710a0e9771SDarren Reed * Get a list of supported device parameters. 
9720a0e9771SDarren Reed */ 9730a0e9771SDarren Reed case BIOCGDLTLIST: 9740a0e9771SDarren Reed if (d->bd_bif == 0) { 9750a0e9771SDarren Reed error = EINVAL; 9760a0e9771SDarren Reed } else { 9770a0e9771SDarren Reed struct bpf_dltlist list; 9780a0e9771SDarren Reed 9790a0e9771SDarren Reed if (copyin((void *)addr, &list, sizeof (list)) != 0) { 9800a0e9771SDarren Reed error = EFAULT; 9810a0e9771SDarren Reed break; 9820a0e9771SDarren Reed } 9830a0e9771SDarren Reed error = bpf_getdltlist(d, &list); 9840a0e9771SDarren Reed if ((error == 0) && 9850a0e9771SDarren Reed copyout(&list, (void *)addr, sizeof (list)) != 0) 9860a0e9771SDarren Reed error = EFAULT; 9870a0e9771SDarren Reed } 9880a0e9771SDarren Reed break; 9890a0e9771SDarren Reed 9900a0e9771SDarren Reed /* 9910a0e9771SDarren Reed * Set device parameters. 9920a0e9771SDarren Reed */ 9930a0e9771SDarren Reed case BIOCSDLT: 9940a0e9771SDarren Reed error = bpf_setdlt(d, (void *)addr); 9950a0e9771SDarren Reed break; 9960a0e9771SDarren Reed 9970a0e9771SDarren Reed /* 9980a0e9771SDarren Reed * Get interface name. 9990a0e9771SDarren Reed */ 10000a0e9771SDarren Reed case BIOCGETIF: 10010a0e9771SDarren Reed if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) { 10020a0e9771SDarren Reed error = EFAULT; 10030a0e9771SDarren Reed break; 10040a0e9771SDarren Reed } 10050a0e9771SDarren Reed error = bpf_ifname(d, ifreq.ifr_name, sizeof (ifreq.ifr_name)); 10060a0e9771SDarren Reed if ((error == 0) && 10070a0e9771SDarren Reed copyout(&ifreq, (void *)addr, sizeof (ifreq)) != 0) { 10080a0e9771SDarren Reed error = EFAULT; 10090a0e9771SDarren Reed break; 10100a0e9771SDarren Reed } 10110a0e9771SDarren Reed break; 10120a0e9771SDarren Reed 10130a0e9771SDarren Reed /* 10140a0e9771SDarren Reed * Set interface. 
10150a0e9771SDarren Reed */ 10160a0e9771SDarren Reed case BIOCSETIF: 10170a0e9771SDarren Reed if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) { 10180a0e9771SDarren Reed error = EFAULT; 10190a0e9771SDarren Reed break; 10200a0e9771SDarren Reed } 10210a0e9771SDarren Reed error = bpf_setif(d, ifreq.ifr_name, sizeof (ifreq.ifr_name)); 10220a0e9771SDarren Reed break; 10230a0e9771SDarren Reed 10240a0e9771SDarren Reed /* 10250a0e9771SDarren Reed * Get interface name. 10260a0e9771SDarren Reed */ 10270a0e9771SDarren Reed case BIOCGETLIF: 10280a0e9771SDarren Reed if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) { 10290a0e9771SDarren Reed error = EFAULT; 10300a0e9771SDarren Reed break; 10310a0e9771SDarren Reed } 10320a0e9771SDarren Reed error = bpf_ifname(d, lifreq.lifr_name, 10330a0e9771SDarren Reed sizeof (lifreq.lifr_name)); 10340a0e9771SDarren Reed if ((error == 0) && 10350a0e9771SDarren Reed copyout(&lifreq, (void *)addr, sizeof (lifreq)) != 0) { 10360a0e9771SDarren Reed error = EFAULT; 10370a0e9771SDarren Reed break; 10380a0e9771SDarren Reed } 10390a0e9771SDarren Reed break; 10400a0e9771SDarren Reed 10410a0e9771SDarren Reed /* 10420a0e9771SDarren Reed * Set interface. 10430a0e9771SDarren Reed */ 10440a0e9771SDarren Reed case BIOCSETLIF: 10450a0e9771SDarren Reed if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) { 10460a0e9771SDarren Reed error = EFAULT; 10470a0e9771SDarren Reed break; 10480a0e9771SDarren Reed } 10490a0e9771SDarren Reed error = bpf_setif(d, lifreq.lifr_name, 10500a0e9771SDarren Reed sizeof (lifreq.lifr_name)); 10510a0e9771SDarren Reed break; 10520a0e9771SDarren Reed 10530a0e9771SDarren Reed #ifdef _SYSCALL32_IMPL 10540a0e9771SDarren Reed /* 10550a0e9771SDarren Reed * Set read timeout. 
10560a0e9771SDarren Reed */ 10570a0e9771SDarren Reed case BIOCSRTIMEOUT32: 10580a0e9771SDarren Reed { 10590a0e9771SDarren Reed struct timeval32 tv; 10600a0e9771SDarren Reed 10610a0e9771SDarren Reed if (copyin((void *)addr, &tv, sizeof (tv)) != 0) { 10620a0e9771SDarren Reed error = EFAULT; 10630a0e9771SDarren Reed break; 10640a0e9771SDarren Reed } 10650a0e9771SDarren Reed 10660a0e9771SDarren Reed /* Convert the timeout in microseconds to ticks */ 10670a0e9771SDarren Reed d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 + 10680a0e9771SDarren Reed tv.tv_usec); 10690a0e9771SDarren Reed if ((d->bd_rtout == 0) && (tv.tv_usec != 0)) 10700a0e9771SDarren Reed d->bd_rtout = 1; 10710a0e9771SDarren Reed break; 10720a0e9771SDarren Reed } 10730a0e9771SDarren Reed 10740a0e9771SDarren Reed /* 10750a0e9771SDarren Reed * Get read timeout. 10760a0e9771SDarren Reed */ 10770a0e9771SDarren Reed case BIOCGRTIMEOUT32: 10780a0e9771SDarren Reed { 10790a0e9771SDarren Reed struct timeval32 tv; 10800a0e9771SDarren Reed clock_t ticks; 10810a0e9771SDarren Reed 10820a0e9771SDarren Reed ticks = drv_hztousec(d->bd_rtout); 10830a0e9771SDarren Reed tv.tv_sec = ticks / 1000000; 10840a0e9771SDarren Reed tv.tv_usec = ticks - (tv.tv_sec * 1000000); 10850a0e9771SDarren Reed error = copyout(&tv, (void *)addr, sizeof (tv)); 10860a0e9771SDarren Reed break; 10870a0e9771SDarren Reed } 10880a0e9771SDarren Reed 10890a0e9771SDarren Reed /* 10900a0e9771SDarren Reed * Get a list of supported device parameters. 
10910a0e9771SDarren Reed */ 10920a0e9771SDarren Reed case BIOCGDLTLIST32: 10930a0e9771SDarren Reed if (d->bd_bif == 0) { 10940a0e9771SDarren Reed error = EINVAL; 10950a0e9771SDarren Reed } else { 10960a0e9771SDarren Reed struct bpf_dltlist32 lst32; 10970a0e9771SDarren Reed struct bpf_dltlist list; 10980a0e9771SDarren Reed 10990a0e9771SDarren Reed if (copyin((void *)addr, &lst32, sizeof (lst32)) != 0) { 11000a0e9771SDarren Reed error = EFAULT; 11010a0e9771SDarren Reed break; 11020a0e9771SDarren Reed } 11030a0e9771SDarren Reed 11040a0e9771SDarren Reed list.bfl_len = lst32.bfl_len; 11050a0e9771SDarren Reed list.bfl_list = (void *)(uint64_t)lst32.bfl_list; 11060a0e9771SDarren Reed error = bpf_getdltlist(d, &list); 11070a0e9771SDarren Reed if (error == 0) { 11080a0e9771SDarren Reed lst32.bfl_len = list.bfl_len; 11090a0e9771SDarren Reed 11100a0e9771SDarren Reed if (copyout(&lst32, (void *)addr, 11110a0e9771SDarren Reed sizeof (lst32)) != 0) 11120a0e9771SDarren Reed error = EFAULT; 11130a0e9771SDarren Reed } 11140a0e9771SDarren Reed } 11150a0e9771SDarren Reed break; 11160a0e9771SDarren Reed 11170a0e9771SDarren Reed /* 11180a0e9771SDarren Reed * Set link layer read filter. 11190a0e9771SDarren Reed */ 11200a0e9771SDarren Reed case BIOCSETF32: { 11210a0e9771SDarren Reed struct bpf_program32 prog32; 11220a0e9771SDarren Reed 11230a0e9771SDarren Reed if (ddi_copyin((void *)addr, &prog32, sizeof (prog), mode)) { 11240a0e9771SDarren Reed error = EFAULT; 11250a0e9771SDarren Reed break; 11260a0e9771SDarren Reed } 11270a0e9771SDarren Reed prog.bf_len = prog32.bf_len; 11280a0e9771SDarren Reed prog.bf_insns = (void *)(uint64_t)prog32.bf_insns; 11290a0e9771SDarren Reed error = bpf_setf(d, &prog); 11300a0e9771SDarren Reed break; 11310a0e9771SDarren Reed } 11320a0e9771SDarren Reed #endif 11330a0e9771SDarren Reed 11340a0e9771SDarren Reed /* 11350a0e9771SDarren Reed * Set read timeout. 
11360a0e9771SDarren Reed */ 11370a0e9771SDarren Reed case BIOCSRTIMEOUT: 11380a0e9771SDarren Reed { 11390a0e9771SDarren Reed struct timeval tv; 11400a0e9771SDarren Reed 11410a0e9771SDarren Reed if (copyin((void *)addr, &tv, sizeof (tv)) != 0) { 11420a0e9771SDarren Reed error = EFAULT; 11430a0e9771SDarren Reed break; 11440a0e9771SDarren Reed } 11450a0e9771SDarren Reed 11460a0e9771SDarren Reed /* Convert the timeout in microseconds to ticks */ 11470a0e9771SDarren Reed d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 + 11480a0e9771SDarren Reed tv.tv_usec); 11490a0e9771SDarren Reed if ((d->bd_rtout == 0) && (tv.tv_usec != 0)) 11500a0e9771SDarren Reed d->bd_rtout = 1; 11510a0e9771SDarren Reed break; 11520a0e9771SDarren Reed } 11530a0e9771SDarren Reed 11540a0e9771SDarren Reed /* 11550a0e9771SDarren Reed * Get read timeout. 11560a0e9771SDarren Reed */ 11570a0e9771SDarren Reed case BIOCGRTIMEOUT: 11580a0e9771SDarren Reed { 11590a0e9771SDarren Reed struct timeval tv; 11600a0e9771SDarren Reed clock_t ticks; 11610a0e9771SDarren Reed 11620a0e9771SDarren Reed ticks = drv_hztousec(d->bd_rtout); 11630a0e9771SDarren Reed tv.tv_sec = ticks / 1000000; 11640a0e9771SDarren Reed tv.tv_usec = ticks - (tv.tv_sec * 1000000); 11650a0e9771SDarren Reed if (copyout(&tv, (void *)addr, sizeof (tv)) != 0) 11660a0e9771SDarren Reed error = EFAULT; 11670a0e9771SDarren Reed break; 11680a0e9771SDarren Reed } 11690a0e9771SDarren Reed 11700a0e9771SDarren Reed /* 11710a0e9771SDarren Reed * Get packet stats. 
11720a0e9771SDarren Reed */ 11730a0e9771SDarren Reed case BIOCGSTATS: 11740a0e9771SDarren Reed { 11750a0e9771SDarren Reed struct bpf_stat bs; 11760a0e9771SDarren Reed 11770a0e9771SDarren Reed bs.bs_recv = d->bd_rcount; 11780a0e9771SDarren Reed bs.bs_drop = d->bd_dcount; 11790a0e9771SDarren Reed bs.bs_capt = d->bd_ccount; 11800a0e9771SDarren Reed if (copyout(&bs, (void *)addr, sizeof (bs)) != 0) 11810a0e9771SDarren Reed error = EFAULT; 11820a0e9771SDarren Reed break; 11830a0e9771SDarren Reed } 11840a0e9771SDarren Reed 11850a0e9771SDarren Reed /* 11860a0e9771SDarren Reed * Set immediate mode. 11870a0e9771SDarren Reed */ 11880a0e9771SDarren Reed case BIOCIMMEDIATE: 11890a0e9771SDarren Reed if (copyin((void *)addr, &d->bd_immediate, 11900a0e9771SDarren Reed sizeof (d->bd_immediate)) != 0) 11910a0e9771SDarren Reed error = EFAULT; 11920a0e9771SDarren Reed break; 11930a0e9771SDarren Reed 11940a0e9771SDarren Reed case BIOCVERSION: 11950a0e9771SDarren Reed { 11960a0e9771SDarren Reed struct bpf_version bv; 11970a0e9771SDarren Reed 11980a0e9771SDarren Reed bv.bv_major = BPF_MAJOR_VERSION; 11990a0e9771SDarren Reed bv.bv_minor = BPF_MINOR_VERSION; 12000a0e9771SDarren Reed if (copyout(&bv, (void *)addr, sizeof (bv)) != 0) 12010a0e9771SDarren Reed error = EFAULT; 12020a0e9771SDarren Reed break; 12030a0e9771SDarren Reed } 12040a0e9771SDarren Reed 12050a0e9771SDarren Reed case BIOCGHDRCMPLT: /* get "header already complete" flag */ 12060a0e9771SDarren Reed if (copyout(&d->bd_hdrcmplt, (void *)addr, 12070a0e9771SDarren Reed sizeof (d->bd_hdrcmplt)) != 0) 12080a0e9771SDarren Reed error = EFAULT; 12090a0e9771SDarren Reed break; 12100a0e9771SDarren Reed 12110a0e9771SDarren Reed case BIOCSHDRCMPLT: /* set "header already complete" flag */ 12120a0e9771SDarren Reed if (copyin((void *)addr, &d->bd_hdrcmplt, 12130a0e9771SDarren Reed sizeof (d->bd_hdrcmplt)) != 0) 12140a0e9771SDarren Reed error = EFAULT; 12150a0e9771SDarren Reed break; 12160a0e9771SDarren Reed 12170a0e9771SDarren Reed /* 
12180a0e9771SDarren Reed * Get "see sent packets" flag 12190a0e9771SDarren Reed */ 12200a0e9771SDarren Reed case BIOCGSEESENT: 12210a0e9771SDarren Reed if (copyout(&d->bd_seesent, (void *)addr, 12220a0e9771SDarren Reed sizeof (d->bd_seesent)) != 0) 12230a0e9771SDarren Reed error = EFAULT; 12240a0e9771SDarren Reed break; 12250a0e9771SDarren Reed 12260a0e9771SDarren Reed /* 12270a0e9771SDarren Reed * Set "see sent" packets flag 12280a0e9771SDarren Reed */ 12290a0e9771SDarren Reed case BIOCSSEESENT: 12300a0e9771SDarren Reed if (copyin((void *)addr, &d->bd_seesent, 12310a0e9771SDarren Reed sizeof (d->bd_seesent)) != 0) 12320a0e9771SDarren Reed error = EFAULT; 12330a0e9771SDarren Reed break; 12340a0e9771SDarren Reed 12350a0e9771SDarren Reed case FIONBIO: /* Non-blocking I/O */ 12360a0e9771SDarren Reed if (copyin((void *)addr, &d->bd_nonblock, 12370a0e9771SDarren Reed sizeof (d->bd_nonblock)) != 0) 12380a0e9771SDarren Reed error = EFAULT; 12390a0e9771SDarren Reed break; 12400a0e9771SDarren Reed } 12410a0e9771SDarren Reed return (error); 12420a0e9771SDarren Reed } 12430a0e9771SDarren Reed 12440a0e9771SDarren Reed /* 12450a0e9771SDarren Reed * Set d's packet filter program to fp. If this file already has a filter, 12460a0e9771SDarren Reed * free it and replace it. If the new filter is "empty" (has a 0 size), then 12470a0e9771SDarren Reed * the result is to just remove and free the existing filter. 12480a0e9771SDarren Reed * Returns EINVAL for bogus requests. 
12490a0e9771SDarren Reed */ 12500a0e9771SDarren Reed int 12510a0e9771SDarren Reed bpf_setf(struct bpf_d *d, struct bpf_program *fp) 12520a0e9771SDarren Reed { 12530a0e9771SDarren Reed struct bpf_insn *fcode, *old; 12540a0e9771SDarren Reed uint_t flen, size; 12550a0e9771SDarren Reed size_t oldsize; 12560a0e9771SDarren Reed 12570a0e9771SDarren Reed if (fp->bf_insns == 0) { 12580a0e9771SDarren Reed if (fp->bf_len != 0) 12590a0e9771SDarren Reed return (EINVAL); 12600a0e9771SDarren Reed mutex_enter(&d->bd_lock); 12610a0e9771SDarren Reed old = d->bd_filter; 12620a0e9771SDarren Reed oldsize = d->bd_filter_size; 12630a0e9771SDarren Reed d->bd_filter = 0; 12640a0e9771SDarren Reed d->bd_filter_size = 0; 12650a0e9771SDarren Reed reset_d(d); 12660a0e9771SDarren Reed mutex_exit(&d->bd_lock); 12670a0e9771SDarren Reed if (old != 0) 12680a0e9771SDarren Reed kmem_free(old, oldsize); 12690a0e9771SDarren Reed return (0); 12700a0e9771SDarren Reed } 12710a0e9771SDarren Reed flen = fp->bf_len; 12720a0e9771SDarren Reed if (flen > BPF_MAXINSNS) 12730a0e9771SDarren Reed return (EINVAL); 12740a0e9771SDarren Reed 12750a0e9771SDarren Reed size = flen * sizeof (*fp->bf_insns); 12760a0e9771SDarren Reed fcode = kmem_alloc(size, KM_SLEEP); 12770a0e9771SDarren Reed if (copyin(fp->bf_insns, fcode, size) != 0) 12780a0e9771SDarren Reed return (EFAULT); 12790a0e9771SDarren Reed 12800a0e9771SDarren Reed if (bpf_validate(fcode, (int)flen)) { 12810a0e9771SDarren Reed mutex_enter(&d->bd_lock); 12820a0e9771SDarren Reed old = d->bd_filter; 12830a0e9771SDarren Reed oldsize = d->bd_filter_size; 12840a0e9771SDarren Reed d->bd_filter = fcode; 12850a0e9771SDarren Reed d->bd_filter_size = size; 12860a0e9771SDarren Reed reset_d(d); 12870a0e9771SDarren Reed mutex_exit(&d->bd_lock); 12880a0e9771SDarren Reed if (old != 0) 12890a0e9771SDarren Reed kmem_free(old, oldsize); 12900a0e9771SDarren Reed 12910a0e9771SDarren Reed return (0); 12920a0e9771SDarren Reed } 12930a0e9771SDarren Reed kmem_free(fcode, size); 
12940a0e9771SDarren Reed return (EINVAL); 12950a0e9771SDarren Reed } 12960a0e9771SDarren Reed 12970a0e9771SDarren Reed /* 12980a0e9771SDarren Reed * Detach a file from its current interface (if attached at all) and attach 1299b7ea883bSDarren Reed * to the interface indicated by the name stored in ifname. 13000a0e9771SDarren Reed * Return an errno or 0. 13010a0e9771SDarren Reed */ 13020a0e9771SDarren Reed static int 13030a0e9771SDarren Reed bpf_setif(struct bpf_d *d, char *ifname, int namesize) 13040a0e9771SDarren Reed { 13050a0e9771SDarren Reed int unit_seen; 1306b7ea883bSDarren Reed int error = 0; 13070a0e9771SDarren Reed char *cp; 13080a0e9771SDarren Reed int i; 13090a0e9771SDarren Reed 13100a0e9771SDarren Reed /* 13110a0e9771SDarren Reed * Make sure the provided name has a unit number, and default 13120a0e9771SDarren Reed * it to '0' if not specified. 13130a0e9771SDarren Reed * XXX This is ugly ... do this differently? 13140a0e9771SDarren Reed */ 13150a0e9771SDarren Reed unit_seen = 0; 13160a0e9771SDarren Reed cp = ifname; 13170a0e9771SDarren Reed cp[namesize - 1] = '\0'; /* sanity */ 13180a0e9771SDarren Reed while (*cp++) 13190a0e9771SDarren Reed if (*cp >= '0' && *cp <= '9') 13200a0e9771SDarren Reed unit_seen = 1; 13210a0e9771SDarren Reed if (!unit_seen) { 13220a0e9771SDarren Reed /* Make sure to leave room for the '\0'. 
*/ 13230a0e9771SDarren Reed for (i = 0; i < (namesize - 1); ++i) { 13240a0e9771SDarren Reed if ((ifname[i] >= 'a' && ifname[i] <= 'z') || 13250a0e9771SDarren Reed (ifname[i] >= 'A' && ifname[i] <= 'Z')) 13260a0e9771SDarren Reed continue; 13270a0e9771SDarren Reed ifname[i] = '0'; 13280a0e9771SDarren Reed } 13290a0e9771SDarren Reed } 13300a0e9771SDarren Reed 13310a0e9771SDarren Reed /* 13320a0e9771SDarren Reed * Make sure that only one call to this function happens at a time 13330a0e9771SDarren Reed * and that we're not interleaving a read/write 13340a0e9771SDarren Reed */ 13350a0e9771SDarren Reed mutex_enter(&d->bd_lock); 13360a0e9771SDarren Reed while (d->bd_inuse != 0) { 13370a0e9771SDarren Reed d->bd_waiting++; 13380a0e9771SDarren Reed if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) { 13390a0e9771SDarren Reed d->bd_waiting--; 13400a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13410a0e9771SDarren Reed return (EINTR); 13420a0e9771SDarren Reed } 13430a0e9771SDarren Reed d->bd_waiting--; 13440a0e9771SDarren Reed } 13450a0e9771SDarren Reed d->bd_inuse = -1; 13460a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13470a0e9771SDarren Reed 13480a0e9771SDarren Reed if (d->bd_sbuf == 0) 13490a0e9771SDarren Reed error = bpf_allocbufs(d); 13500a0e9771SDarren Reed 1351b7ea883bSDarren Reed if (error == 0) { 13520a0e9771SDarren Reed mutex_enter(&d->bd_lock); 13530a0e9771SDarren Reed if (d->bd_bif) 13540a0e9771SDarren Reed /* 13550a0e9771SDarren Reed * Detach if attached to something else. 
13560a0e9771SDarren Reed */ 13570a0e9771SDarren Reed bpf_detachd(d); 13580a0e9771SDarren Reed 1359b7ea883bSDarren Reed error = bpf_attachd(d, ifname, -1); 13600a0e9771SDarren Reed reset_d(d); 13610a0e9771SDarren Reed d->bd_inuse = 0; 13620a0e9771SDarren Reed if (d->bd_waiting != 0) 13630a0e9771SDarren Reed cv_signal(&d->bd_wait); 13640a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13650a0e9771SDarren Reed return (error); 13660a0e9771SDarren Reed } 13670a0e9771SDarren Reed 13680a0e9771SDarren Reed mutex_enter(&d->bd_lock); 13690a0e9771SDarren Reed d->bd_inuse = 0; 13700a0e9771SDarren Reed if (d->bd_waiting != 0) 13710a0e9771SDarren Reed cv_signal(&d->bd_wait); 13720a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13730a0e9771SDarren Reed 13740a0e9771SDarren Reed /* 13750a0e9771SDarren Reed * Try tickle the mac layer into attaching the device... 13760a0e9771SDarren Reed */ 13770a0e9771SDarren Reed return (bpf_provider_tickle(ifname, d->bd_zone)); 13780a0e9771SDarren Reed } 13790a0e9771SDarren Reed 13800a0e9771SDarren Reed /* 13810a0e9771SDarren Reed * Copy the interface name to the ifreq. 
13820a0e9771SDarren Reed */ 13830a0e9771SDarren Reed static int 13840a0e9771SDarren Reed bpf_ifname(struct bpf_d *d, char *buffer, int bufsize) 13850a0e9771SDarren Reed { 13860a0e9771SDarren Reed 13870a0e9771SDarren Reed mutex_enter(&d->bd_lock); 1388b7ea883bSDarren Reed if (d->bd_bif == NULL) { 13890a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13900a0e9771SDarren Reed return (EINVAL); 13910a0e9771SDarren Reed } 13920a0e9771SDarren Reed 1393b7ea883bSDarren Reed (void) strlcpy(buffer, d->bd_ifname, bufsize); 13940a0e9771SDarren Reed mutex_exit(&d->bd_lock); 13950a0e9771SDarren Reed 13960a0e9771SDarren Reed return (0); 13970a0e9771SDarren Reed } 13980a0e9771SDarren Reed 13990a0e9771SDarren Reed /* 14000a0e9771SDarren Reed * Support for poll() system call 14010a0e9771SDarren Reed * 14020a0e9771SDarren Reed * Return true iff the specific operation will not block indefinitely - with 14030a0e9771SDarren Reed * the assumption that it is safe to positively acknowledge a request for the 14040a0e9771SDarren Reed * ability to write to the BPF device. 14050a0e9771SDarren Reed * Otherwise, return false but make a note that a selnotify() must be done. 14060a0e9771SDarren Reed */ 14070a0e9771SDarren Reed int 14080a0e9771SDarren Reed bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp, 14090a0e9771SDarren Reed struct pollhead **phpp) 14100a0e9771SDarren Reed { 14110a0e9771SDarren Reed struct bpf_d *d = bpf_dev_get(getminor(dev)); 14120a0e9771SDarren Reed 14130a0e9771SDarren Reed if (events & (POLLIN | POLLRDNORM)) { 14140a0e9771SDarren Reed /* 14150a0e9771SDarren Reed * An imitation of the FIONREAD ioctl code. 
14160a0e9771SDarren Reed */ 14170a0e9771SDarren Reed mutex_enter(&d->bd_lock); 14180a0e9771SDarren Reed if (d->bd_hlen != 0 || 14190a0e9771SDarren Reed ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) && 14200a0e9771SDarren Reed d->bd_slen != 0)) { 14210a0e9771SDarren Reed *reventsp |= events & (POLLIN | POLLRDNORM); 14220a0e9771SDarren Reed } else { 14230a0e9771SDarren Reed *reventsp = 0; 14240a0e9771SDarren Reed if (!anyyet) 14250a0e9771SDarren Reed *phpp = &d->bd_poll; 14260a0e9771SDarren Reed /* Start the read timeout if necessary */ 14270a0e9771SDarren Reed if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) { 14280a0e9771SDarren Reed bpf_clear_timeout(d); 14290a0e9771SDarren Reed /* 14300a0e9771SDarren Reed * Only allow the timeout to be set once. 14310a0e9771SDarren Reed */ 14320a0e9771SDarren Reed if (d->bd_callout == 0) 14330a0e9771SDarren Reed d->bd_callout = timeout(bpf_timed_out, 14340a0e9771SDarren Reed d, d->bd_rtout); 14350a0e9771SDarren Reed d->bd_state = BPF_WAITING; 14360a0e9771SDarren Reed } 14370a0e9771SDarren Reed } 14380a0e9771SDarren Reed mutex_exit(&d->bd_lock); 14390a0e9771SDarren Reed } 14400a0e9771SDarren Reed 14410a0e9771SDarren Reed return (0); 14420a0e9771SDarren Reed } 14430a0e9771SDarren Reed 14440a0e9771SDarren Reed /* 14450a0e9771SDarren Reed * Copy data from an mblk_t chain into a buffer. This works for ipnet 14460a0e9771SDarren Reed * because the dl_ipnetinfo_t is placed in an mblk_t that leads the 14470a0e9771SDarren Reed * packet itself. 
14480a0e9771SDarren Reed */ 14490a0e9771SDarren Reed static void * 14500a0e9771SDarren Reed bpf_mcpy(void *dst_arg, const void *src_arg, size_t len) 14510a0e9771SDarren Reed { 14520a0e9771SDarren Reed const mblk_t *m; 14530a0e9771SDarren Reed uint_t count; 14540a0e9771SDarren Reed uchar_t *dst; 14550a0e9771SDarren Reed 14560a0e9771SDarren Reed m = src_arg; 14570a0e9771SDarren Reed dst = dst_arg; 14580a0e9771SDarren Reed while (len > 0) { 14590a0e9771SDarren Reed if (m == NULL) 14600a0e9771SDarren Reed panic("bpf_mcpy"); 14610a0e9771SDarren Reed count = (uint_t)min(M_LEN(m), len); 14620a0e9771SDarren Reed (void) memcpy(dst, mtod(m, const void *), count); 14630a0e9771SDarren Reed m = m->b_cont; 14640a0e9771SDarren Reed dst += count; 14650a0e9771SDarren Reed len -= count; 14660a0e9771SDarren Reed } 14670a0e9771SDarren Reed return (dst_arg); 14680a0e9771SDarren Reed } 14690a0e9771SDarren Reed 14700a0e9771SDarren Reed /* 14710a0e9771SDarren Reed * Dispatch a packet to all the listeners on interface bp. 
14720a0e9771SDarren Reed * 14730a0e9771SDarren Reed * marg pointer to the packet, either a data buffer or an mbuf chain 14740a0e9771SDarren Reed * buflen buffer length, if marg is a data buffer 14750a0e9771SDarren Reed * cpfn a function that can copy marg into the listener's buffer 14760a0e9771SDarren Reed * pktlen length of the packet 14770a0e9771SDarren Reed * issent boolean indicating whether the packet was sent or receive 14780a0e9771SDarren Reed */ 14790a0e9771SDarren Reed static inline void 14800a0e9771SDarren Reed bpf_deliver(struct bpf_d *d, cp_fn_t cpfn, void *marg, uint_t pktlen, 14810a0e9771SDarren Reed uint_t buflen, boolean_t issent) 14820a0e9771SDarren Reed { 14830a0e9771SDarren Reed struct timeval tv; 14840a0e9771SDarren Reed uint_t slen; 14850a0e9771SDarren Reed 14860a0e9771SDarren Reed if (!d->bd_seesent && issent) 14870a0e9771SDarren Reed return; 14880a0e9771SDarren Reed 14890a0e9771SDarren Reed /* 14900a0e9771SDarren Reed * Accuracy of the packet counters in BPF is vital so it 14910a0e9771SDarren Reed * is important to protect even the outer ones. 14920a0e9771SDarren Reed */ 14930a0e9771SDarren Reed mutex_enter(&d->bd_lock); 14940a0e9771SDarren Reed slen = bpf_filter(d->bd_filter, marg, pktlen, buflen); 14950a0e9771SDarren Reed DTRACE_PROBE5(bpf__packet, struct bpf_if *, d->bd_bif, 14960a0e9771SDarren Reed struct bpf_d *, d, void *, marg, uint_t, pktlen, uint_t, slen); 14970a0e9771SDarren Reed d->bd_rcount++; 14980a0e9771SDarren Reed ks_stats.kp_receive.value.ui64++; 14990a0e9771SDarren Reed if (slen != 0) { 15000a0e9771SDarren Reed uniqtime(&tv); 15010a0e9771SDarren Reed catchpacket(d, marg, pktlen, slen, cpfn, &tv); 15020a0e9771SDarren Reed } 15030a0e9771SDarren Reed mutex_exit(&d->bd_lock); 15040a0e9771SDarren Reed } 15050a0e9771SDarren Reed 15060a0e9771SDarren Reed /* 15070a0e9771SDarren Reed * Incoming linkage from device drivers. 
15080a0e9771SDarren Reed */ 15090a0e9771SDarren Reed /* ARGSUSED */ 15100a0e9771SDarren Reed void 15110a0e9771SDarren Reed bpf_mtap(void *arg, mac_resource_handle_t mrh, mblk_t *m, boolean_t issent) 15120a0e9771SDarren Reed { 15130a0e9771SDarren Reed cp_fn_t cpfn; 15140a0e9771SDarren Reed struct bpf_d *d = arg; 15150a0e9771SDarren Reed uint_t pktlen, buflen; 15160a0e9771SDarren Reed void *marg; 15170a0e9771SDarren Reed 15180a0e9771SDarren Reed pktlen = msgdsize(m); 15190a0e9771SDarren Reed 15200a0e9771SDarren Reed if (pktlen == M_LEN(m)) { 15210a0e9771SDarren Reed cpfn = (cp_fn_t)memcpy; 15220a0e9771SDarren Reed marg = mtod(m, void *); 15230a0e9771SDarren Reed buflen = pktlen; 15240a0e9771SDarren Reed } else { 15250a0e9771SDarren Reed cpfn = bpf_mcpy; 15260a0e9771SDarren Reed marg = m; 15270a0e9771SDarren Reed buflen = 0; 15280a0e9771SDarren Reed } 15290a0e9771SDarren Reed 15300a0e9771SDarren Reed bpf_deliver(d, cpfn, marg, pktlen, buflen, issent); 15310a0e9771SDarren Reed } 15320a0e9771SDarren Reed 15330a0e9771SDarren Reed /* 15340a0e9771SDarren Reed * Incoming linkage from ipnet. 15350a0e9771SDarren Reed * In ipnet, there is only one event, NH_OBSERVE, that delivers packets 15360a0e9771SDarren Reed * from all network interfaces. Thus the tap function needs to apply a 15370a0e9771SDarren Reed * filter using the interface index/id to immitate snoop'ing on just the 15380a0e9771SDarren Reed * specified interface. 
15390a0e9771SDarren Reed */ 15400a0e9771SDarren Reed /* ARGSUSED */ 15410a0e9771SDarren Reed void 15420a0e9771SDarren Reed bpf_itap(void *arg, mblk_t *m, boolean_t issent, uint_t length) 15430a0e9771SDarren Reed { 15440a0e9771SDarren Reed hook_pkt_observe_t *hdr; 15450a0e9771SDarren Reed struct bpf_d *d = arg; 15460a0e9771SDarren Reed 15470a0e9771SDarren Reed hdr = (hook_pkt_observe_t *)m->b_rptr; 1548b7ea883bSDarren Reed if (ntohl(hdr->hpo_ifindex) != d->bd_linkid) 15490a0e9771SDarren Reed return; 15500a0e9771SDarren Reed bpf_deliver(d, bpf_mcpy, m, length, 0, issent); 15510a0e9771SDarren Reed 15520a0e9771SDarren Reed } 15530a0e9771SDarren Reed 15540a0e9771SDarren Reed /* 15550a0e9771SDarren Reed * Move the packet data from interface memory (pkt) into the 15560a0e9771SDarren Reed * store buffer. Return 1 if it's time to wakeup a listener (buffer full), 15570a0e9771SDarren Reed * otherwise 0. "copy" is the routine called to do the actual data 15580a0e9771SDarren Reed * transfer. memcpy is passed in to copy contiguous chunks, while 15590a0e9771SDarren Reed * bpf_mcpy is passed in to copy mbuf chains. In the latter case, 15600a0e9771SDarren Reed * pkt is really an mbuf. 15610a0e9771SDarren Reed */ 15620a0e9771SDarren Reed static void 15630a0e9771SDarren Reed catchpacket(struct bpf_d *d, uchar_t *pkt, uint_t pktlen, uint_t snaplen, 15640a0e9771SDarren Reed cp_fn_t cpfn, struct timeval *tv) 15650a0e9771SDarren Reed { 15660a0e9771SDarren Reed struct bpf_hdr *hp; 15670a0e9771SDarren Reed int totlen, curlen; 1568b7ea883bSDarren Reed int hdrlen = d->bd_hdrlen; 15690a0e9771SDarren Reed int do_wakeup = 0; 15700a0e9771SDarren Reed 15710a0e9771SDarren Reed ++d->bd_ccount; 15720a0e9771SDarren Reed ks_stats.kp_capture.value.ui64++; 15730a0e9771SDarren Reed /* 15740a0e9771SDarren Reed * Figure out how many bytes to move. If the packet is 15750a0e9771SDarren Reed * greater or equal to the snapshot length, transfer that 15760a0e9771SDarren Reed * much. 
Otherwise, transfer the whole packet (unless 15770a0e9771SDarren Reed * we hit the buffer size limit). 15780a0e9771SDarren Reed */ 15790a0e9771SDarren Reed totlen = hdrlen + min(snaplen, pktlen); 15800a0e9771SDarren Reed if (totlen > d->bd_bufsize) 15810a0e9771SDarren Reed totlen = d->bd_bufsize; 15820a0e9771SDarren Reed 15830a0e9771SDarren Reed /* 15840a0e9771SDarren Reed * Round up the end of the previous packet to the next longword. 15850a0e9771SDarren Reed */ 15860a0e9771SDarren Reed curlen = BPF_WORDALIGN(d->bd_slen); 15870a0e9771SDarren Reed if (curlen + totlen > d->bd_bufsize) { 15880a0e9771SDarren Reed /* 15890a0e9771SDarren Reed * This packet will overflow the storage buffer. 15900a0e9771SDarren Reed * Rotate the buffers if we can, then wakeup any 15910a0e9771SDarren Reed * pending reads. 15920a0e9771SDarren Reed */ 15930a0e9771SDarren Reed if (d->bd_fbuf == 0) { 15940a0e9771SDarren Reed /* 15950a0e9771SDarren Reed * We haven't completed the previous read yet, 15960a0e9771SDarren Reed * so drop the packet. 15970a0e9771SDarren Reed */ 15980a0e9771SDarren Reed ++d->bd_dcount; 15990a0e9771SDarren Reed ks_stats.kp_dropped.value.ui64++; 16000a0e9771SDarren Reed return; 16010a0e9771SDarren Reed } 16020a0e9771SDarren Reed ROTATE_BUFFERS(d); 16030a0e9771SDarren Reed do_wakeup = 1; 16040a0e9771SDarren Reed curlen = 0; 16050a0e9771SDarren Reed } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) { 16060a0e9771SDarren Reed /* 16070a0e9771SDarren Reed * Immediate mode is set, or the read timeout has 16080a0e9771SDarren Reed * already expired during a select call. A packet 16090a0e9771SDarren Reed * arrived, so the reader should be woken up. 16100a0e9771SDarren Reed */ 16110a0e9771SDarren Reed do_wakeup = 1; 16120a0e9771SDarren Reed } 16130a0e9771SDarren Reed 16140a0e9771SDarren Reed /* 16150a0e9771SDarren Reed * Append the bpf header to the existing buffer before we add 16160a0e9771SDarren Reed * on the actual packet data. 
16170a0e9771SDarren Reed */ 16180a0e9771SDarren Reed hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen); 16190a0e9771SDarren Reed hp->bh_tstamp.tv_sec = tv->tv_sec; 16200a0e9771SDarren Reed hp->bh_tstamp.tv_usec = tv->tv_usec; 16210a0e9771SDarren Reed hp->bh_datalen = pktlen; 16220a0e9771SDarren Reed hp->bh_hdrlen = (uint16_t)hdrlen; 16230a0e9771SDarren Reed /* 16240a0e9771SDarren Reed * Copy the packet data into the store buffer and update its length. 16250a0e9771SDarren Reed */ 16260a0e9771SDarren Reed (*cpfn)((uchar_t *)hp + hdrlen, pkt, 16270a0e9771SDarren Reed (hp->bh_caplen = totlen - hdrlen)); 16280a0e9771SDarren Reed d->bd_slen = curlen + totlen; 16290a0e9771SDarren Reed 16300a0e9771SDarren Reed /* 16310a0e9771SDarren Reed * Call bpf_wakeup after bd_slen has been updated. 16320a0e9771SDarren Reed */ 16330a0e9771SDarren Reed if (do_wakeup) 16340a0e9771SDarren Reed bpf_wakeup(d); 16350a0e9771SDarren Reed } 16360a0e9771SDarren Reed 16370a0e9771SDarren Reed /* 16380a0e9771SDarren Reed * Initialize all nonzero fields of a descriptor. 16390a0e9771SDarren Reed */ 16400a0e9771SDarren Reed static int 16410a0e9771SDarren Reed bpf_allocbufs(struct bpf_d *d) 16420a0e9771SDarren Reed { 16430a0e9771SDarren Reed 16440a0e9771SDarren Reed d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP); 16450a0e9771SDarren Reed if (!d->bd_fbuf) 16460a0e9771SDarren Reed return (ENOBUFS); 16470a0e9771SDarren Reed d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP); 16480a0e9771SDarren Reed if (!d->bd_sbuf) { 16490a0e9771SDarren Reed kmem_free(d->bd_fbuf, d->bd_bufsize); 16500a0e9771SDarren Reed return (ENOBUFS); 16510a0e9771SDarren Reed } 16520a0e9771SDarren Reed d->bd_slen = 0; 16530a0e9771SDarren Reed d->bd_hlen = 0; 16540a0e9771SDarren Reed return (0); 16550a0e9771SDarren Reed } 16560a0e9771SDarren Reed 16570a0e9771SDarren Reed /* 16580a0e9771SDarren Reed * Free buffers currently in use by a descriptor. 16590a0e9771SDarren Reed * Called on close. 
16600a0e9771SDarren Reed */ 16610a0e9771SDarren Reed static void 16620a0e9771SDarren Reed bpf_freed(struct bpf_d *d) 16630a0e9771SDarren Reed { 16640a0e9771SDarren Reed /* 16650a0e9771SDarren Reed * At this point the descriptor has been detached from its 16660a0e9771SDarren Reed * interface and it yet hasn't been marked free. 16670a0e9771SDarren Reed */ 16680a0e9771SDarren Reed if (d->bd_sbuf != 0) { 16690a0e9771SDarren Reed kmem_free(d->bd_sbuf, d->bd_bufsize); 16700a0e9771SDarren Reed if (d->bd_hbuf != 0) 16710a0e9771SDarren Reed kmem_free(d->bd_hbuf, d->bd_bufsize); 16720a0e9771SDarren Reed if (d->bd_fbuf != 0) 16730a0e9771SDarren Reed kmem_free(d->bd_fbuf, d->bd_bufsize); 16740a0e9771SDarren Reed } 16750a0e9771SDarren Reed if (d->bd_filter) 16760a0e9771SDarren Reed kmem_free(d->bd_filter, d->bd_filter_size); 16770a0e9771SDarren Reed } 16780a0e9771SDarren Reed 16790a0e9771SDarren Reed /* 16800a0e9771SDarren Reed * Get a list of available data link type of the interface. 16810a0e9771SDarren Reed */ 16820a0e9771SDarren Reed static int 16830a0e9771SDarren Reed bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *listp) 16840a0e9771SDarren Reed { 1685b7ea883bSDarren Reed bpf_provider_list_t *bp; 1686b7ea883bSDarren Reed bpf_provider_t *bpr; 1687b7ea883bSDarren Reed zoneid_t zoneid; 1688b7ea883bSDarren Reed uintptr_t mcip; 1689b7ea883bSDarren Reed uint_t nicdlt; 1690b7ea883bSDarren Reed uintptr_t mh; 1691b7ea883bSDarren Reed int error; 1692b7ea883bSDarren Reed int n; 16930a0e9771SDarren Reed 16940a0e9771SDarren Reed n = 0; 1695b7ea883bSDarren Reed mh = 0; 1696b7ea883bSDarren Reed mcip = 0; 16970a0e9771SDarren Reed error = 0; 1698b7ea883bSDarren Reed mutex_enter(&d->bd_lock); 1699b7ea883bSDarren Reed LIST_FOREACH(bp, &bpf_providers, bpl_next) { 1700b7ea883bSDarren Reed bpr = bp->bpl_what; 1701b7ea883bSDarren Reed error = MBPF_OPEN(bpr, d->bd_ifname, &mh, d->bd_zone); 1702b7ea883bSDarren Reed if (error != 0) 1703b7ea883bSDarren Reed goto next; 1704b7ea883bSDarren Reed 
error = MBPF_CLIENT_OPEN(bpr, mh, &mcip); 1705b7ea883bSDarren Reed if (error != 0) 1706b7ea883bSDarren Reed goto next; 1707b7ea883bSDarren Reed error = MBPF_GET_ZONE(bpr, mh, &zoneid); 1708b7ea883bSDarren Reed if (error != 0) 1709b7ea883bSDarren Reed goto next; 17100a0e9771SDarren Reed if (d->bd_zone != GLOBAL_ZONEID && 1711b7ea883bSDarren Reed d->bd_zone != zoneid) 1712b7ea883bSDarren Reed goto next; 1713b7ea883bSDarren Reed error = MBPF_GET_DLT(bpr, mh, &nicdlt); 1714b7ea883bSDarren Reed if (error != 0) 1715b7ea883bSDarren Reed goto next; 1716b7ea883bSDarren Reed nicdlt = bpf_dl_to_dlt(nicdlt); 17170a0e9771SDarren Reed if (listp->bfl_list != NULL) { 1718b7ea883bSDarren Reed if (n >= listp->bfl_len) { 1719b7ea883bSDarren Reed MBPF_CLIENT_CLOSE(bpr, mcip); 1720b7ea883bSDarren Reed MBPF_CLOSE(bpr, mh); 1721b7ea883bSDarren Reed break; 1722b7ea883bSDarren Reed } 17230a0e9771SDarren Reed /* 1724b7ea883bSDarren Reed * Bumping of bd_inuse ensures the structure does not 17250a0e9771SDarren Reed * disappear while the copyout runs and allows the for 17260a0e9771SDarren Reed * loop to be continued. 
17270a0e9771SDarren Reed */ 1728b7ea883bSDarren Reed d->bd_inuse++; 1729b7ea883bSDarren Reed mutex_exit(&d->bd_lock); 1730b7ea883bSDarren Reed if (copyout(&nicdlt, 17310a0e9771SDarren Reed listp->bfl_list + n, sizeof (uint_t)) != 0) 17320a0e9771SDarren Reed error = EFAULT; 1733b7ea883bSDarren Reed mutex_enter(&d->bd_lock); 1734b7ea883bSDarren Reed if (error != 0) 1735b7ea883bSDarren Reed break; 1736b7ea883bSDarren Reed d->bd_inuse--; 17370a0e9771SDarren Reed } 17380a0e9771SDarren Reed n++; 1739b7ea883bSDarren Reed next: 1740b7ea883bSDarren Reed if (mcip != 0) { 1741b7ea883bSDarren Reed MBPF_CLIENT_CLOSE(bpr, mcip); 1742b7ea883bSDarren Reed mcip = 0; 17430a0e9771SDarren Reed } 1744b7ea883bSDarren Reed if (mh != 0) { 1745b7ea883bSDarren Reed MBPF_CLOSE(bpr, mh); 1746b7ea883bSDarren Reed mh = 0; 1747b7ea883bSDarren Reed } 1748b7ea883bSDarren Reed } 1749b7ea883bSDarren Reed mutex_exit(&d->bd_lock); 1750b7ea883bSDarren Reed 1751b7ea883bSDarren Reed /* 1752b7ea883bSDarren Reed * It is quite possible that one or more provider to BPF may not 1753b7ea883bSDarren Reed * know about a link name whlist others do. In that case, so long 1754b7ea883bSDarren Reed * as we have one success, do not declare an error unless it was 1755b7ea883bSDarren Reed * an EFAULT as this indicates a problem that needs to be reported. 1756b7ea883bSDarren Reed */ 1757b7ea883bSDarren Reed if ((error != EFAULT) && (n > 0)) 1758b7ea883bSDarren Reed error = 0; 1759b7ea883bSDarren Reed 17600a0e9771SDarren Reed listp->bfl_len = n; 17610a0e9771SDarren Reed return (error); 17620a0e9771SDarren Reed } 17630a0e9771SDarren Reed 17640a0e9771SDarren Reed /* 17650a0e9771SDarren Reed * Set the data link type of a BPF instance. 
17660a0e9771SDarren Reed */ 17670a0e9771SDarren Reed static int 17680a0e9771SDarren Reed bpf_setdlt(struct bpf_d *d, void *addr) 17690a0e9771SDarren Reed { 17700a0e9771SDarren Reed char ifname[LIFNAMSIZ+1]; 1771b7ea883bSDarren Reed zoneid_t niczone; 17720a0e9771SDarren Reed int error; 17730a0e9771SDarren Reed int dlt; 17740a0e9771SDarren Reed 17750a0e9771SDarren Reed if (copyin(addr, &dlt, sizeof (dlt)) != 0) 17760a0e9771SDarren Reed return (EFAULT); 1777b7ea883bSDarren Reed 17780a0e9771SDarren Reed mutex_enter(&d->bd_lock); 17790a0e9771SDarren Reed 17800a0e9771SDarren Reed if (d->bd_bif == 0) { /* Interface not set */ 17810a0e9771SDarren Reed mutex_exit(&d->bd_lock); 17820a0e9771SDarren Reed return (EINVAL); 17830a0e9771SDarren Reed } 1784b7ea883bSDarren Reed if (d->bd_dlt == dlt) { /* NULL-op */ 17850a0e9771SDarren Reed mutex_exit(&d->bd_lock); 17860a0e9771SDarren Reed return (0); 17870a0e9771SDarren Reed } 17880a0e9771SDarren Reed 1789b7ea883bSDarren Reed error = MBPF_GET_ZONE(&d->bd_mac, d->bd_bif, &niczone); 1790b7ea883bSDarren Reed if (error != 0) { 1791b7ea883bSDarren Reed mutex_exit(&d->bd_lock); 1792b7ea883bSDarren Reed return (error); 1793b7ea883bSDarren Reed } 1794b7ea883bSDarren Reed 17950a0e9771SDarren Reed /* 17960a0e9771SDarren Reed * See the matrix at the top of the file for the permissions table 17970a0e9771SDarren Reed * enforced by this driver. 
17980a0e9771SDarren Reed */ 17990a0e9771SDarren Reed if ((d->bd_zone != GLOBAL_ZONEID) && (dlt != DLT_IPNET) && 1800b7ea883bSDarren Reed (niczone != d->bd_zone)) { 18010a0e9771SDarren Reed mutex_exit(&d->bd_lock); 18020a0e9771SDarren Reed return (EINVAL); 18030a0e9771SDarren Reed } 18040a0e9771SDarren Reed 1805b7ea883bSDarren Reed (void) strlcpy(ifname, d->bd_ifname, sizeof (ifname)); 1806b7ea883bSDarren Reed d->bd_inuse = -1; 18070a0e9771SDarren Reed bpf_detachd(d); 1808b7ea883bSDarren Reed error = bpf_attachd(d, ifname, dlt); 18090a0e9771SDarren Reed reset_d(d); 1810b7ea883bSDarren Reed d->bd_inuse = 0; 18110a0e9771SDarren Reed 18120a0e9771SDarren Reed mutex_exit(&d->bd_lock); 18130a0e9771SDarren Reed return (error); 18140a0e9771SDarren Reed } 18150a0e9771SDarren Reed 18160a0e9771SDarren Reed /* 18170a0e9771SDarren Reed * bpf_clear_timeout is called with the bd_lock mutex held, providing it 18180a0e9771SDarren Reed * with the necessary protection to retrieve and modify bd_callout but it 18190a0e9771SDarren Reed * does not hold the lock for its entire duration... see below... 18200a0e9771SDarren Reed */ 18210a0e9771SDarren Reed static void 18220a0e9771SDarren Reed bpf_clear_timeout(struct bpf_d *d) 18230a0e9771SDarren Reed { 18240a0e9771SDarren Reed timeout_id_t tid = d->bd_callout; 18250a0e9771SDarren Reed d->bd_callout = 0; 18260a0e9771SDarren Reed d->bd_inuse++; 18270a0e9771SDarren Reed 18280a0e9771SDarren Reed /* 18290a0e9771SDarren Reed * If the timeout has fired and is waiting on bd_lock, we could 18300a0e9771SDarren Reed * deadlock here because untimeout if bd_lock is held and would 18310a0e9771SDarren Reed * wait for bpf_timed_out to finish and it never would. 
18320a0e9771SDarren Reed */ 18330a0e9771SDarren Reed if (tid != 0) { 18340a0e9771SDarren Reed mutex_exit(&d->bd_lock); 18350a0e9771SDarren Reed (void) untimeout(tid); 18360a0e9771SDarren Reed mutex_enter(&d->bd_lock); 18370a0e9771SDarren Reed } 18380a0e9771SDarren Reed 18390a0e9771SDarren Reed d->bd_inuse--; 18400a0e9771SDarren Reed } 18410a0e9771SDarren Reed 18420a0e9771SDarren Reed /* 18430a0e9771SDarren Reed * As a cloning device driver, BPF needs to keep track of which device 18440a0e9771SDarren Reed * numbers are in use and which ones are not. A hash table, indexed by 18450a0e9771SDarren Reed * the minor device number, is used to store the pointers to the 18460a0e9771SDarren Reed * individual descriptors that are allocated in bpfopen(). 18470a0e9771SDarren Reed * The functions below present the interface for that hash table to 18480a0e9771SDarren Reed * the rest of the driver. 18490a0e9771SDarren Reed */ 18500a0e9771SDarren Reed static struct bpf_d * 18510a0e9771SDarren Reed bpf_dev_find(minor_t minor) 18520a0e9771SDarren Reed { 18530a0e9771SDarren Reed struct bpf_d *d = NULL; 18540a0e9771SDarren Reed 18550a0e9771SDarren Reed (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor, 18560a0e9771SDarren Reed (mod_hash_val_t *)&d); 18570a0e9771SDarren Reed 18580a0e9771SDarren Reed return (d); 18590a0e9771SDarren Reed } 18600a0e9771SDarren Reed 18610a0e9771SDarren Reed static void 18620a0e9771SDarren Reed bpf_dev_add(struct bpf_d *d) 18630a0e9771SDarren Reed { 18640a0e9771SDarren Reed (void) mod_hash_insert(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev, 18650a0e9771SDarren Reed (mod_hash_val_t)d); 18660a0e9771SDarren Reed } 18670a0e9771SDarren Reed 18680a0e9771SDarren Reed static void 18690a0e9771SDarren Reed bpf_dev_remove(struct bpf_d *d) 18700a0e9771SDarren Reed { 18710a0e9771SDarren Reed struct bpf_d *stor; 18720a0e9771SDarren Reed 18730a0e9771SDarren Reed (void) mod_hash_remove(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev, 18740a0e9771SDarren Reed 
(mod_hash_val_t *)&stor); 18750a0e9771SDarren Reed ASSERT(stor == d); 18760a0e9771SDarren Reed } 18770a0e9771SDarren Reed 18780a0e9771SDarren Reed /* 18790a0e9771SDarren Reed * bpf_def_get should only ever be called for a minor number that exists, 18800a0e9771SDarren Reed * thus there should always be a pointer in the hash table that corresponds 18810a0e9771SDarren Reed * to it. 18820a0e9771SDarren Reed */ 18830a0e9771SDarren Reed static struct bpf_d * 18840a0e9771SDarren Reed bpf_dev_get(minor_t minor) 18850a0e9771SDarren Reed { 18860a0e9771SDarren Reed struct bpf_d *d = NULL; 18870a0e9771SDarren Reed 18880a0e9771SDarren Reed (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor, 18890a0e9771SDarren Reed (mod_hash_val_t *)&d); 18900a0e9771SDarren Reed ASSERT(d != NULL); 18910a0e9771SDarren Reed 18920a0e9771SDarren Reed return (d); 18930a0e9771SDarren Reed } 1894