xref: /freebsd/sys/net/bpf.c (revision 984485a02eb3e63b4170dd911b72de38b35b2289)
1 /*-
2  * Copyright (c) 1990, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from the Stanford/CMU enet packet filter,
6  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8  * Berkeley Laboratory.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_bpf.h"
41 #include "opt_netgraph.h"
42 
43 #include <sys/types.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/conf.h>
47 #include <sys/fcntl.h>
48 #include <sys/jail.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/time.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/signalvar.h>
55 #include <sys/filio.h>
56 #include <sys/sockio.h>
57 #include <sys/ttycom.h>
58 #include <sys/uio.h>
59 
60 #include <sys/event.h>
61 #include <sys/file.h>
62 #include <sys/poll.h>
63 #include <sys/proc.h>
64 
65 #include <sys/socket.h>
66 
67 #include <net/if.h>
68 #include <net/bpf.h>
69 #include <net/bpf_buffer.h>
70 #ifdef BPF_JITTER
71 #include <net/bpf_jitter.h>
72 #endif
73 #include <net/bpf_zerocopy.h>
74 #include <net/bpfdesc.h>
75 #include <net/vnet.h>
76 
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #include <sys/kernel.h>
80 #include <sys/sysctl.h>
81 
82 #include <net80211/ieee80211_freebsd.h>
83 
84 #include <security/mac/mac_framework.h>
85 
86 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
87 
88 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
89 
90 #define PRINET  26			/* interruptible */
91 
92 /*
93  * bpf_iflist is a list of BPF interface structures, each corresponding to a
94  * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames).
97  */
98 static LIST_HEAD(, bpf_if)	bpf_iflist;
99 static struct mtx	bpf_mtx;		/* bpf global lock */
100 static int		bpf_bpfd_cnt;
101 
102 static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
103 static void	bpf_detachd(struct bpf_d *);
104 static void	bpf_freed(struct bpf_d *);
105 static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
106 		    struct sockaddr *, int *, struct bpf_insn *);
107 static int	bpf_setif(struct bpf_d *, struct ifreq *);
108 static void	bpf_timed_out(void *);
109 static __inline void
110 		bpf_wakeup(struct bpf_d *);
111 static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
112 		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
113 		    struct timeval *);
114 static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long);
116 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
117 static int	bpf_setdlt(struct bpf_d *, u_int);
118 static void	filt_bpfdetach(struct knote *);
119 static int	filt_bpfread(struct knote *, long);
120 static void	bpf_drvinit(void *);
121 static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
122 
123 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
124 int bpf_maxinsns = BPF_MAXINSNS;
125 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
126     &bpf_maxinsns, 0, "Maximum bpf program instructions");
127 static int bpf_zerocopy_enable = 0;
128 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
129     &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
130 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
131     bpf_stats_sysctl, "bpf statistics portal");
132 
133 static	d_open_t	bpfopen;
134 static	d_read_t	bpfread;
135 static	d_write_t	bpfwrite;
136 static	d_ioctl_t	bpfioctl;
137 static	d_poll_t	bpfpoll;
138 static	d_kqfilter_t	bpfkqfilter;
139 
140 static struct cdevsw bpf_cdevsw = {
141 	.d_version =	D_VERSION,
142 	.d_open =	bpfopen,
143 	.d_read =	bpfread,
144 	.d_write =	bpfwrite,
145 	.d_ioctl =	bpfioctl,
146 	.d_poll =	bpfpoll,
147 	.d_name =	"bpf",
148 	.d_kqfilter =	bpfkqfilter,
149 };
150 
151 static struct filterops bpfread_filtops = {
152 	.f_isfd = 1,
153 	.f_detach = filt_bpfdetach,
154 	.f_event = filt_bpfread,
155 };
156 
157 /*
158  * Wrapper functions for various buffering methods.  If the set of buffer
159  * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
161  */
162 static void
163 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
164     u_int len)
165 {
166 
167 	BPFD_LOCK_ASSERT(d);
168 
169 	switch (d->bd_bufmode) {
170 	case BPF_BUFMODE_BUFFER:
171 		return (bpf_buffer_append_bytes(d, buf, offset, src, len));
172 
173 	case BPF_BUFMODE_ZBUF:
174 		d->bd_zcopy++;
175 		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
176 
177 	default:
178 		panic("bpf_buf_append_bytes");
179 	}
180 }
181 
182 static void
183 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
184     u_int len)
185 {
186 
187 	BPFD_LOCK_ASSERT(d);
188 
189 	switch (d->bd_bufmode) {
190 	case BPF_BUFMODE_BUFFER:
191 		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
192 
193 	case BPF_BUFMODE_ZBUF:
194 		d->bd_zcopy++;
195 		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
196 
197 	default:
198 		panic("bpf_buf_append_mbuf");
199 	}
200 }
201 
202 /*
203  * This function gets called when the free buffer is re-assigned.
204  */
205 static void
206 bpf_buf_reclaimed(struct bpf_d *d)
207 {
208 
209 	BPFD_LOCK_ASSERT(d);
210 
211 	switch (d->bd_bufmode) {
212 	case BPF_BUFMODE_BUFFER:
213 		return;
214 
215 	case BPF_BUFMODE_ZBUF:
216 		bpf_zerocopy_buf_reclaimed(d);
217 		return;
218 
219 	default:
220 		panic("bpf_buf_reclaimed");
221 	}
222 }
223 
224 /*
225  * If the buffer mechanism has a way to decide that a held buffer can be made
226  * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
227  * returned if the buffer can be discarded, (0) is returned if it cannot.
228  */
229 static int
230 bpf_canfreebuf(struct bpf_d *d)
231 {
232 
233 	BPFD_LOCK_ASSERT(d);
234 
235 	switch (d->bd_bufmode) {
236 	case BPF_BUFMODE_ZBUF:
237 		return (bpf_zerocopy_canfreebuf(d));
238 	}
239 	return (0);
240 }
241 
242 /*
243  * Allow the buffer model to indicate that the current store buffer is
244  * immutable, regardless of the appearance of space.  Return (1) if the
245  * buffer is writable, and (0) if not.
246  */
247 static int
248 bpf_canwritebuf(struct bpf_d *d)
249 {
250 
251 	BPFD_LOCK_ASSERT(d);
252 
253 	switch (d->bd_bufmode) {
254 	case BPF_BUFMODE_ZBUF:
255 		return (bpf_zerocopy_canwritebuf(d));
256 	}
257 	return (1);
258 }
259 
260 /*
261  * Notify buffer model that an attempt to write to the store buffer has
262  * resulted in a dropped packet, in which case the buffer may be considered
263  * full.
264  */
265 static void
266 bpf_buffull(struct bpf_d *d)
267 {
268 
269 	BPFD_LOCK_ASSERT(d);
270 
271 	switch (d->bd_bufmode) {
272 	case BPF_BUFMODE_ZBUF:
273 		bpf_zerocopy_buffull(d);
274 		break;
275 	}
276 }
277 
278 /*
279  * Notify the buffer model that a buffer has moved into the hold position.
280  */
281 void
282 bpf_bufheld(struct bpf_d *d)
283 {
284 
285 	BPFD_LOCK_ASSERT(d);
286 
287 	switch (d->bd_bufmode) {
288 	case BPF_BUFMODE_ZBUF:
289 		bpf_zerocopy_bufheld(d);
290 		break;
291 	}
292 }
293 
294 static void
295 bpf_free(struct bpf_d *d)
296 {
297 
298 	switch (d->bd_bufmode) {
299 	case BPF_BUFMODE_BUFFER:
300 		return (bpf_buffer_free(d));
301 
302 	case BPF_BUFMODE_ZBUF:
303 		return (bpf_zerocopy_free(d));
304 
305 	default:
306 		panic("bpf_buf_free");
307 	}
308 }
309 
310 static int
311 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
312 {
313 
314 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
315 		return (EOPNOTSUPP);
316 	return (bpf_buffer_uiomove(d, buf, len, uio));
317 }
318 
319 static int
320 bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
321 {
322 
323 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
324 		return (EOPNOTSUPP);
325 	return (bpf_buffer_ioctl_sblen(d, i));
326 }
327 
328 static int
329 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
330 {
331 
332 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
333 		return (EOPNOTSUPP);
334 	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
335 }
336 
337 static int
338 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
339 {
340 
341 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
342 		return (EOPNOTSUPP);
343 	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
344 }
345 
346 static int
347 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
348 {
349 
350 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
351 		return (EOPNOTSUPP);
352 	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
353 }
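
/*
 * Illustrative userland sketch of driving the zero-copy wrappers above
 * (an assumed example, not part of this file; fd and bufsize are set up
 * elsewhere): a consumer must switch the descriptor into zero-copy mode
 * before binding an interface, size its buffers against BIOCGETZMAX,
 * and hand two page-aligned buffers to the kernel with BIOCSETZBUF:
 *
 *	u_int mode = BPF_BUFMODE_ZBUF;
 *	size_t zmax;
 *	struct bpf_zbuf bz;
 *
 *	ioctl(fd, BIOCSETBUFMODE, &mode);
 *	ioctl(fd, BIOCGETZMAX, &zmax);
 *	bz.bz_buflen = MIN(bufsize, zmax);
 *	posix_memalign(&bz.bz_bufa, getpagesize(), bz.bz_buflen);
 *	posix_memalign(&bz.bz_bufb, getpagesize(), bz.bz_buflen);
 *	ioctl(fd, BIOCSETZBUF, &bz);
 */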
354 
355 /*
356  * General BPF functions.
357  */
358 static int
359 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
360     struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
361 {
362 	const struct ieee80211_bpf_params *p;
363 	struct ether_header *eh;
364 	struct mbuf *m;
365 	int error;
366 	int len;
367 	int hlen;
368 	int slen;
369 
370 	/*
371 	 * Build a sockaddr based on the data link layer type.
372 	 * We do this at this level because the ethernet header
373 	 * is copied directly into the data field of the sockaddr.
374 	 * In the case of SLIP, there is no header and the packet
375 	 * is forwarded as is.
376 	 * Also, we are careful to leave room at the front of the mbuf
377 	 * for the link level header.
378 	 */
379 	switch (linktype) {
380 
381 	case DLT_SLIP:
382 		sockp->sa_family = AF_INET;
383 		hlen = 0;
384 		break;
385 
386 	case DLT_EN10MB:
387 		sockp->sa_family = AF_UNSPEC;
388 		/* XXX Would MAXLINKHDR be better? */
389 		hlen = ETHER_HDR_LEN;
390 		break;
391 
392 	case DLT_FDDI:
393 		sockp->sa_family = AF_IMPLINK;
394 		hlen = 0;
395 		break;
396 
397 	case DLT_RAW:
398 		sockp->sa_family = AF_UNSPEC;
399 		hlen = 0;
400 		break;
401 
402 	case DLT_NULL:
403 		/*
404 		 * null interface types require a 4 byte pseudo header which
405 		 * corresponds to the address family of the packet.
406 		 */
407 		sockp->sa_family = AF_UNSPEC;
408 		hlen = 4;
409 		break;
410 
411 	case DLT_ATM_RFC1483:
412 		/*
		 * The en(4) ATM driver requires a 4-byte ATM pseudo header;
		 * though it isn't standard, the vpi:vci pair needs to be
		 * specified anyway.
416 		 */
417 		sockp->sa_family = AF_UNSPEC;
418 		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
419 		break;
420 
421 	case DLT_PPP:
422 		sockp->sa_family = AF_UNSPEC;
423 		hlen = 4;	/* This should match PPP_HDRLEN */
424 		break;
425 
426 	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
427 		sockp->sa_family = AF_IEEE80211;
428 		hlen = 0;
429 		break;
430 
431 	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
432 		sockp->sa_family = AF_IEEE80211;
433 		sockp->sa_len = 12;	/* XXX != 0 */
434 		hlen = sizeof(struct ieee80211_bpf_params);
435 		break;
436 
437 	default:
438 		return (EIO);
439 	}
440 
441 	len = uio->uio_resid;
442 
443 	if (len - hlen > ifp->if_mtu)
444 		return (EMSGSIZE);
445 
446 	if ((unsigned)len > MJUM16BYTES)
447 		return (EIO);
448 
449 	if (len <= MHLEN)
450 		MGETHDR(m, M_WAIT, MT_DATA);
451 	else if (len <= MCLBYTES)
452 		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
453 	else
454 		m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
455 #if (MJUMPAGESIZE > MCLBYTES)
456 		    len <= MJUMPAGESIZE ? MJUMPAGESIZE :
457 #endif
458 		    (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
459 	m->m_pkthdr.len = m->m_len = len;
460 	m->m_pkthdr.rcvif = NULL;
461 	*mp = m;
462 
463 	if (m->m_len < hlen) {
464 		error = EPERM;
465 		goto bad;
466 	}
467 
468 	error = uiomove(mtod(m, u_char *), len, uio);
469 	if (error)
470 		goto bad;
471 
472 	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
473 	if (slen == 0) {
474 		error = EPERM;
475 		goto bad;
476 	}
477 
478 	/* Check for multicast destination */
479 	switch (linktype) {
480 	case DLT_EN10MB:
481 		eh = mtod(m, struct ether_header *);
482 		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
483 			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
484 			    ETHER_ADDR_LEN) == 0)
485 				m->m_flags |= M_BCAST;
486 			else
487 				m->m_flags |= M_MCAST;
488 		}
489 		break;
490 	}
491 
492 	/*
493 	 * Make room for link header, and copy it to sockaddr
494 	 */
495 	if (hlen != 0) {
496 		if (sockp->sa_family == AF_IEEE80211) {
497 			/*
498 			 * Collect true length from the parameter header
499 			 * NB: sockp is known to be zero'd so if we do a
500 			 *     short copy unspecified parameters will be
501 			 *     zero.
502 			 * NB: packet may not be aligned after stripping
503 			 *     bpf params
504 			 * XXX check ibp_vers
505 			 */
506 			p = mtod(m, const struct ieee80211_bpf_params *);
507 			hlen = p->ibp_len;
508 			if (hlen > sizeof(sockp->sa_data)) {
509 				error = EINVAL;
510 				goto bad;
511 			}
512 		}
513 		bcopy(m->m_data, sockp->sa_data, hlen);
514 	}
515 	*hdrlen = hlen;
516 
517 	return (0);
518 bad:
519 	m_freem(m);
520 	return (error);
521 }
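
/*
 * Illustrative userland sketch of the write() path that feeds
 * bpf_movein() (an assumed example, not part of this file; dst_mac,
 * src_mac, payload and PAYLOAD_LEN are set up elsewhere): the caller
 * supplies one complete link-layer frame per write().  For DLT_EN10MB:
 *
 *	u_char frame[ETHER_HDR_LEN + PAYLOAD_LEN];
 *
 *	memcpy(frame, dst_mac, ETHER_ADDR_LEN);
 *	memcpy(frame + ETHER_ADDR_LEN, src_mac, ETHER_ADDR_LEN);
 *	frame[12] = 0x08;	frame[13] = 0x00;	<- ETHERTYPE_IP
 *	memcpy(frame + ETHER_HDR_LEN, payload, PAYLOAD_LEN);
 *	write(fd, frame, sizeof(frame));
 *
 * Normally the interface fills in the link-layer source address on
 * output; setting BIOCSHDRCMPLT makes the kernel send the header
 * exactly as written.
 */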
522 
523 /*
524  * Attach file to the bpf interface, i.e. make d listen on bp.
525  */
526 static void
527 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
528 {
529 	/*
530 	 * Point d at bp, and add d to the interface's list of listeners.
531 	 * Finally, point the driver's bpf cookie at the interface so
532 	 * it will divert packets to bpf.
533 	 */
534 	BPFIF_LOCK(bp);
535 	d->bd_bif = bp;
536 	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
537 
538 	bpf_bpfd_cnt++;
539 	BPFIF_UNLOCK(bp);
540 
541 	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
542 }
543 
544 /*
545  * Detach a file from its interface.
546  */
547 static void
548 bpf_detachd(struct bpf_d *d)
549 {
550 	int error;
551 	struct bpf_if *bp;
552 	struct ifnet *ifp;
553 
554 	bp = d->bd_bif;
555 	BPFIF_LOCK(bp);
556 	BPFD_LOCK(d);
557 	ifp = d->bd_bif->bif_ifp;
558 
559 	/*
560 	 * Remove d from the interface's descriptor list.
561 	 */
562 	LIST_REMOVE(d, bd_next);
563 
564 	bpf_bpfd_cnt--;
565 	d->bd_bif = NULL;
566 	BPFD_UNLOCK(d);
567 	BPFIF_UNLOCK(bp);
568 
569 	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
570 
571 	/*
572 	 * Check if this descriptor had requested promiscuous mode.
573 	 * If so, turn it off.
574 	 */
575 	if (d->bd_promisc) {
576 		d->bd_promisc = 0;
577 		CURVNET_SET(ifp->if_vnet);
578 		error = ifpromisc(ifp, 0);
579 		CURVNET_RESTORE();
580 		if (error != 0 && error != ENXIO) {
581 			/*
582 			 * ENXIO can happen if a pccard is unplugged
583 			 * Something is really wrong if we were able to put
584 			 * the driver into promiscuous mode, but can't
585 			 * take it out.
586 			 */
587 			if_printf(bp->bif_ifp,
588 				"bpf_detach: ifpromisc failed (%d)\n", error);
589 		}
590 	}
591 }
592 
593 /*
594  * Close the descriptor by detaching it from its interface,
595  * deallocating its buffers, and marking it free.
596  */
597 static void
598 bpf_dtor(void *data)
599 {
600 	struct bpf_d *d = data;
601 
602 	BPFD_LOCK(d);
603 	if (d->bd_state == BPF_WAITING)
604 		callout_stop(&d->bd_callout);
605 	d->bd_state = BPF_IDLE;
606 	BPFD_UNLOCK(d);
607 	funsetown(&d->bd_sigio);
608 	mtx_lock(&bpf_mtx);
609 	if (d->bd_bif)
610 		bpf_detachd(d);
611 	mtx_unlock(&bpf_mtx);
612 	selwakeuppri(&d->bd_sel, PRINET);
613 #ifdef MAC
614 	mac_bpfdesc_destroy(d);
615 #endif /* MAC */
616 	knlist_destroy(&d->bd_sel.si_note);
617 	bpf_freed(d);
618 	free(d, M_BPF);
619 }
620 
621 /*
 * Open the BPF device.  A new descriptor is allocated and bound to the
 * open file via devfs_set_cdevpriv(); any error from that call is returned.
624  */
625 /* ARGSUSED */
626 static	int
627 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
628 {
629 	struct bpf_d *d;
630 	int error;
631 
632 	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
633 	error = devfs_set_cdevpriv(d, bpf_dtor);
634 	if (error != 0) {
635 		free(d, M_BPF);
636 		return (error);
637 	}
638 
639 	/*
640 	 * For historical reasons, perform a one-time initialization call to
641 	 * the buffer routines, even though we're not yet committed to a
642 	 * particular buffer method.
643 	 */
644 	bpf_buffer_init(d);
645 	d->bd_bufmode = BPF_BUFMODE_BUFFER;
646 	d->bd_sig = SIGIO;
647 	d->bd_direction = BPF_D_INOUT;
648 	d->bd_pid = td->td_proc->p_pid;
649 #ifdef MAC
650 	mac_bpfdesc_init(d);
651 	mac_bpfdesc_create(td->td_ucred, d);
652 #endif
653 	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
654 	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
655 	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
656 
657 	return (0);
658 }
659 
660 /*
661  *  bpfread - read next chunk of packets from buffers
662  */
663 static	int
664 bpfread(struct cdev *dev, struct uio *uio, int ioflag)
665 {
666 	struct bpf_d *d;
667 	int timed_out;
668 	int error;
669 
670 	error = devfs_get_cdevpriv((void **)&d);
671 	if (error != 0)
672 		return (error);
673 
674 	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
677 	 */
678 	if (uio->uio_resid != d->bd_bufsize)
679 		return (EINVAL);
680 
681 	BPFD_LOCK(d);
682 	d->bd_pid = curthread->td_proc->p_pid;
683 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
684 		BPFD_UNLOCK(d);
685 		return (EOPNOTSUPP);
686 	}
687 	if (d->bd_state == BPF_WAITING)
688 		callout_stop(&d->bd_callout);
689 	timed_out = (d->bd_state == BPF_TIMED_OUT);
690 	d->bd_state = BPF_IDLE;
691 	/*
692 	 * If the hold buffer is empty, then do a timed sleep, which
693 	 * ends when the timeout expires or when enough packets
694 	 * have arrived to fill the store buffer.
695 	 */
696 	while (d->bd_hbuf == NULL) {
697 		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
698 			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
701 			 * Rotate the buffers and return what's here.
702 			 */
703 			ROTATE_BUFFERS(d);
704 			break;
705 		}
706 
707 		/*
708 		 * No data is available, check to see if the bpf device
709 		 * is still pointed at a real interface.  If not, return
710 		 * ENXIO so that the userland process knows to rebind
711 		 * it before using it again.
712 		 */
713 		if (d->bd_bif == NULL) {
714 			BPFD_UNLOCK(d);
715 			return (ENXIO);
716 		}
717 
718 		if (ioflag & O_NONBLOCK) {
719 			BPFD_UNLOCK(d);
720 			return (EWOULDBLOCK);
721 		}
722 		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
723 		     "bpf", d->bd_rtout);
724 		if (error == EINTR || error == ERESTART) {
725 			BPFD_UNLOCK(d);
726 			return (error);
727 		}
728 		if (error == EWOULDBLOCK) {
729 			/*
730 			 * On a timeout, return what's in the buffer,
731 			 * which may be nothing.  If there is something
732 			 * in the store buffer, we can rotate the buffers.
733 			 */
734 			if (d->bd_hbuf)
735 				/*
736 				 * We filled up the buffer in between
737 				 * getting the timeout and arriving
738 				 * here, so we don't need to rotate.
739 				 */
740 				break;
741 
742 			if (d->bd_slen == 0) {
743 				BPFD_UNLOCK(d);
744 				return (0);
745 			}
746 			ROTATE_BUFFERS(d);
747 			break;
748 		}
749 	}
750 	/*
751 	 * At this point, we know we have something in the hold slot.
752 	 */
753 	BPFD_UNLOCK(d);
754 
755 	/*
756 	 * Move data from hold buffer into user space.
757 	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bd_bufsize bytes.
759 	 *
760 	 * XXXRW: More synchronization needed here: what if a second thread
761 	 * issues a read on the same fd at the same time?  Don't want this
762 	 * getting invalidated.
763 	 */
764 	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
765 
766 	BPFD_LOCK(d);
767 	d->bd_fbuf = d->bd_hbuf;
768 	d->bd_hbuf = NULL;
769 	d->bd_hlen = 0;
770 	bpf_buf_reclaimed(d);
771 	BPFD_UNLOCK(d);
772 
773 	return (error);
774 }
775 
776 /*
777  * If there are processes sleeping on this descriptor, wake them up.
778  */
779 static __inline void
780 bpf_wakeup(struct bpf_d *d)
781 {
782 
783 	BPFD_LOCK_ASSERT(d);
784 	if (d->bd_state == BPF_WAITING) {
785 		callout_stop(&d->bd_callout);
786 		d->bd_state = BPF_IDLE;
787 	}
788 	wakeup(d);
789 	if (d->bd_async && d->bd_sig && d->bd_sigio)
790 		pgsigio(&d->bd_sigio, d->bd_sig, 0);
791 
792 	selwakeuppri(&d->bd_sel, PRINET);
793 	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
794 }
795 
796 static void
797 bpf_timed_out(void *arg)
798 {
799 	struct bpf_d *d = (struct bpf_d *)arg;
800 
801 	BPFD_LOCK(d);
802 	if (d->bd_state == BPF_WAITING) {
803 		d->bd_state = BPF_TIMED_OUT;
804 		if (d->bd_slen != 0)
805 			bpf_wakeup(d);
806 	}
807 	BPFD_UNLOCK(d);
808 }
809 
810 static int
811 bpf_ready(struct bpf_d *d)
812 {
813 
814 	BPFD_LOCK_ASSERT(d);
815 
816 	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
817 		return (1);
818 	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
819 	    d->bd_slen != 0)
820 		return (1);
821 	return (0);
822 }
823 
824 static int
825 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
826 {
827 	struct bpf_d *d;
828 	struct ifnet *ifp;
829 	struct mbuf *m, *mc;
830 	struct sockaddr dst;
831 	int error, hlen;
832 
833 	error = devfs_get_cdevpriv((void **)&d);
834 	if (error != 0)
835 		return (error);
836 
837 	d->bd_pid = curthread->td_proc->p_pid;
838 	d->bd_wcount++;
839 	if (d->bd_bif == NULL) {
840 		d->bd_wdcount++;
841 		return (ENXIO);
842 	}
843 
844 	ifp = d->bd_bif->bif_ifp;
845 
846 	if ((ifp->if_flags & IFF_UP) == 0) {
847 		d->bd_wdcount++;
848 		return (ENETDOWN);
849 	}
850 
851 	if (uio->uio_resid == 0) {
852 		d->bd_wdcount++;
853 		return (0);
854 	}
855 
856 	bzero(&dst, sizeof(dst));
857 	m = NULL;
858 	hlen = 0;
859 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
860 	    &m, &dst, &hlen, d->bd_wfilter);
861 	if (error) {
862 		d->bd_wdcount++;
863 		return (error);
864 	}
865 	d->bd_wfcount++;
866 	if (d->bd_hdrcmplt)
867 		dst.sa_family = pseudo_AF_HDRCMPLT;
868 
869 	if (d->bd_feedback) {
870 		mc = m_dup(m, M_DONTWAIT);
871 		if (mc != NULL)
872 			mc->m_pkthdr.rcvif = ifp;
873 		/* Set M_PROMISC for outgoing packets to be discarded. */
874 		if (d->bd_direction == BPF_D_INOUT)
875 			m->m_flags |= M_PROMISC;
876 	} else
877 		mc = NULL;
878 
879 	m->m_pkthdr.len -= hlen;
880 	m->m_len -= hlen;
881 	m->m_data += hlen;	/* XXX */
882 
883 	CURVNET_SET(ifp->if_vnet);
884 #ifdef MAC
885 	BPFD_LOCK(d);
886 	mac_bpfdesc_create_mbuf(d, m);
887 	if (mc != NULL)
888 		mac_bpfdesc_create_mbuf(d, mc);
889 	BPFD_UNLOCK(d);
890 #endif
891 
892 	error = (*ifp->if_output)(ifp, m, &dst, NULL);
893 	if (error)
894 		d->bd_wdcount++;
895 
896 	if (mc != NULL) {
897 		if (error == 0)
898 			(*ifp->if_input)(ifp, mc);
899 		else
900 			m_freem(mc);
901 	}
902 	CURVNET_RESTORE();
903 
904 	return (error);
905 }
906 
907 /*
908  * Reset a descriptor by flushing its packet buffer and clearing the receive
909  * and drop counts.  This is doable for kernel-only buffers, but with
910  * zero-copy buffers, we can't write to (or rotate) buffers that are
911  * currently owned by userspace.  It would be nice if we could encapsulate
912  * this logic in the buffer code rather than here.
913  */
914 static void
915 reset_d(struct bpf_d *d)
916 {
917 
918 	mtx_assert(&d->bd_mtx, MA_OWNED);
919 
920 	if ((d->bd_hbuf != NULL) &&
921 	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
922 		/* Free the hold buffer. */
923 		d->bd_fbuf = d->bd_hbuf;
924 		d->bd_hbuf = NULL;
925 		d->bd_hlen = 0;
926 		bpf_buf_reclaimed(d);
927 	}
928 	if (bpf_canwritebuf(d))
929 		d->bd_slen = 0;
930 	d->bd_rcount = 0;
931 	d->bd_dcount = 0;
932 	d->bd_fcount = 0;
933 	d->bd_wcount = 0;
934 	d->bd_wfcount = 0;
935 	d->bd_wdcount = 0;
936 	d->bd_zcopy = 0;
937 }
938 
939 /*
940  *  FIONREAD		Check for read packet available.
941  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
942  *  BIOCGBLEN		Get buffer len [for read()].
943  *  BIOCSETF		Set read filter.
944  *  BIOCSETFNR		Set read filter without resetting descriptor.
945  *  BIOCSETWF		Set write filter.
946  *  BIOCFLUSH		Flush read packet buffer.
947  *  BIOCPROMISC		Put interface into promiscuous mode.
948  *  BIOCGDLT		Get link layer type.
949  *  BIOCGETIF		Get interface name.
950  *  BIOCSETIF		Set interface.
951  *  BIOCSRTIMEOUT	Set read timeout.
952  *  BIOCGRTIMEOUT	Get read timeout.
953  *  BIOCGSTATS		Get packet stats.
954  *  BIOCIMMEDIATE	Set immediate mode.
955  *  BIOCVERSION		Get filter language version.
956  *  BIOCGHDRCMPLT	Get "header already complete" flag
957  *  BIOCSHDRCMPLT	Set "header already complete" flag
958  *  BIOCGDIRECTION	Get packet direction flag
959  *  BIOCSDIRECTION	Set packet direction flag
960  *  BIOCLOCK		Set "locked" flag
961  *  BIOCFEEDBACK	Set packet feedback mode.
962  *  BIOCSETZBUF		Set current zero-copy buffer locations.
963  *  BIOCGETZMAX		Get maximum zero-copy buffer size.
964  *  BIOCROTZBUF		Force rotation of zero-copy buffer
965  *  BIOCSETBUFMODE	Set buffer mode.
966  *  BIOCGETBUFMODE	Get current buffer mode.
967  */
968 /* ARGSUSED */
969 static	int
970 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
971     struct thread *td)
972 {
973 	struct bpf_d *d;
974 	int error;
975 
976 	error = devfs_get_cdevpriv((void **)&d);
977 	if (error != 0)
978 		return (error);
979 
980 	/*
981 	 * Refresh PID associated with this descriptor.
982 	 */
983 	BPFD_LOCK(d);
984 	d->bd_pid = td->td_proc->p_pid;
985 	if (d->bd_state == BPF_WAITING)
986 		callout_stop(&d->bd_callout);
987 	d->bd_state = BPF_IDLE;
988 	BPFD_UNLOCK(d);
989 
990 	if (d->bd_locked == 1) {
991 		switch (cmd) {
992 		case BIOCGBLEN:
993 		case BIOCFLUSH:
994 		case BIOCGDLT:
995 		case BIOCGDLTLIST:
996 		case BIOCGETIF:
997 		case BIOCGRTIMEOUT:
998 		case BIOCGSTATS:
999 		case BIOCVERSION:
1000 		case BIOCGRSIG:
1001 		case BIOCGHDRCMPLT:
1002 		case BIOCFEEDBACK:
1003 		case FIONREAD:
1004 		case BIOCLOCK:
1005 		case BIOCSRTIMEOUT:
1006 		case BIOCIMMEDIATE:
1007 		case TIOCGPGRP:
1008 		case BIOCROTZBUF:
1009 			break;
1010 		default:
1011 			return (EPERM);
1012 		}
1013 	}
1014 	CURVNET_SET(TD_TO_VNET(td));
1015 	switch (cmd) {
1016 
1017 	default:
1018 		error = EINVAL;
1019 		break;
1020 
1021 	/*
1022 	 * Check for read packet available.
1023 	 */
1024 	case FIONREAD:
1025 		{
1026 			int n;
1027 
1028 			BPFD_LOCK(d);
1029 			n = d->bd_slen;
1030 			if (d->bd_hbuf)
1031 				n += d->bd_hlen;
1032 			BPFD_UNLOCK(d);
1033 
1034 			*(int *)addr = n;
1035 			break;
1036 		}
1037 
1038 	case SIOCGIFADDR:
1039 		{
1040 			struct ifnet *ifp;
1041 
1042 			if (d->bd_bif == NULL)
1043 				error = EINVAL;
1044 			else {
1045 				ifp = d->bd_bif->bif_ifp;
1046 				error = (*ifp->if_ioctl)(ifp, cmd, addr);
1047 			}
1048 			break;
1049 		}
1050 
1051 	/*
1052 	 * Get buffer len [for read()].
1053 	 */
1054 	case BIOCGBLEN:
1055 		*(u_int *)addr = d->bd_bufsize;
1056 		break;
1057 
1058 	/*
1059 	 * Set buffer length.
1060 	 */
1061 	case BIOCSBLEN:
1062 		error = bpf_ioctl_sblen(d, (u_int *)addr);
1063 		break;
1064 
1065 	/*
1066 	 * Set link layer read filter.
1067 	 */
1068 	case BIOCSETF:
1069 	case BIOCSETFNR:
1070 	case BIOCSETWF:
1071 		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1072 		break;
1073 
1074 	/*
1075 	 * Flush read packet buffer.
1076 	 */
1077 	case BIOCFLUSH:
1078 		BPFD_LOCK(d);
1079 		reset_d(d);
1080 		BPFD_UNLOCK(d);
1081 		break;
1082 
1083 	/*
1084 	 * Put interface into promiscuous mode.
1085 	 */
1086 	case BIOCPROMISC:
1087 		if (d->bd_bif == NULL) {
1088 			/*
1089 			 * No interface attached yet.
1090 			 */
1091 			error = EINVAL;
1092 			break;
1093 		}
1094 		if (d->bd_promisc == 0) {
1095 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
1096 			if (error == 0)
1097 				d->bd_promisc = 1;
1098 		}
1099 		break;
1100 
1101 	/*
1102 	 * Get current data link type.
1103 	 */
1104 	case BIOCGDLT:
1105 		if (d->bd_bif == NULL)
1106 			error = EINVAL;
1107 		else
1108 			*(u_int *)addr = d->bd_bif->bif_dlt;
1109 		break;
1110 
1111 	/*
1112 	 * Get a list of supported data link types.
1113 	 */
1114 	case BIOCGDLTLIST:
1115 		if (d->bd_bif == NULL)
1116 			error = EINVAL;
1117 		else
1118 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1119 		break;
1120 
1121 	/*
1122 	 * Set data link type.
1123 	 */
1124 	case BIOCSDLT:
1125 		if (d->bd_bif == NULL)
1126 			error = EINVAL;
1127 		else
1128 			error = bpf_setdlt(d, *(u_int *)addr);
1129 		break;
1130 
1131 	/*
1132 	 * Get interface name.
1133 	 */
1134 	case BIOCGETIF:
1135 		if (d->bd_bif == NULL)
1136 			error = EINVAL;
1137 		else {
1138 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
1139 			struct ifreq *const ifr = (struct ifreq *)addr;
1140 
1141 			strlcpy(ifr->ifr_name, ifp->if_xname,
1142 			    sizeof(ifr->ifr_name));
1143 		}
1144 		break;
1145 
1146 	/*
1147 	 * Set interface.
1148 	 */
1149 	case BIOCSETIF:
1150 		error = bpf_setif(d, (struct ifreq *)addr);
1151 		break;
1152 
1153 	/*
1154 	 * Set read timeout.
1155 	 */
1156 	case BIOCSRTIMEOUT:
1157 		{
1158 			struct timeval *tv = (struct timeval *)addr;
1159 
1160 			/*
1161 			 * Subtract 1 tick from tvtohz() since this isn't
1162 			 * a one-shot timer.
1163 			 */
1164 			if ((error = itimerfix(tv)) == 0)
1165 				d->bd_rtout = tvtohz(tv) - 1;
1166 			break;
1167 		}
1168 
1169 	/*
1170 	 * Get read timeout.
1171 	 */
1172 	case BIOCGRTIMEOUT:
1173 		{
1174 			struct timeval *tv = (struct timeval *)addr;
1175 
1176 			tv->tv_sec = d->bd_rtout / hz;
1177 			tv->tv_usec = (d->bd_rtout % hz) * tick;
1178 			break;
1179 		}
1180 
1181 	/*
1182 	 * Get packet stats.
1183 	 */
1184 	case BIOCGSTATS:
1185 		{
1186 			struct bpf_stat *bs = (struct bpf_stat *)addr;
1187 
1188 			/* XXXCSJP overflow */
1189 			bs->bs_recv = d->bd_rcount;
1190 			bs->bs_drop = d->bd_dcount;
1191 			break;
1192 		}
1193 
1194 	/*
1195 	 * Set immediate mode.
1196 	 */
1197 	case BIOCIMMEDIATE:
1198 		d->bd_immediate = *(u_int *)addr;
1199 		break;
1200 
1201 	case BIOCVERSION:
1202 		{
1203 			struct bpf_version *bv = (struct bpf_version *)addr;
1204 
1205 			bv->bv_major = BPF_MAJOR_VERSION;
1206 			bv->bv_minor = BPF_MINOR_VERSION;
1207 			break;
1208 		}
1209 
1210 	/*
1211 	 * Get "header already complete" flag
1212 	 */
1213 	case BIOCGHDRCMPLT:
1214 		*(u_int *)addr = d->bd_hdrcmplt;
1215 		break;
1216 
1217 	/*
1218 	 * Set "header already complete" flag
1219 	 */
1220 	case BIOCSHDRCMPLT:
1221 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1222 		break;
1223 
1224 	/*
1225 	 * Get packet direction flag
1226 	 */
1227 	case BIOCGDIRECTION:
1228 		*(u_int *)addr = d->bd_direction;
1229 		break;
1230 
1231 	/*
1232 	 * Set packet direction flag
1233 	 */
1234 	case BIOCSDIRECTION:
1235 		{
1236 			u_int	direction;
1237 
1238 			direction = *(u_int *)addr;
1239 			switch (direction) {
1240 			case BPF_D_IN:
1241 			case BPF_D_INOUT:
1242 			case BPF_D_OUT:
1243 				d->bd_direction = direction;
1244 				break;
1245 			default:
1246 				error = EINVAL;
1247 			}
1248 		}
1249 		break;
1250 
1251 	case BIOCFEEDBACK:
1252 		d->bd_feedback = *(u_int *)addr;
1253 		break;
1254 
1255 	case BIOCLOCK:
1256 		d->bd_locked = 1;
1257 		break;
1258 
1259 	case FIONBIO:		/* Non-blocking I/O */
1260 		break;
1261 
1262 	case FIOASYNC:		/* Send signal on receive packets */
1263 		d->bd_async = *(int *)addr;
1264 		break;
1265 
1266 	case FIOSETOWN:
1267 		error = fsetown(*(int *)addr, &d->bd_sigio);
1268 		break;
1269 
1270 	case FIOGETOWN:
1271 		*(int *)addr = fgetown(&d->bd_sigio);
1272 		break;
1273 
1274 	/* This is deprecated, FIOSETOWN should be used instead. */
1275 	case TIOCSPGRP:
1276 		error = fsetown(-(*(int *)addr), &d->bd_sigio);
1277 		break;
1278 
1279 	/* This is deprecated, FIOGETOWN should be used instead. */
1280 	case TIOCGPGRP:
1281 		*(int *)addr = -fgetown(&d->bd_sigio);
1282 		break;
1283 
1284 	case BIOCSRSIG:		/* Set receive signal */
1285 		{
1286 			u_int sig;
1287 
1288 			sig = *(u_int *)addr;
1289 
1290 			if (sig >= NSIG)
1291 				error = EINVAL;
1292 			else
1293 				d->bd_sig = sig;
1294 			break;
1295 		}
1296 	case BIOCGRSIG:
1297 		*(u_int *)addr = d->bd_sig;
1298 		break;
1299 
1300 	case BIOCGETBUFMODE:
1301 		*(u_int *)addr = d->bd_bufmode;
1302 		break;
1303 
1304 	case BIOCSETBUFMODE:
1305 		/*
1306 		 * Allow the buffering mode to be changed as long as we
1307 		 * haven't yet committed to a particular mode.  Our
1308 		 * definition of commitment, for now, is whether or not a
1309 		 * buffer has been allocated or an interface attached, since
1310 		 * that's the point where things get tricky.
1311 		 */
1312 		switch (*(u_int *)addr) {
1313 		case BPF_BUFMODE_BUFFER:
1314 			break;
1315 
1316 		case BPF_BUFMODE_ZBUF:
1317 			if (bpf_zerocopy_enable)
1318 				break;
			/* FALLTHROUGH */

		default:
			CURVNET_RESTORE();
			return (EINVAL);
1323 		}
1324 
1325 		BPFD_LOCK(d);
1326 		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
1327 		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			CURVNET_RESTORE();
			return (EBUSY);
1330 		}
1331 		d->bd_bufmode = *(u_int *)addr;
1332 		BPFD_UNLOCK(d);
1333 		break;
1334 
	case BIOCGETZMAX:
		error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
		break;

	case BIOCSETZBUF:
		error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
		break;

	case BIOCROTZBUF:
		error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
		break;
1343 	}
1344 	CURVNET_RESTORE();
1345 	return (error);
1346 }
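
/*
 * Illustrative userland sketch of the usual open/bind/read sequence
 * built from the ioctls handled above (an assumed example, not part of
 * this file; "em0" is a placeholder interface name):
 *
 *	struct ifreq ifr;
 *	u_int blen, imm = 1;
 *	int fd = open("/dev/bpf", O_RDONLY);
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCIMMEDIATE, &imm);
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	buf = malloc(blen);
 *	cc = read(fd, buf, blen);
 *
 * Note that bpfread() above rejects reads whose size is not exactly
 * the value reported by BIOCGBLEN.
 */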
1347 
1348 /*
1349  * Set d's packet filter program to fp.  If this file already has a filter,
1350  * free it and replace it.  Returns EINVAL for bogus requests.
1351  */
1352 static int
1353 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1354 {
1355 	struct bpf_insn *fcode, *old;
1356 	u_int wfilter, flen, size;
1357 #ifdef BPF_JITTER
1358 	bpf_jit_filter *ofunc;
1359 #endif
1360 
1361 	if (cmd == BIOCSETWF) {
1362 		old = d->bd_wfilter;
1363 		wfilter = 1;
1364 #ifdef BPF_JITTER
1365 		ofunc = NULL;
1366 #endif
1367 	} else {
1368 		wfilter = 0;
1369 		old = d->bd_rfilter;
1370 #ifdef BPF_JITTER
1371 		ofunc = d->bd_bfilter;
1372 #endif
1373 	}
1374 	if (fp->bf_insns == NULL) {
1375 		if (fp->bf_len != 0)
1376 			return (EINVAL);
1377 		BPFD_LOCK(d);
1378 		if (wfilter)
1379 			d->bd_wfilter = NULL;
1380 		else {
1381 			d->bd_rfilter = NULL;
1382 #ifdef BPF_JITTER
1383 			d->bd_bfilter = NULL;
1384 #endif
1385 			if (cmd == BIOCSETF)
1386 				reset_d(d);
1387 		}
1388 		BPFD_UNLOCK(d);
1389 		if (old != NULL)
1390 			free((caddr_t)old, M_BPF);
1391 #ifdef BPF_JITTER
1392 		if (ofunc != NULL)
1393 			bpf_destroy_jit_filter(ofunc);
1394 #endif
1395 		return (0);
1396 	}
1397 	flen = fp->bf_len;
1398 	if (flen > bpf_maxinsns)
1399 		return (EINVAL);
1400 
1401 	size = flen * sizeof(*fp->bf_insns);
1402 	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
1403 	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
1404 	    bpf_validate(fcode, (int)flen)) {
1405 		BPFD_LOCK(d);
1406 		if (wfilter)
1407 			d->bd_wfilter = fcode;
1408 		else {
1409 			d->bd_rfilter = fcode;
1410 #ifdef BPF_JITTER
1411 			d->bd_bfilter = bpf_jitter(fcode, flen);
1412 #endif
1413 			if (cmd == BIOCSETF)
1414 				reset_d(d);
1415 		}
1416 		BPFD_UNLOCK(d);
1417 		if (old != NULL)
1418 			free((caddr_t)old, M_BPF);
1419 #ifdef BPF_JITTER
1420 		if (ofunc != NULL)
1421 			bpf_destroy_jit_filter(ofunc);
1422 #endif
1423 
1424 		return (0);
1425 	}
1426 	free((caddr_t)fcode, M_BPF);
1427 	return (EINVAL);
1428 }
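
/*
 * Illustrative sketch of a program a user might install through
 * BIOCSETF (an assumed example, not part of this file): on a DLT_EN10MB
 * descriptor, accept IPv4 packets in full and drop everything else.
 * bpf_setf() above copies such a program in and runs bpf_validate()
 * on it before installing it.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x0800, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *		BPF_STMT(BPF_RET+BPF_K, 0),
 *	};
 *	struct bpf_program prog = { 4, insns };
 *
 *	ioctl(fd, BIOCSETF, &prog);
 */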
1429 
1430 /*
1431  * Detach a file from its current interface (if attached at all) and attach
1432  * to the interface indicated by the name stored in ifr.
1433  * Return an errno or 0.
1434  */
1435 static int
1436 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1437 {
1438 	struct bpf_if *bp;
1439 	struct ifnet *theywant;
1440 
1441 	theywant = ifunit(ifr->ifr_name);
1442 	if (theywant == NULL || theywant->if_bpf == NULL)
1443 		return (ENXIO);
1444 
1445 	bp = theywant->if_bpf;
1446 
1447 	/*
1448 	 * Behavior here depends on the buffering model.  If we're using
1449 	 * kernel memory buffers, then we can allocate them here.  If we're
1450 	 * using zero-copy, then the user process must have registered
1451 	 * buffers by the time we get here.  If not, return an error.
1452 	 *
1453 	 * XXXRW: There are locking issues here with multi-threaded use: what
1454 	 * if two threads try to set the interface at once?
1455 	 */
1456 	switch (d->bd_bufmode) {
1457 	case BPF_BUFMODE_BUFFER:
1458 		if (d->bd_sbuf == NULL)
1459 			bpf_buffer_alloc(d);
1460 		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
1461 		break;
1462 
1463 	case BPF_BUFMODE_ZBUF:
1464 		if (d->bd_sbuf == NULL)
1465 			return (EINVAL);
1466 		break;
1467 
1468 	default:
1469 		panic("bpf_setif: bufmode %d", d->bd_bufmode);
1470 	}
1471 	if (bp != d->bd_bif) {
1472 		if (d->bd_bif)
1473 			/*
1474 			 * Detach if attached to something else.
1475 			 */
1476 			bpf_detachd(d);
1477 
1478 		bpf_attachd(d, bp);
1479 	}
1480 	BPFD_LOCK(d);
1481 	reset_d(d);
1482 	BPFD_UNLOCK(d);
1483 	return (0);
1484 }
1485 
1486 /*
1487  * Support for select() and poll() system calls
1488  *
1489  * Return true iff the specific operation will not block indefinitely.
1490  * Otherwise, return false but make a note that a selwakeup() must be done.
1491  */
1492 static int
1493 bpfpoll(struct cdev *dev, int events, struct thread *td)
1494 {
1495 	struct bpf_d *d;
1496 	int revents;
1497 
1498 	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
1499 		return (events &
1500 		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
1501 
1502 	/*
1503 	 * Refresh PID associated with this descriptor.
1504 	 */
1505 	revents = events & (POLLOUT | POLLWRNORM);
1506 	BPFD_LOCK(d);
1507 	d->bd_pid = td->td_proc->p_pid;
1508 	if (events & (POLLIN | POLLRDNORM)) {
1509 		if (bpf_ready(d))
1510 			revents |= events & (POLLIN | POLLRDNORM);
1511 		else {
1512 			selrecord(td, &d->bd_sel);
1513 			/* Start the read timeout if necessary. */
1514 			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1515 				callout_reset(&d->bd_callout, d->bd_rtout,
1516 				    bpf_timed_out, d);
1517 				d->bd_state = BPF_WAITING;
1518 			}
1519 		}
1520 	}
1521 	BPFD_UNLOCK(d);
1522 	return (revents);
1523 }
1524 
1525 /*
1526  * Support for kevent() system call.  Register EVFILT_READ filters and
1527  * reject all others.
1528  */
1529 int
1530 bpfkqfilter(struct cdev *dev, struct knote *kn)
1531 {
1532 	struct bpf_d *d;
1533 
1534 	if (devfs_get_cdevpriv((void **)&d) != 0 ||
1535 	    kn->kn_filter != EVFILT_READ)
1536 		return (1);
1537 
1538 	/*
1539 	 * Refresh PID associated with this descriptor.
1540 	 */
1541 	BPFD_LOCK(d);
1542 	d->bd_pid = curthread->td_proc->p_pid;
1543 	kn->kn_fop = &bpfread_filtops;
1544 	kn->kn_hook = d;
1545 	knlist_add(&d->bd_sel.si_note, kn, 1);
1546 	BPFD_UNLOCK(d);
1547 
1548 	return (0);
1549 }
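
/*
 * Illustrative userland sketch (an assumed example, not part of this
 * file): waiting for readability with kevent(2), which ends up in the
 * filter registered above.
 *
 *	struct kevent kev, ev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
 *		... ev.data, filled in by filt_bpfread() below, holds
 *		    the number of bytes ready to read ...
 */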
1550 
1551 static void
1552 filt_bpfdetach(struct knote *kn)
1553 {
1554 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1555 
1556 	knlist_remove(&d->bd_sel.si_note, kn, 0);
1557 }
1558 
1559 static int
1560 filt_bpfread(struct knote *kn, long hint)
1561 {
1562 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1563 	int ready;
1564 
1565 	BPFD_LOCK_ASSERT(d);
1566 	ready = bpf_ready(d);
1567 	if (ready) {
1568 		kn->kn_data = d->bd_slen;
1569 		if (d->bd_hbuf)
1570 			kn->kn_data += d->bd_hlen;
1571 	}
1572 	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1573 		callout_reset(&d->bd_callout, d->bd_rtout,
1574 		    bpf_timed_out, d);
1575 		d->bd_state = BPF_WAITING;
1576 	}
1577 
1578 	return (ready);
1579 }
1580 
1581 /*
1582  * Incoming linkage from device drivers.  Process the packet pkt, of length
1583  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1584  * by each process' filter, and if accepted, stashed into the corresponding
1585  * buffer.
1586  */
1587 void
1588 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1589 {
1590 	struct bpf_d *d;
1591 #ifdef BPF_JITTER
1592 	bpf_jit_filter *bf;
1593 #endif
1594 	u_int slen;
1595 	int gottime;
1596 	struct timeval tv;
1597 
1598 	gottime = 0;
1599 	BPFIF_LOCK(bp);
1600 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1601 		BPFD_LOCK(d);
1602 		++d->bd_rcount;
1603 		/*
		 * NB: We don't call BPF_CHECK_DIRECTION() here since there is no
		 * way for the caller to indicate to us whether this packet
1606 		 * is inbound or outbound.  In the bpf_mtap() routines, we use
1607 		 * the interface pointers on the mbuf to figure it out.
1608 		 */
1609 #ifdef BPF_JITTER
1610 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
1611 		if (bf != NULL)
1612 			slen = (*(bf->func))(pkt, pktlen, pktlen);
1613 		else
1614 #endif
1615 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
1616 		if (slen != 0) {
1617 			d->bd_fcount++;
1618 			if (!gottime) {
1619 				microtime(&tv);
1620 				gottime = 1;
1621 			}
1622 #ifdef MAC
1623 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1624 #endif
1625 				catchpacket(d, pkt, pktlen, slen,
1626 				    bpf_append_bytes, &tv);
1627 		}
1628 		BPFD_UNLOCK(d);
1629 	}
1630 	BPFIF_UNLOCK(bp);
1631 }
1632 
1633 #define	BPF_CHECK_DIRECTION(d, r, i)				\
1634 	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
1635 	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
1636 
1637 /*
1638  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1639  */
1640 void
1641 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1642 {
1643 	struct bpf_d *d;
1644 #ifdef BPF_JITTER
1645 	bpf_jit_filter *bf;
1646 #endif
1647 	u_int pktlen, slen;
1648 	int gottime;
1649 	struct timeval tv;
1650 
1651 	/* Skip outgoing duplicate packets. */
1652 	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
1653 		m->m_flags &= ~M_PROMISC;
1654 		return;
1655 	}
1656 
1657 	gottime = 0;
1658 
1659 	pktlen = m_length(m, NULL);
1660 
1661 	BPFIF_LOCK(bp);
1662 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1663 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
1664 			continue;
1665 		BPFD_LOCK(d);
1666 		++d->bd_rcount;
1667 #ifdef BPF_JITTER
1668 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
1669 		/* XXX We cannot handle multiple mbufs. */
1670 		if (bf != NULL && m->m_next == NULL)
1671 			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
1672 		else
1673 #endif
1674 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
1675 		if (slen != 0) {
1676 			d->bd_fcount++;
1677 			if (!gottime) {
1678 				microtime(&tv);
1679 				gottime = 1;
1680 			}
1681 #ifdef MAC
1682 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1683 #endif
1684 				catchpacket(d, (u_char *)m, pktlen, slen,
1685 				    bpf_append_mbuf, &tv);
1686 		}
1687 		BPFD_UNLOCK(d);
1688 	}
1689 	BPFIF_UNLOCK(bp);
1690 }
1691 
1692 /*
1693  * Incoming linkage from device drivers, when packet is in
1694  * an mbuf chain and to be prepended by a contiguous header.
1695  */
1696 void
1697 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
1698 {
1699 	struct mbuf mb;
1700 	struct bpf_d *d;
1701 	u_int pktlen, slen;
1702 	int gottime;
1703 	struct timeval tv;
1704 
1705 	/* Skip outgoing duplicate packets. */
1706 	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
1707 		m->m_flags &= ~M_PROMISC;
1708 		return;
1709 	}
1710 
1711 	gottime = 0;
1712 
1713 	pktlen = m_length(m, NULL);
1714 	/*
1715 	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only set up what's
1717 	 * absolutely needed--this mbuf should never go anywhere else.
1718 	 */
1719 	mb.m_next = m;
1720 	mb.m_data = data;
1721 	mb.m_len = dlen;
1722 	pktlen += dlen;
1723 
1724 	BPFIF_LOCK(bp);
1725 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1726 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
1727 			continue;
1728 		BPFD_LOCK(d);
1729 		++d->bd_rcount;
1730 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
1731 		if (slen != 0) {
1732 			d->bd_fcount++;
1733 			if (!gottime) {
1734 				microtime(&tv);
1735 				gottime = 1;
1736 			}
1737 #ifdef MAC
1738 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1739 #endif
1740 				catchpacket(d, (u_char *)&mb, pktlen, slen,
1741 				    bpf_append_mbuf, &tv);
1742 		}
1743 		BPFD_UNLOCK(d);
1744 	}
1745 	BPFIF_UNLOCK(bp);
1746 }
1747 
1748 #undef	BPF_CHECK_DIRECTION
1749 
1750 /*
1751  * Move the packet data from interface memory (pkt) into the
1752  * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bpf_append_bytes is passed in to copy contiguous chunks, while
1754  * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
1755  * pkt is really an mbuf.
1756  */
1757 static void
1758 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1759     void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
1760     struct timeval *tv)
1761 {
1762 	struct bpf_hdr hdr;
1763 	int totlen, curlen;
1764 	int hdrlen = d->bd_bif->bif_hdrlen;
1765 	int do_wakeup = 0;
1766 
1767 	BPFD_LOCK_ASSERT(d);
1768 
1769 	/*
1770 	 * Detect whether user space has released a buffer back to us, and if
1771 	 * so, move it from being a hold buffer to a free buffer.  This may
1772 	 * not be the best place to do it (for example, we might only want to
1773 	 * run this check if we need the space), but for now it's a reliable
1774 	 * spot to do it.
1775 	 */
1776 	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
1777 		d->bd_fbuf = d->bd_hbuf;
1778 		d->bd_hbuf = NULL;
1779 		d->bd_hlen = 0;
1780 		bpf_buf_reclaimed(d);
1781 	}
1782 
1783 	/*
1784 	 * Figure out how many bytes to move.  If the packet is
1785 	 * greater or equal to the snapshot length, transfer that
1786 	 * much.  Otherwise, transfer the whole packet (unless
1787 	 * we hit the buffer size limit).
1788 	 */
1789 	totlen = hdrlen + min(snaplen, pktlen);
1790 	if (totlen > d->bd_bufsize)
1791 		totlen = d->bd_bufsize;
1792 
1793 	/*
1794 	 * Round up the end of the previous packet to the next longword.
1795 	 *
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wake up pending processes; if there is no room and
	 * no hope of room, drop the packet.
1800 	 */
1801 	curlen = BPF_WORDALIGN(d->bd_slen);
1802 	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
1803 		if (d->bd_fbuf == NULL) {
1804 			/*
1805 			 * There's no room in the store buffer, and no
1806 			 * prospect of room, so drop the packet.  Notify the
1807 			 * buffer model.
1808 			 */
1809 			bpf_buffull(d);
1810 			++d->bd_dcount;
1811 			return;
1812 		}
1813 		ROTATE_BUFFERS(d);
1814 		do_wakeup = 1;
1815 		curlen = 0;
1816 	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1817 		/*
1818 		 * Immediate mode is set, or the read timeout has already
1819 		 * expired during a select call.  A packet arrived, so the
1820 		 * reader should be woken up.
1821 		 */
1822 		do_wakeup = 1;
1823 
1824 	/*
1825 	 * Append the bpf header.  Note we append the actual header size, but
1826 	 * move forward the length of the header plus padding.
1827 	 */
1828 	bzero(&hdr, sizeof(hdr));
1829 	hdr.bh_tstamp = *tv;
1830 	hdr.bh_datalen = pktlen;
1831 	hdr.bh_hdrlen = hdrlen;
1832 	hdr.bh_caplen = totlen - hdrlen;
1833 	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
1834 
1835 	/*
1836 	 * Copy the packet data into the store buffer and update its length.
1837 	 */
1838 	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
1839 	d->bd_slen = curlen + totlen;
1840 
1841 	if (do_wakeup)
1842 		bpf_wakeup(d);
1843 }
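
/*
 * Illustrative sketch of how a userland reader walks a buffer filled
 * by catchpacket() (an assumed example, not part of this file):
 * packets are stored back to back, each preceded by a bpf_hdr and
 * padded so that the next header starts on a longword boundary.
 *
 *	u_char *p = buf;
 *
 *	while (p < buf + cc) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)p;
 *		u_char *pkt = p + bh->bh_hdrlen;
 *
 *		... process bh->bh_caplen captured bytes at pkt ...
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */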
1844 
1845 /*
1846  * Free buffers currently in use by a descriptor.
1847  * Called on close.
1848  */
1849 static void
1850 bpf_freed(struct bpf_d *d)
1851 {
1852 
1853 	/*
1854 	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and hasn't yet been marked
1856 	 * free.
1857 	 */
1858 	bpf_free(d);
1859 	if (d->bd_rfilter) {
1860 		free((caddr_t)d->bd_rfilter, M_BPF);
1861 #ifdef BPF_JITTER
1862 		bpf_destroy_jit_filter(d->bd_bfilter);
1863 #endif
1864 	}
1865 	if (d->bd_wfilter)
1866 		free((caddr_t)d->bd_wfilter, M_BPF);
1867 	mtx_destroy(&d->bd_mtx);
1868 }
1869 
1870 /*
1871  * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
1872  * fixed size of the link header (variable length headers not yet supported).
1873  */
1874 void
1875 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
1876 {
1877 
1878 	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
1879 }
1880 
1881 /*
1882  * Attach an interface to bpf.  ifp is a pointer to the structure
1883  * defining the interface to be attached, dlt is the link layer type,
1884  * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
1886  */
1887 void
1888 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1889 {
1890 	struct bpf_if *bp;
1891 
1892 	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1893 	if (bp == NULL)
1894 		panic("bpfattach");
1895 
1896 	LIST_INIT(&bp->bif_dlist);
1897 	bp->bif_ifp = ifp;
1898 	bp->bif_dlt = dlt;
1899 	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1900 	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
1901 	*driverp = bp;
1902 
1903 	mtx_lock(&bpf_mtx);
1904 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
1905 	mtx_unlock(&bpf_mtx);
1906 
1907 	/*
1908 	 * Compute the length of the bpf header.  This is not necessarily
1909 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1910 	 * that the network layer header begins on a longword boundary (for
1911 	 * performance reasons and to alleviate alignment restrictions).
1912 	 */
1913 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
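	/*
	 * For example, assuming the common 18-byte SIZEOF_BPF_HDR: an
	 * Ethernet attachment with hdrlen = 14 yields
	 * BPF_WORDALIGN(14 + 18) - 14 = 32 - 14 = 18, so the header plus
	 * link-layer frame occupy 32 bytes and the network-layer header
	 * starts longword aligned.
	 */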
1914 
1915 	if (bootverbose)
1916 		if_printf(ifp, "bpf attached\n");
1917 }
1918 
1919 /*
1920  * Detach bpf from an interface.  This involves detaching each descriptor
1921  * associated with the interface, and leaving bd_bif NULL.  Notify each
1922  * descriptor as it's detached so that any sleepers wake up and get
1923  * ENXIO.
1924  */
1925 void
1926 bpfdetach(struct ifnet *ifp)
1927 {
1928 	struct bpf_if	*bp;
1929 	struct bpf_d	*d;
1930 
1931 	/* Locate BPF interface information */
1932 	mtx_lock(&bpf_mtx);
1933 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1934 		if (ifp == bp->bif_ifp)
1935 			break;
1936 	}
1937 
1938 	/* Interface wasn't attached */
1939 	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
1940 		mtx_unlock(&bpf_mtx);
1941 		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1942 		return;
1943 	}
1944 
1945 	LIST_REMOVE(bp, bif_next);
1946 	mtx_unlock(&bpf_mtx);
1947 
1948 	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
1949 		bpf_detachd(d);
1950 		BPFD_LOCK(d);
1951 		bpf_wakeup(d);
1952 		BPFD_UNLOCK(d);
1953 	}
1954 
1955 	mtx_destroy(&bp->bif_mtx);
1956 	free(bp, M_BPF);
1957 }
1958 
1959 /*
 * Get a list of the available data link types for the interface.
1961  */
1962 static int
1963 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1964 {
1965 	int n, error;
1966 	struct ifnet *ifp;
1967 	struct bpf_if *bp;
1968 
1969 	ifp = d->bd_bif->bif_ifp;
1970 	n = 0;
1971 	error = 0;
1972 	mtx_lock(&bpf_mtx);
1973 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1974 		if (bp->bif_ifp != ifp)
1975 			continue;
1976 		if (bfl->bfl_list != NULL) {
1977 			if (n >= bfl->bfl_len) {
1978 				mtx_unlock(&bpf_mtx);
1979 				return (ENOMEM);
1980 			}
1981 			error = copyout(&bp->bif_dlt,
1982 			    bfl->bfl_list + n, sizeof(u_int));
1983 		}
1984 		n++;
1985 	}
1986 	mtx_unlock(&bpf_mtx);
1987 	bfl->bfl_len = n;
1988 	return (error);
1989 }
1990 
1991 /*
1992  * Set the data link type of a BPF instance.
1993  */
1994 static int
1995 bpf_setdlt(struct bpf_d *d, u_int dlt)
1996 {
1997 	int error, opromisc;
1998 	struct ifnet *ifp;
1999 	struct bpf_if *bp;
2000 
2001 	if (d->bd_bif->bif_dlt == dlt)
2002 		return (0);
2003 	ifp = d->bd_bif->bif_ifp;
2004 	mtx_lock(&bpf_mtx);
2005 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2006 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2007 			break;
2008 	}
2009 	mtx_unlock(&bpf_mtx);
2010 	if (bp != NULL) {
2011 		opromisc = d->bd_promisc;
2012 		bpf_detachd(d);
2013 		bpf_attachd(d, bp);
2014 		BPFD_LOCK(d);
2015 		reset_d(d);
2016 		BPFD_UNLOCK(d);
2017 		if (opromisc) {
2018 			error = ifpromisc(bp->bif_ifp, 1);
2019 			if (error)
2020 				if_printf(bp->bif_ifp,
2021 					"bpf_setdlt: ifpromisc failed (%d)\n",
2022 					error);
2023 			else
2024 				d->bd_promisc = 1;
2025 		}
2026 	}
2027 	return (bp == NULL ? EINVAL : 0);
2028 }
2029 
2030 static void
2031 bpf_drvinit(void *unused)
2032 {
2033 	struct cdev *dev;
2034 
2035 	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
2036 	LIST_INIT(&bpf_iflist);
2037 
2038 	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
2039 	/* For compatibility */
2040 	make_dev_alias(dev, "bpf0");
2041 }
2042 
2043 /*
2044  * Zero out the various packet counters associated with all of the bpf
2045  * descriptors.  At some point, we will probably want to get a bit more
2046  * granular and allow the user to specify descriptors to be zeroed.
2047  */
2048 static void
2049 bpf_zero_counters(void)
2050 {
2051 	struct bpf_if *bp;
2052 	struct bpf_d *bd;
2053 
2054 	mtx_lock(&bpf_mtx);
2055 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2056 		BPFIF_LOCK(bp);
2057 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2058 			BPFD_LOCK(bd);
2059 			bd->bd_rcount = 0;
2060 			bd->bd_dcount = 0;
2061 			bd->bd_fcount = 0;
2062 			bd->bd_wcount = 0;
2063 			bd->bd_wfcount = 0;
2064 			bd->bd_zcopy = 0;
2065 			BPFD_UNLOCK(bd);
2066 		}
2067 		BPFIF_UNLOCK(bp);
2068 	}
2069 	mtx_unlock(&bpf_mtx);
2070 }
2071 
2072 static void
2073 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
2074 {
2075 
2076 	bzero(d, sizeof(*d));
2077 	BPFD_LOCK_ASSERT(bd);
2078 	d->bd_structsize = sizeof(*d);
2079 	d->bd_immediate = bd->bd_immediate;
2080 	d->bd_promisc = bd->bd_promisc;
2081 	d->bd_hdrcmplt = bd->bd_hdrcmplt;
2082 	d->bd_direction = bd->bd_direction;
2083 	d->bd_feedback = bd->bd_feedback;
2084 	d->bd_async = bd->bd_async;
2085 	d->bd_rcount = bd->bd_rcount;
2086 	d->bd_dcount = bd->bd_dcount;
2087 	d->bd_fcount = bd->bd_fcount;
2088 	d->bd_sig = bd->bd_sig;
2089 	d->bd_slen = bd->bd_slen;
2090 	d->bd_hlen = bd->bd_hlen;
2091 	d->bd_bufsize = bd->bd_bufsize;
2092 	d->bd_pid = bd->bd_pid;
2093 	strlcpy(d->bd_ifname,
2094 	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
2095 	d->bd_locked = bd->bd_locked;
2096 	d->bd_wcount = bd->bd_wcount;
2097 	d->bd_wdcount = bd->bd_wdcount;
2098 	d->bd_wfcount = bd->bd_wfcount;
2099 	d->bd_zcopy = bd->bd_zcopy;
2100 	d->bd_bufmode = bd->bd_bufmode;
2101 }
2102 
2103 static int
2104 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
2105 {
2106 	struct xbpf_d *xbdbuf, *xbd, zerostats;
2107 	int index, error;
2108 	struct bpf_if *bp;
2109 	struct bpf_d *bd;
2110 
2111 	/*
	 * XXX This is not technically correct.  It is possible for
	 * non-privileged users to open bpf devices.  It would make sense
2114 	 * if the users who opened the devices were able to retrieve
2115 	 * the statistics for them, too.
2116 	 */
2117 	error = priv_check(req->td, PRIV_NET_BPF);
2118 	if (error)
2119 		return (error);
2120 	/*
2121 	 * Check to see if the user is requesting that the counters be
2122 	 * zeroed out.  Explicitly check that the supplied data is zeroed,
2123 	 * as we aren't allowing the user to set the counters currently.
2124 	 */
2125 	if (req->newptr != NULL) {
2126 		if (req->newlen != sizeof(zerostats))
2127 			return (EINVAL);
2128 		bzero(&zerostats, sizeof(zerostats));
2129 		xbd = req->newptr;
2130 		if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
2131 			return (EINVAL);
2132 		bpf_zero_counters();
2133 		return (0);
2134 	}
2135 	if (req->oldptr == NULL)
2136 		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
2137 	if (bpf_bpfd_cnt == 0)
2138 		return (SYSCTL_OUT(req, 0, 0));
2139 	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
2140 	mtx_lock(&bpf_mtx);
2141 	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
2142 		mtx_unlock(&bpf_mtx);
2143 		free(xbdbuf, M_BPF);
2144 		return (ENOMEM);
2145 	}
2146 	index = 0;
2147 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2148 		BPFIF_LOCK(bp);
2149 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2150 			xbd = &xbdbuf[index++];
2151 			BPFD_LOCK(bd);
2152 			bpfstats_fill_xbpf(xbd, bd);
2153 			BPFD_UNLOCK(bd);
2154 		}
2155 		BPFIF_UNLOCK(bp);
2156 	}
2157 	mtx_unlock(&bpf_mtx);
2158 	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
2159 	free(xbdbuf, M_BPF);
2160 	return (error);
2161 }
2162 
SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);
2164 
2165 #else /* !DEV_BPF && !NETGRAPH_BPF */
2166 /*
2167  * NOP stubs to allow bpf-using drivers to load and function.
2168  *
2169  * A 'better' implementation would allow the core bpf functionality
2170  * to be loaded at runtime.
2171  */
2172 static struct bpf_if bp_null;
2173 
2174 void
2175 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
2176 {
2177 }
2178 
2179 void
2180 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
2181 {
2182 }
2183 
2184 void
2185 bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
2186 {
2187 }
2188 
2189 void
2190 bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2191 {
2192 
2193 	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
2194 }
2195 
2196 void
2197 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
2198 {
2199 
2200 	*driverp = &bp_null;
2201 }
2202 
2203 void
2204 bpfdetach(struct ifnet *ifp)
2205 {
2206 }
2207 
2208 u_int
2209 bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
2210 {
	return (-1);	/* "no filter" behaviour */
2212 }
2213 
2214 int
2215 bpf_validate(const struct bpf_insn *f, int len)
2216 {
	return (0);		/* false */
2218 }
2219 
2220 #endif /* !DEV_BPF && !NETGRAPH_BPF */
2221