/*-
 * Copyright (c) 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bpf.h"
#include "opt_netgraph.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/ttycom.h>
#include <sys/uio.h>

#include <sys/event.h>
#include <sys/file.h>
#include <sys/poll.h>
#include <sys/proc.h>

#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <net80211/ieee80211_freebsd.h>

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */
/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (e.g., 802.11
 * frames, Ethernet frames, etc.).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;

static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct timeval *);
static void	reset_d(struct bpf_d *);
static int	bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");
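
/*
 * The net.bpf.stats node is consumed from userland with sysctl(3).  As an
 * illustrative sketch (not kernel code; names here are examples only), a
 * monitoring tool might read the per-descriptor statistics with:
 *
 *	size_t len;
 *	struct xbpf_d *xbd;
 *
 *	sysctlbyname("net.bpf.stats", NULL, &len, NULL, 0);
 *	xbd = malloc(len);
 *	sysctlbyname("net.bpf.stats", xbd, &len, NULL, 0);
 *
 * Writing a single zeroed struct xbpf_d to the same node clears all of the
 * counters; see bpf_stats_sysctl() below.
 */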

static	d_open_t	bpfopen;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	bpfopen,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};

static struct filterops bpfread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_bpfdetach,
	.f_event = filt_bpfread,
};
/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
static void
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_bytes(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_bytes");
	}
}

static void
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
    u_int len)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));

	case BPF_BUFMODE_ZBUF:
		d->bd_zcopy++;
		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));

	default:
		panic("bpf_buf_append_mbuf");
	}
}

/*
 * This function gets called when the free buffer is re-assigned.
 */
static void
bpf_buf_reclaimed(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return;

	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buf_reclaimed(d);
		return;

	default:
		panic("bpf_buf_reclaimed");
	}
}

/*
 * If the buffer mechanism has a way to decide that a held buffer can be made
 * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
 * returned if the buffer can be discarded, (0) is returned if it cannot.
 */
static int
bpf_canfreebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canfreebuf(d));
	}
	return (0);
}

/*
 * Allow the buffer model to indicate that the current store buffer is
 * immutable, regardless of the appearance of space.  Return (1) if the
 * buffer is writable, and (0) if not.
 */
static int
bpf_canwritebuf(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_canwritebuf(d));
	}
	return (1);
}

/*
 * Notify buffer model that an attempt to write to the store buffer has
 * resulted in a dropped packet, in which case the buffer may be considered
 * full.
 */
static void
bpf_buffull(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_buffull(d);
		break;
	}
}

/*
 * Notify the buffer model that a buffer has moved into the hold position.
 */
void
bpf_bufheld(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_ZBUF:
		bpf_zerocopy_bufheld(d);
		break;
	}
}

static void
bpf_free(struct bpf_d *d)
{

	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		return (bpf_buffer_free(d));

	case BPF_BUFMODE_ZBUF:
		return (bpf_zerocopy_free(d));

	default:
		panic("bpf_buf_free");
	}
}

static int
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_uiomove(d, buf, len, uio));
}

static int
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
		return (EOPNOTSUPP);
	return (bpf_buffer_ioctl_sblen(d, i));
}

static int
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
}

static int
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
}

static int
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
{

	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
		return (EOPNOTSUPP);
	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
}

/*
 * General BPF functions.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	if (len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	if ((unsigned)len > MJUM16BYTES)
		return (EIO);

	if (len <= MHLEN)
		MGETHDR(m, M_WAIT, MT_DATA);
	else if (len <= MCLBYTES)
		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
#if (MJUMPAGESIZE > MCLBYTES)
		    len <= MJUMPAGESIZE ? MJUMPAGESIZE :
#endif
		    (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}

/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);

	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
}

/*
 * Detach a file from its interface.
 */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		CURVNET_SET(ifp->if_vnet);
		error = ifpromisc(ifp, 0);
		CURVNET_RESTORE();
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}

/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
static void
bpf_dtor(void *data)
{
	struct bpf_d *d = data;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);
	free(d, M_BPF);
}

/*
 * Open ethernet device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
/* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;
	int error;

	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	error = devfs_set_cdevpriv(d, bpf_dtor);
	if (error != 0) {
		free(d, M_BPF);
		return (error);
	}

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;
	d->bd_direction = BPF_D_INOUT;
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);

	return (0);
}

/*
 *  bpfread - read next chunk of packets from buffers
 */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	int error;
	int non_block;
	int timed_out;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	non_block = ((ioflag & O_NONBLOCK) != 0);

	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_slen != 0) {
			/*
			 * One or more packets arrived since the previous
			 * read or while we were asleep.
			 */
			if (d->bd_immediate || non_block || timed_out) {
				/*
				 * Rotate the buffers and return what's here
				 * if we are in immediate mode, non-blocking
				 * flag is set, or this descriptor timed out.
				 */
				ROTATE_BUFFERS(d);
				break;
			}
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (non_block) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * XXXRW: More synchronization needed here: what if a second thread
	 * issues a read on the same fd at the same time?  Don't want this
	 * getting invalidated.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	bpf_buf_reclaimed(d);
	BPFD_UNLOCK(d);

	return (error);
}

/*
 * If there are processes sleeping on this descriptor, wake them up.
 */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}

static void
bpf_timed_out(void *arg)
{
	struct bpf_d *d = (struct bpf_d *)arg;

	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING) {
		d->bd_state = BPF_TIMED_OUT;
		if (d->bd_slen != 0)
			bpf_wakeup(d);
	}
	BPFD_UNLOCK(d);
}

static int
bpf_ready(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
		return (1);
	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
	    d->bd_slen != 0)
		return (1);
	return (0);
}

static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	d->bd_pid = curthread->td_proc->p_pid;
	d->bd_wcount++;
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		mc = m_dup(m, M_DONTWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

	CURVNET_SET(ifp->if_vnet);
#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	if (error)
		d->bd_wdcount++;

	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}
	CURVNET_RESTORE();

	return (error);
}

/*
 * Reset a descriptor by flushing its packet buffer and clearing the receive
 * and drop counts.  This is doable for kernel-only buffers, but with
 * zero-copy buffers, we can't write to (or rotate) buffers that are
 * currently owned by userspace.  It would be nice if we could encapsulate
 * this logic in the buffer code rather than here.
 */
static void
reset_d(struct bpf_d *d)
{

	mtx_assert(&d->bd_mtx, MA_OWNED);

	if ((d->bd_hbuf != NULL) &&
	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}
	if (bpf_canwritebuf(d))
		d->bd_slen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
	d->bd_fcount = 0;
	d->bd_wcount = 0;
	d->bd_wfcount = 0;
	d->bd_wdcount = 0;
	d->bd_zcopy = 0;
}

/*
 *  FIONREAD		Check for read packet available.
 *  SIOCGIFADDR		Get interface address - convenient hook to driver.
 *  BIOCGBLEN		Get buffer len [for read()].
 *  BIOCSETF		Set read filter.
 *  BIOCSETFNR		Set read filter without resetting descriptor.
 *  BIOCSETWF		Set write filter.
 *  BIOCFLUSH		Flush read packet buffer.
 *  BIOCPROMISC		Put interface into promiscuous mode.
 *  BIOCGDLT		Get link layer type.
 *  BIOCGETIF		Get interface name.
 *  BIOCSETIF		Set interface.
 *  BIOCSRTIMEOUT	Set read timeout.
 *  BIOCGRTIMEOUT	Get read timeout.
 *  BIOCGSTATS		Get packet stats.
 *  BIOCIMMEDIATE	Set immediate mode.
 *  BIOCVERSION		Get filter language version.
 *  BIOCGHDRCMPLT	Get "header already complete" flag
 *  BIOCSHDRCMPLT	Set "header already complete" flag
 *  BIOCGDIRECTION	Get packet direction flag
 *  BIOCSDIRECTION	Set packet direction flag
 *  BIOCLOCK		Set "locked" flag
 *  BIOCFEEDBACK	Set packet feedback mode.
 *  BIOCSETZBUF		Set current zero-copy buffer locations.
 *  BIOCGETZMAX		Get maximum zero-copy buffer size.
 *  BIOCROTZBUF		Force rotation of zero-copy buffer
 *  BIOCSETBUFMODE	Set buffer mode.
 *  BIOCGETBUFMODE	Get current buffer mode.
 */
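
/*
 * For orientation, a minimal userland capture setup (an illustrative
 * sketch, not kernel code; the interface name is an example and error
 * handling is omitted) looks like:
 *
 *	int fd = open("/dev/bpf", O_RDWR);
 *	struct ifreq ifr;
 *	u_int blen;
 *
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);
 *	ioctl(fd, BIOCGBLEN, &blen);
 *	char *buf = malloc(blen);
 *	read(fd, buf, blen);
 *
 * Note that read() must be called with exactly the buffer size reported by
 * BIOCGBLEN; see the check at the top of bpfread() above.
 */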
/* ARGSUSED */
static	int
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct bpf_d *d;
	int error;

	error = devfs_get_cdevpriv((void **)&d);
	if (error != 0)
		return (error);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);

	if (d->bd_locked == 1) {
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case BIOCFEEDBACK:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCROTZBUF:
			break;
		default:
			return (EPERM);
		}
	}
	CURVNET_SET(TD_TO_VNET(td));
	switch (cmd) {

	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			BPFD_LOCK(d);
			n = d->bd_slen;
			if (d->bd_hbuf)
				n += d->bd_hlen;
			BPFD_UNLOCK(d);

			*(int *)addr = n;
			break;
		}

	case SIOCGIFADDR:
		{
			struct ifnet *ifp;

			if (d->bd_bif == NULL)
				error = EINVAL;
			else {
				ifp = d->bd_bif->bif_ifp;
				error = (*ifp->if_ioctl)(ifp, cmd, addr);
			}
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.
	 */
	case BIOCSBLEN:
		error = bpf_ioctl_sblen(d, (u_int *)addr);
		break;

	/*
	 * Set link layer read filter.
	 */
	case BIOCSETF:
	case BIOCSETFNR:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
			break;
		}
		if (d->bd_promisc == 0) {
			error = ifpromisc(d->bd_bif->bif_ifp, 1);
			if (error == 0)
				d->bd_promisc = 1;
		}
		break;

	/*
	 * Get current data link type.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Get a list of supported data link types.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Set data link type.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_setdlt(d, *(u_int *)addr);
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			struct ifnet *const ifp = d->bd_bif->bif_ifp;
			struct ifreq *const ifr = (struct ifreq *)addr;

			strlcpy(ifr->ifr_name, ifp->if_xname,
			    sizeof(ifr->ifr_name));
		}
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			/*
			 * Subtract 1 tick from tvtohz() since this isn't
			 * a one-shot timer.
			 */
			if ((error = itimerfix(tv)) == 0)
				d->bd_rtout = tvtohz(tv) - 1;
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			tv->tv_sec = d->bd_rtout / hz;
			tv->tv_usec = (d->bd_rtout % hz) * tick;
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			/* XXXCSJP overflow */
			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode.
	 */
	case BIOCIMMEDIATE:
		d->bd_immediate = *(u_int *)addr;
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	/*
	 * Get "header already complete" flag
	 */
	case BIOCGHDRCMPLT:
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	/*
	 * Set "header already complete" flag
	 */
	case BIOCSHDRCMPLT:
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	/*
	 * Get packet direction flag
	 */
	case BIOCGDIRECTION:
		*(u_int *)addr = d->bd_direction;
		break;

	/*
	 * Set packet direction flag
	 */
	case BIOCSDIRECTION:
		{
			u_int	direction;

			direction = *(u_int *)addr;
			switch (direction) {
			case BPF_D_IN:
			case BPF_D_INOUT:
			case BPF_D_OUT:
				d->bd_direction = direction;
				break;
			default:
				error = EINVAL;
			}
		}
		break;

	case BIOCFEEDBACK:
		d->bd_feedback = *(u_int *)addr;
		break;

	case BIOCLOCK:
		d->bd_locked = 1;
		break;

	case FIONBIO:		/* Non-blocking I/O */
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:
		error = fsetown(*(int *)addr, &d->bd_sigio);
		break;

	case FIOGETOWN:
		*(int *)addr = fgetown(&d->bd_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		error = fsetown(-(*(int *)addr), &d->bd_sigio);
		break;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)addr = -fgetown(&d->bd_sigio);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;

	case BIOCGETBUFMODE:
		*(u_int *)addr = d->bd_bufmode;
		break;

	case BIOCSETBUFMODE:
		/*
		 * Allow the buffering mode to be changed as long as we
		 * haven't yet committed to a particular mode.  Our
		 * definition of commitment, for now, is whether or not a
		 * buffer has been allocated or an interface attached, since
		 * that's the point where things get tricky.
		 */
		switch (*(u_int *)addr) {
		case BPF_BUFMODE_BUFFER:
			break;

		case BPF_BUFMODE_ZBUF:
			if (bpf_zerocopy_enable)
				break;
			/* FALLTHROUGH */

		default:
			return (EINVAL);
		}

		BPFD_LOCK(d);
		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
			BPFD_UNLOCK(d);
			return (EBUSY);
		}
		d->bd_bufmode = *(u_int *)addr;
		BPFD_UNLOCK(d);
		break;

	case BIOCGETZMAX:
		return (bpf_ioctl_getzmax(td, d, (size_t *)addr));

	case BIOCSETZBUF:
		return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));

	case BIOCROTZBUF:
		return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr));
	}
	CURVNET_RESTORE();
	return (error);
}

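/*
 * For illustration (a userland sketch, not kernel code): a minimal filter
 * program that accepts every packet and snaps the whole thing can be
 * installed with:
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
 *	};
 *	struct bpf_program prog = { 1, insns };
 *	ioctl(fd, BIOCSETF, &prog);
 *
 * A filter return value of 0 rejects the packet; a non-zero return is the
 * snap length.  Programs are checked with bpf_validate() before being
 * installed by bpf_setf() below.
 */
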
/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
			if (cmd == BIOCSETF)
				reset_d(d);
		}
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
			if (cmd == BIOCSETF)
				reset_d(d);
		}
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}

/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/*
	 * Behavior here depends on the buffering model.  If we're using
	 * kernel memory buffers, then we can allocate them here.  If we're
	 * using zero-copy, then the user process must have registered
	 * buffers by the time we get here.  If not, return an error.
	 *
	 * XXXRW: There are locking issues here with multi-threaded use: what
	 * if two threads try to set the interface at once?
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		if (d->bd_sbuf == NULL)
			bpf_buffer_alloc(d);
		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
		break;

	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}

/*
 * Support for select() and poll() system calls
 *
 * Return true iff the specific operation will not block indefinitely.
 * Otherwise, return false but make a note that a selwakeup() must be done.
 */
static int
bpfpoll(struct cdev *dev, int events, struct thread *td)
{
	struct bpf_d *d;
	int revents;

	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
		return (events &
		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));

	/*
	 * Refresh PID associated with this descriptor.
	 */
	revents = events & (POLLOUT | POLLWRNORM);
	BPFD_LOCK(d);
	d->bd_pid = td->td_proc->p_pid;
	if (events & (POLLIN | POLLRDNORM)) {
		if (bpf_ready(d))
			revents |= events & (POLLIN | POLLRDNORM);
		else {
			selrecord(td, &d->bd_sel);
			/* Start the read timeout if necessary. */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				callout_reset(&d->bd_callout, d->bd_rtout,
				    bpf_timed_out, d);
				d->bd_state = BPF_WAITING;
			}
		}
	}
	BPFD_UNLOCK(d);
	return (revents);
}

/*
 * Support for kevent() system call.  Register EVFILT_READ filters and
 * reject all others.
 */
int
bpfkqfilter(struct cdev *dev, struct knote *kn)
{
	struct bpf_d *d;

	if (devfs_get_cdevpriv((void **)&d) != 0 ||
	    kn->kn_filter != EVFILT_READ)
		return (1);

	/*
	 * Refresh PID associated with this descriptor.
	 */
	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	kn->kn_fop = &bpfread_filtops;
	kn->kn_hook = d;
	knlist_add(&d->bd_sel.si_note, kn, 1);
	BPFD_UNLOCK(d);

	return (0);
}

static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	knlist_remove(&d->bd_sel.si_note, kn, 0);
}

static int
filt_bpfread(struct knote *kn, long hint)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
	int ready;

	BPFD_LOCK_ASSERT(d);
	ready = bpf_ready(d);
	if (ready) {
		kn->kn_data = d->bd_slen;
		if (d->bd_hbuf)
			kn->kn_data += d->bd_hlen;
	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
		callout_reset(&d->bd_callout, d->bd_rtout,
		    bpf_timed_out, d);
		d->bd_state = BPF_WAITING;
	}

	return (ready);
}

/*
 * Incoming linkage from device drivers.  Process the packet pkt, of length
 * pktlen, which is stored in a contiguous buffer.  The packet is parsed
 * by each process' filter, and if accepted, stashed into the corresponding
 * buffer.
 */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int slen;
	int gottime;
	struct timeval tv;

	gottime = 0;
	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
		/*
		 * NB: We don't call BPF_CHECK_DIRECTION() here since there
		 * is no way for the caller to indicate to us whether this
		 * packet is inbound or outbound.  In the bpf_mtap() routines,
		 * we use the interface pointers on the mbuf to figure it out.
		 */
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		if (bf != NULL)
			slen = (*(bf->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

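/*
 * BPF_CHECK_DIRECTION() evaluates true when descriptor d should skip a
 * packet: r is the mbuf's receive interface (NULL for locally generated
 * packets) and i is the interface being tapped.  A BPF_D_IN descriptor
 * skips packets that did not arrive on the tapped interface (r != i);
 * a BPF_D_OUT descriptor skips packets that did (r == i).
 */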
#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))

/*
 * Incoming linkage from device drivers, when packet is in an mbuf chain.
 */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
#ifdef BPF_JITTER
	bpf_jit_filter *bf;
#endif
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
		/* XXX We cannot handle multiple mbufs. */
		if (bf != NULL && m->m_next == NULL)
			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

/*
 * Incoming linkage from device drivers, when packet is in
 * an mbuf chain and to be prepended by a contiguous header.
 */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	gottime = 0;

	pktlen = m_length(m, NULL);
	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}

#undef	BPF_CHECK_DIRECTION

/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer.  "cpfn" is the routine called to do the actual data
 * transfer.  bcopy is passed in to copy contiguous chunks, while
 * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
 * pkt is really an mbuf.
 */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct timeval *tv)
{
	struct bpf_hdr hdr;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wakeup pending processes.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	hdr.bh_tstamp = *tv;
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	hdr.bh_caplen = totlen - hdrlen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{

	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	bpf_free(d);
	if (d->bd_rfilter) {
		free((caddr_t)d->bd_rfilter, M_BPF);
#ifdef BPF_JITTER
		bpf_destroy_jit_filter(d->bd_bfilter);
#endif
	}
	if (d->bd_wfilter)
		free((caddr_t)d->bd_wfilter, M_BPF);
	mtx_destroy(&d->bd_mtx);
}

/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

/*
 * Attach an interface to bpf.  ifp is a pointer to the structure
 * defining the interface to be attached, dlt is the link layer type,
 * and hdrlen is the fixed size of the link header (variable length
 * headers are not yet supported).
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{
	struct bpf_if *bp;

	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
	if (bp == NULL)
		panic("bpfattach");

	LIST_INIT(&bp->bif_dlist);
	bp->bif_ifp = ifp;
	bp->bif_dlt = dlt;
	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
	*driverp = bp;

	mtx_lock(&bpf_mtx);
	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Compute the length of the bpf header.  This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
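
	/*
	 * As a worked example (values are platform-dependent; this assumes
	 * DLT_EN10MB with hdrlen 14, an 18-byte SIZEOF_BPF_HDR, and 4-byte
	 * BPF_ALIGNMENT): BPF_WORDALIGN(14 + 18) is 32, so bif_hdrlen is
	 * 32 - 14 = 18 and each record's network-layer payload begins at
	 * offset 32, a longword boundary.
	 */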

	if (bootverbose)
		if_printf(ifp, "bpf attached\n");
}

/*
 * Detach bpf from an interface.  This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL.  Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}

/*
 * Get a list of the available data link types of the interface.
 */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	bfl->bfl_len = n;
	return (error);
}

/*
 * Set the data link type of a BPF instance.
 */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}

static void
bpf_drvinit(void *unused)
{
	struct cdev *dev;

	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
	LIST_INIT(&bpf_iflist);

	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
	/* For compatibility */
	make_dev_alias(dev, "bpf0");
}

/*
 * Zero out the various packet counters associated with all of the bpf
 * descriptors.  At some point, we will probably want to get a bit more
 * granular and allow the user to specify descriptors to be zeroed.
 */
static void
bpf_zero_counters(void)
{
	struct bpf_if *bp;
	struct bpf_d *bd;

	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			BPFD_LOCK(bd);
			bd->bd_rcount = 0;
			bd->bd_dcount = 0;
			bd->bd_fcount = 0;
			bd->bd_wcount = 0;
			bd->bd_wfcount = 0;
			bd->bd_zcopy = 0;
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
}

static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	d->bd_structsize = sizeof(*d);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}

static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd, zerostats;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct. It is possible for non
	 * privileged users to open bpf devices. It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/*
	 * Check to see if the user is requesting that the counters be
	 * zeroed out.  Explicitly check that the supplied data is zeroed,
	 * as we aren't allowing the user to set the counters currently.
	 */
	if (req->newptr != NULL) {
		if (req->newlen != sizeof(zerostats))
			return (EINVAL);
		bzero(&zerostats, sizeof(zerostats));
		xbd = req->newptr;
		if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
			return (EINVAL);
		bpf_zero_counters();
		return (0);
	}
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}

SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);

#else /* !DEV_BPF && !NETGRAPH_BPF */
/*
 * NOP stubs to allow bpf-using drivers to load and function.
 *
 * A 'better' implementation would allow the core bpf functionality
 * to be loaded at runtime.
 */
static struct bpf_if bp_null;

void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}

void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}

void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}

void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}

void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}

void
bpfdetach(struct ifnet *ifp)
{
}

u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return (-1);	/* "no filter" behaviour */
}

int
bpf_validate(const struct bpf_insn *f, int len)
{
	return (0);		/* false */
}

#endif /* !DEV_BPF && !NETGRAPH_BPF */