xref: /freebsd/sys/net/bpf.c (revision b7e9bee71c1ddb18cc3ee5bcaf4a3e2360e1d6b0)
1 /*-
2  * Copyright (c) 1990, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from the Stanford/CMU enet packet filter,
6  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8  * Berkeley Laboratory.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_bpf.h"
41 #include "opt_netgraph.h"
42 
43 #include <sys/types.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/conf.h>
47 #include <sys/fcntl.h>
48 #include <sys/jail.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/time.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/signalvar.h>
55 #include <sys/filio.h>
56 #include <sys/sockio.h>
57 #include <sys/ttycom.h>
58 #include <sys/uio.h>
59 
60 #include <sys/event.h>
61 #include <sys/file.h>
62 #include <sys/poll.h>
63 #include <sys/proc.h>
64 
65 #include <sys/socket.h>
66 
67 #include <net/if.h>
68 #include <net/bpf.h>
69 #include <net/bpf_buffer.h>
70 #ifdef BPF_JITTER
71 #include <net/bpf_jitter.h>
72 #endif
73 #include <net/bpf_zerocopy.h>
74 #include <net/bpfdesc.h>
75 #include <net/vnet.h>
76 
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #include <sys/kernel.h>
80 #include <sys/sysctl.h>
81 
82 #include <net80211/ieee80211_freebsd.h>
83 
84 #include <security/mac/mac_framework.h>
85 
86 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
87 
88 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
89 
90 #define PRINET  26			/* interruptible */
91 
92 /*
93  * bpf_iflist is a list of BPF interface structures, each corresponding to a
94  * specific DLT.  The same network interface might have several BPF interface
95  * structures registered by different layers in the stack (i.e., 802.11
96  * frames, ethernet frames, etc).
97  */
98 static LIST_HEAD(, bpf_if)	bpf_iflist;
99 static struct mtx	bpf_mtx;		/* bpf global lock */
100 static int		bpf_bpfd_cnt;
101 
102 static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
103 static void	bpf_detachd(struct bpf_d *);
104 static void	bpf_freed(struct bpf_d *);
105 static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
106 		    struct sockaddr *, int *, struct bpf_insn *);
107 static int	bpf_setif(struct bpf_d *, struct ifreq *);
108 static void	bpf_timed_out(void *);
109 static __inline void
110 		bpf_wakeup(struct bpf_d *);
111 static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
112 		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
113 		    struct timeval *);
114 static void	reset_d(struct bpf_d *);
115 static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
116 static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
117 static int	bpf_setdlt(struct bpf_d *, u_int);
118 static void	filt_bpfdetach(struct knote *);
119 static int	filt_bpfread(struct knote *, long);
120 static void	bpf_drvinit(void *);
121 static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
122 
123 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
124 int bpf_maxinsns = BPF_MAXINSNS;
125 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
126     &bpf_maxinsns, 0, "Maximum bpf program instructions");
127 static int bpf_zerocopy_enable = 0;
128 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
129     &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
130 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
131     bpf_stats_sysctl, "bpf statistics portal");
132 
133 static	d_open_t	bpfopen;
134 static	d_read_t	bpfread;
135 static	d_write_t	bpfwrite;
136 static	d_ioctl_t	bpfioctl;
137 static	d_poll_t	bpfpoll;
138 static	d_kqfilter_t	bpfkqfilter;
139 
140 static struct cdevsw bpf_cdevsw = {
141 	.d_version =	D_VERSION,
142 	.d_open =	bpfopen,
143 	.d_read =	bpfread,
144 	.d_write =	bpfwrite,
145 	.d_ioctl =	bpfioctl,
146 	.d_poll =	bpfpoll,
147 	.d_name =	"bpf",
148 	.d_kqfilter =	bpfkqfilter,
149 };
150 
151 static struct filterops bpfread_filtops = {
152 	.f_isfd = 1,
153 	.f_detach = filt_bpfdetach,
154 	.f_event = filt_bpfread,
155 };
156 
/*
 * Wrapper functions for various buffering methods.  If the set of buffer
 * modes expands, we will probably want to introduce a switch data structure
 * similar to protosw, etc.
 */
162 static void
163 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
164     u_int len)
165 {
166 
167 	BPFD_LOCK_ASSERT(d);
168 
169 	switch (d->bd_bufmode) {
170 	case BPF_BUFMODE_BUFFER:
171 		return (bpf_buffer_append_bytes(d, buf, offset, src, len));
172 
173 	case BPF_BUFMODE_ZBUF:
174 		d->bd_zcopy++;
175 		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
176 
177 	default:
178 		panic("bpf_buf_append_bytes");
179 	}
180 }
181 
182 static void
183 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
184     u_int len)
185 {
186 
187 	BPFD_LOCK_ASSERT(d);
188 
189 	switch (d->bd_bufmode) {
190 	case BPF_BUFMODE_BUFFER:
191 		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
192 
193 	case BPF_BUFMODE_ZBUF:
194 		d->bd_zcopy++;
195 		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
196 
197 	default:
198 		panic("bpf_buf_append_mbuf");
199 	}
200 }
201 
202 /*
203  * This function gets called when the free buffer is re-assigned.
204  */
205 static void
206 bpf_buf_reclaimed(struct bpf_d *d)
207 {
208 
209 	BPFD_LOCK_ASSERT(d);
210 
211 	switch (d->bd_bufmode) {
212 	case BPF_BUFMODE_BUFFER:
213 		return;
214 
215 	case BPF_BUFMODE_ZBUF:
216 		bpf_zerocopy_buf_reclaimed(d);
217 		return;
218 
219 	default:
220 		panic("bpf_buf_reclaimed");
221 	}
222 }
223 
224 /*
225  * If the buffer mechanism has a way to decide that a held buffer can be made
226  * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
227  * returned if the buffer can be discarded, (0) is returned if it cannot.
228  */
229 static int
230 bpf_canfreebuf(struct bpf_d *d)
231 {
232 
233 	BPFD_LOCK_ASSERT(d);
234 
235 	switch (d->bd_bufmode) {
236 	case BPF_BUFMODE_ZBUF:
237 		return (bpf_zerocopy_canfreebuf(d));
238 	}
239 	return (0);
240 }
241 
242 /*
243  * Allow the buffer model to indicate that the current store buffer is
244  * immutable, regardless of the appearance of space.  Return (1) if the
245  * buffer is writable, and (0) if not.
246  */
247 static int
248 bpf_canwritebuf(struct bpf_d *d)
249 {
250 
251 	BPFD_LOCK_ASSERT(d);
252 
253 	switch (d->bd_bufmode) {
254 	case BPF_BUFMODE_ZBUF:
255 		return (bpf_zerocopy_canwritebuf(d));
256 	}
257 	return (1);
258 }
259 
260 /*
261  * Notify buffer model that an attempt to write to the store buffer has
262  * resulted in a dropped packet, in which case the buffer may be considered
263  * full.
264  */
265 static void
266 bpf_buffull(struct bpf_d *d)
267 {
268 
269 	BPFD_LOCK_ASSERT(d);
270 
271 	switch (d->bd_bufmode) {
272 	case BPF_BUFMODE_ZBUF:
273 		bpf_zerocopy_buffull(d);
274 		break;
275 	}
276 }
277 
278 /*
279  * Notify the buffer model that a buffer has moved into the hold position.
280  */
281 void
282 bpf_bufheld(struct bpf_d *d)
283 {
284 
285 	BPFD_LOCK_ASSERT(d);
286 
287 	switch (d->bd_bufmode) {
288 	case BPF_BUFMODE_ZBUF:
289 		bpf_zerocopy_bufheld(d);
290 		break;
291 	}
292 }
293 
294 static void
295 bpf_free(struct bpf_d *d)
296 {
297 
298 	switch (d->bd_bufmode) {
299 	case BPF_BUFMODE_BUFFER:
300 		return (bpf_buffer_free(d));
301 
302 	case BPF_BUFMODE_ZBUF:
303 		return (bpf_zerocopy_free(d));
304 
305 	default:
306 		panic("bpf_buf_free");
307 	}
308 }
309 
310 static int
311 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
312 {
313 
314 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
315 		return (EOPNOTSUPP);
316 	return (bpf_buffer_uiomove(d, buf, len, uio));
317 }
318 
319 static int
320 bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
321 {
322 
323 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
324 		return (EOPNOTSUPP);
325 	return (bpf_buffer_ioctl_sblen(d, i));
326 }
327 
328 static int
329 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
330 {
331 
332 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
333 		return (EOPNOTSUPP);
334 	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
335 }
336 
337 static int
338 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
339 {
340 
341 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
342 		return (EOPNOTSUPP);
343 	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
344 }
345 
346 static int
347 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
348 {
349 
350 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
351 		return (EOPNOTSUPP);
352 	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
353 }
354 
355 /*
356  * General BPF functions.
357  */
358 static int
359 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
360     struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
361 {
362 	const struct ieee80211_bpf_params *p;
363 	struct ether_header *eh;
364 	struct mbuf *m;
365 	int error;
366 	int len;
367 	int hlen;
368 	int slen;
369 
370 	/*
371 	 * Build a sockaddr based on the data link layer type.
372 	 * We do this at this level because the ethernet header
373 	 * is copied directly into the data field of the sockaddr.
374 	 * In the case of SLIP, there is no header and the packet
375 	 * is forwarded as is.
376 	 * Also, we are careful to leave room at the front of the mbuf
377 	 * for the link level header.
378 	 */
379 	switch (linktype) {
380 
381 	case DLT_SLIP:
382 		sockp->sa_family = AF_INET;
383 		hlen = 0;
384 		break;
385 
386 	case DLT_EN10MB:
387 		sockp->sa_family = AF_UNSPEC;
388 		/* XXX Would MAXLINKHDR be better? */
389 		hlen = ETHER_HDR_LEN;
390 		break;
391 
392 	case DLT_FDDI:
393 		sockp->sa_family = AF_IMPLINK;
394 		hlen = 0;
395 		break;
396 
397 	case DLT_RAW:
398 		sockp->sa_family = AF_UNSPEC;
399 		hlen = 0;
400 		break;
401 
402 	case DLT_NULL:
403 		/*
404 		 * null interface types require a 4 byte pseudo header which
405 		 * corresponds to the address family of the packet.
406 		 */
407 		sockp->sa_family = AF_UNSPEC;
408 		hlen = 4;
409 		break;
410 
411 	case DLT_ATM_RFC1483:
412 		/*
413 		 * en atm driver requires 4-byte atm pseudo header.
414 		 * though it isn't standard, vpi:vci needs to be
415 		 * specified anyway.
416 		 */
417 		sockp->sa_family = AF_UNSPEC;
418 		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
419 		break;
420 
421 	case DLT_PPP:
422 		sockp->sa_family = AF_UNSPEC;
423 		hlen = 4;	/* This should match PPP_HDRLEN */
424 		break;
425 
426 	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
427 		sockp->sa_family = AF_IEEE80211;
428 		hlen = 0;
429 		break;
430 
431 	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
432 		sockp->sa_family = AF_IEEE80211;
433 		sockp->sa_len = 12;	/* XXX != 0 */
434 		hlen = sizeof(struct ieee80211_bpf_params);
435 		break;
436 
437 	default:
438 		return (EIO);
439 	}
440 
441 	len = uio->uio_resid;
442 
443 	if (len - hlen > ifp->if_mtu)
444 		return (EMSGSIZE);
445 
446 	if ((unsigned)len > MJUM16BYTES)
447 		return (EIO);
448 
449 	if (len <= MHLEN)
450 		MGETHDR(m, M_WAIT, MT_DATA);
451 	else if (len <= MCLBYTES)
452 		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
453 	else
454 		m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
455 #if (MJUMPAGESIZE > MCLBYTES)
456 		    len <= MJUMPAGESIZE ? MJUMPAGESIZE :
457 #endif
458 		    (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
459 	m->m_pkthdr.len = m->m_len = len;
460 	m->m_pkthdr.rcvif = NULL;
461 	*mp = m;
462 
463 	if (m->m_len < hlen) {
464 		error = EPERM;
465 		goto bad;
466 	}
467 
468 	error = uiomove(mtod(m, u_char *), len, uio);
469 	if (error)
470 		goto bad;
471 
472 	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
473 	if (slen == 0) {
474 		error = EPERM;
475 		goto bad;
476 	}
477 
478 	/* Check for multicast destination */
479 	switch (linktype) {
480 	case DLT_EN10MB:
481 		eh = mtod(m, struct ether_header *);
482 		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
483 			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
484 			    ETHER_ADDR_LEN) == 0)
485 				m->m_flags |= M_BCAST;
486 			else
487 				m->m_flags |= M_MCAST;
488 		}
489 		break;
490 	}
491 
492 	/*
493 	 * Make room for link header, and copy it to sockaddr
494 	 */
495 	if (hlen != 0) {
496 		if (sockp->sa_family == AF_IEEE80211) {
497 			/*
498 			 * Collect true length from the parameter header
499 			 * NB: sockp is known to be zero'd so if we do a
500 			 *     short copy unspecified parameters will be
501 			 *     zero.
502 			 * NB: packet may not be aligned after stripping
503 			 *     bpf params
504 			 * XXX check ibp_vers
505 			 */
506 			p = mtod(m, const struct ieee80211_bpf_params *);
507 			hlen = p->ibp_len;
508 			if (hlen > sizeof(sockp->sa_data)) {
509 				error = EINVAL;
510 				goto bad;
511 			}
512 		}
513 		bcopy(m->m_data, sockp->sa_data, hlen);
514 	}
515 	*hdrlen = hlen;
516 
517 	return (0);
518 bad:
519 	m_freem(m);
520 	return (error);
521 }
522 
523 /*
524  * Attach file to the bpf interface, i.e. make d listen on bp.
525  */
526 static void
527 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
528 {
529 	/*
530 	 * Point d at bp, and add d to the interface's list of listeners.
531 	 * Finally, point the driver's bpf cookie at the interface so
532 	 * it will divert packets to bpf.
533 	 */
534 	BPFIF_LOCK(bp);
535 	d->bd_bif = bp;
536 	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
537 
538 	bpf_bpfd_cnt++;
539 	BPFIF_UNLOCK(bp);
540 
541 	EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
542 }
543 
544 /*
545  * Detach a file from its interface.
546  */
547 static void
548 bpf_detachd(struct bpf_d *d)
549 {
550 	int error;
551 	struct bpf_if *bp;
552 	struct ifnet *ifp;
553 
554 	bp = d->bd_bif;
555 	BPFIF_LOCK(bp);
556 	BPFD_LOCK(d);
557 	ifp = d->bd_bif->bif_ifp;
558 
559 	/*
560 	 * Remove d from the interface's descriptor list.
561 	 */
562 	LIST_REMOVE(d, bd_next);
563 
564 	bpf_bpfd_cnt--;
565 	d->bd_bif = NULL;
566 	BPFD_UNLOCK(d);
567 	BPFIF_UNLOCK(bp);
568 
569 	EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
570 
571 	/*
572 	 * Check if this descriptor had requested promiscuous mode.
573 	 * If so, turn it off.
574 	 */
575 	if (d->bd_promisc) {
576 		d->bd_promisc = 0;
577 		CURVNET_SET(ifp->if_vnet);
578 		error = ifpromisc(ifp, 0);
579 		CURVNET_RESTORE();
580 		if (error != 0 && error != ENXIO) {
581 			/*
582 			 * ENXIO can happen if a pccard is unplugged
583 			 * Something is really wrong if we were able to put
584 			 * the driver into promiscuous mode, but can't
585 			 * take it out.
586 			 */
587 			if_printf(bp->bif_ifp,
588 				"bpf_detach: ifpromisc failed (%d)\n", error);
589 		}
590 	}
591 }
592 
593 /*
594  * Close the descriptor by detaching it from its interface,
595  * deallocating its buffers, and marking it free.
596  */
597 static void
598 bpf_dtor(void *data)
599 {
600 	struct bpf_d *d = data;
601 
602 	BPFD_LOCK(d);
603 	if (d->bd_state == BPF_WAITING)
604 		callout_stop(&d->bd_callout);
605 	d->bd_state = BPF_IDLE;
606 	BPFD_UNLOCK(d);
607 	funsetown(&d->bd_sigio);
608 	mtx_lock(&bpf_mtx);
609 	if (d->bd_bif)
610 		bpf_detachd(d);
611 	mtx_unlock(&bpf_mtx);
612 	selwakeuppri(&d->bd_sel, PRINET);
613 #ifdef MAC
614 	mac_bpfdesc_destroy(d);
615 #endif /* MAC */
616 	knlist_destroy(&d->bd_sel.si_note);
617 	callout_drain(&d->bd_callout);
618 	bpf_freed(d);
619 	free(d, M_BPF);
620 }
621 
622 /*
623  * Open ethernet device.  Returns ENXIO for illegal minor device number,
624  * EBUSY if file is open by another process.
625  */
626 /* ARGSUSED */
627 static	int
628 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
629 {
630 	struct bpf_d *d;
631 	int error;
632 
633 	d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
634 	error = devfs_set_cdevpriv(d, bpf_dtor);
635 	if (error != 0) {
636 		free(d, M_BPF);
637 		return (error);
638 	}
639 
640 	/*
641 	 * For historical reasons, perform a one-time initialization call to
642 	 * the buffer routines, even though we're not yet committed to a
643 	 * particular buffer method.
644 	 */
645 	bpf_buffer_init(d);
646 	d->bd_bufmode = BPF_BUFMODE_BUFFER;
647 	d->bd_sig = SIGIO;
648 	d->bd_direction = BPF_D_INOUT;
649 	d->bd_pid = td->td_proc->p_pid;
650 #ifdef MAC
651 	mac_bpfdesc_init(d);
652 	mac_bpfdesc_create(td->td_ucred, d);
653 #endif
654 	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
655 	callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
656 	knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
657 
658 	return (0);
659 }
660 
661 /*
662  *  bpfread - read next chunk of packets from buffers
663  */
664 static	int
665 bpfread(struct cdev *dev, struct uio *uio, int ioflag)
666 {
667 	struct bpf_d *d;
668 	int error;
669 	int non_block;
670 	int timed_out;
671 
672 	error = devfs_get_cdevpriv((void **)&d);
673 	if (error != 0)
674 		return (error);
675 
676 	/*
677 	 * Restrict application to use a buffer the same size as
678 	 * as kernel buffers.
679 	 */
680 	if (uio->uio_resid != d->bd_bufsize)
681 		return (EINVAL);
682 
683 	non_block = ((ioflag & O_NONBLOCK) != 0);
684 
685 	BPFD_LOCK(d);
686 	d->bd_pid = curthread->td_proc->p_pid;
687 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
688 		BPFD_UNLOCK(d);
689 		return (EOPNOTSUPP);
690 	}
691 	if (d->bd_state == BPF_WAITING)
692 		callout_stop(&d->bd_callout);
693 	timed_out = (d->bd_state == BPF_TIMED_OUT);
694 	d->bd_state = BPF_IDLE;
695 	/*
696 	 * If the hold buffer is empty, then do a timed sleep, which
697 	 * ends when the timeout expires or when enough packets
698 	 * have arrived to fill the store buffer.
699 	 */
700 	while (d->bd_hbuf == NULL) {
701 		if (d->bd_slen != 0) {
702 			/*
703 			 * A packet(s) either arrived since the previous
704 			 * read or arrived while we were asleep.
705 			 */
706 			if (d->bd_immediate || non_block || timed_out) {
707 				/*
708 				 * Rotate the buffers and return what's here
709 				 * if we are in immediate mode, non-blocking
710 				 * flag is set, or this descriptor timed out.
711 				 */
712 				ROTATE_BUFFERS(d);
713 				break;
714 			}
715 		}
716 
717 		/*
718 		 * No data is available, check to see if the bpf device
719 		 * is still pointed at a real interface.  If not, return
720 		 * ENXIO so that the userland process knows to rebind
721 		 * it before using it again.
722 		 */
723 		if (d->bd_bif == NULL) {
724 			BPFD_UNLOCK(d);
725 			return (ENXIO);
726 		}
727 
728 		if (non_block) {
729 			BPFD_UNLOCK(d);
730 			return (EWOULDBLOCK);
731 		}
732 		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
733 		     "bpf", d->bd_rtout);
734 		if (error == EINTR || error == ERESTART) {
735 			BPFD_UNLOCK(d);
736 			return (error);
737 		}
738 		if (error == EWOULDBLOCK) {
739 			/*
740 			 * On a timeout, return what's in the buffer,
741 			 * which may be nothing.  If there is something
742 			 * in the store buffer, we can rotate the buffers.
743 			 */
744 			if (d->bd_hbuf)
745 				/*
746 				 * We filled up the buffer in between
747 				 * getting the timeout and arriving
748 				 * here, so we don't need to rotate.
749 				 */
750 				break;
751 
752 			if (d->bd_slen == 0) {
753 				BPFD_UNLOCK(d);
754 				return (0);
755 			}
756 			ROTATE_BUFFERS(d);
757 			break;
758 		}
759 	}
760 	/*
761 	 * At this point, we know we have something in the hold slot.
762 	 */
763 	BPFD_UNLOCK(d);
764 
765 	/*
766 	 * Move data from hold buffer into user space.
767 	 * We know the entire buffer is transferred since
768 	 * we checked above that the read buffer is bpf_bufsize bytes.
769 	 *
770 	 * XXXRW: More synchronization needed here: what if a second thread
771 	 * issues a read on the same fd at the same time?  Don't want this
772 	 * getting invalidated.
773 	 */
774 	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
775 
776 	BPFD_LOCK(d);
777 	d->bd_fbuf = d->bd_hbuf;
778 	d->bd_hbuf = NULL;
779 	d->bd_hlen = 0;
780 	bpf_buf_reclaimed(d);
781 	BPFD_UNLOCK(d);
782 
783 	return (error);
784 }
785 
786 /*
787  * If there are processes sleeping on this descriptor, wake them up.
788  */
789 static __inline void
790 bpf_wakeup(struct bpf_d *d)
791 {
792 
793 	BPFD_LOCK_ASSERT(d);
794 	if (d->bd_state == BPF_WAITING) {
795 		callout_stop(&d->bd_callout);
796 		d->bd_state = BPF_IDLE;
797 	}
798 	wakeup(d);
799 	if (d->bd_async && d->bd_sig && d->bd_sigio)
800 		pgsigio(&d->bd_sigio, d->bd_sig, 0);
801 
802 	selwakeuppri(&d->bd_sel, PRINET);
803 	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
804 }
805 
806 static void
807 bpf_timed_out(void *arg)
808 {
809 	struct bpf_d *d = (struct bpf_d *)arg;
810 
811 	BPFD_LOCK_ASSERT(d);
812 
813 	if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
814 		return;
815 	if (d->bd_state == BPF_WAITING) {
816 		d->bd_state = BPF_TIMED_OUT;
817 		if (d->bd_slen != 0)
818 			bpf_wakeup(d);
819 	}
820 }
821 
822 static int
823 bpf_ready(struct bpf_d *d)
824 {
825 
826 	BPFD_LOCK_ASSERT(d);
827 
828 	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
829 		return (1);
830 	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
831 	    d->bd_slen != 0)
832 		return (1);
833 	return (0);
834 }
835 
836 static int
837 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
838 {
839 	struct bpf_d *d;
840 	struct ifnet *ifp;
841 	struct mbuf *m, *mc;
842 	struct sockaddr dst;
843 	int error, hlen;
844 
845 	error = devfs_get_cdevpriv((void **)&d);
846 	if (error != 0)
847 		return (error);
848 
849 	d->bd_pid = curthread->td_proc->p_pid;
850 	d->bd_wcount++;
851 	if (d->bd_bif == NULL) {
852 		d->bd_wdcount++;
853 		return (ENXIO);
854 	}
855 
856 	ifp = d->bd_bif->bif_ifp;
857 
858 	if ((ifp->if_flags & IFF_UP) == 0) {
859 		d->bd_wdcount++;
860 		return (ENETDOWN);
861 	}
862 
863 	if (uio->uio_resid == 0) {
864 		d->bd_wdcount++;
865 		return (0);
866 	}
867 
868 	bzero(&dst, sizeof(dst));
869 	m = NULL;
870 	hlen = 0;
871 	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
872 	    &m, &dst, &hlen, d->bd_wfilter);
873 	if (error) {
874 		d->bd_wdcount++;
875 		return (error);
876 	}
877 	d->bd_wfcount++;
878 	if (d->bd_hdrcmplt)
879 		dst.sa_family = pseudo_AF_HDRCMPLT;
880 
881 	if (d->bd_feedback) {
882 		mc = m_dup(m, M_DONTWAIT);
883 		if (mc != NULL)
884 			mc->m_pkthdr.rcvif = ifp;
885 		/* Set M_PROMISC for outgoing packets to be discarded. */
886 		if (d->bd_direction == BPF_D_INOUT)
887 			m->m_flags |= M_PROMISC;
888 	} else
889 		mc = NULL;
890 
891 	m->m_pkthdr.len -= hlen;
892 	m->m_len -= hlen;
893 	m->m_data += hlen;	/* XXX */
894 
895 	CURVNET_SET(ifp->if_vnet);
896 #ifdef MAC
897 	BPFD_LOCK(d);
898 	mac_bpfdesc_create_mbuf(d, m);
899 	if (mc != NULL)
900 		mac_bpfdesc_create_mbuf(d, mc);
901 	BPFD_UNLOCK(d);
902 #endif
903 
904 	error = (*ifp->if_output)(ifp, m, &dst, NULL);
905 	if (error)
906 		d->bd_wdcount++;
907 
908 	if (mc != NULL) {
909 		if (error == 0)
910 			(*ifp->if_input)(ifp, mc);
911 		else
912 			m_freem(mc);
913 	}
914 	CURVNET_RESTORE();
915 
916 	return (error);
917 }
918 
919 /*
920  * Reset a descriptor by flushing its packet buffer and clearing the receive
921  * and drop counts.  This is doable for kernel-only buffers, but with
922  * zero-copy buffers, we can't write to (or rotate) buffers that are
923  * currently owned by userspace.  It would be nice if we could encapsulate
924  * this logic in the buffer code rather than here.
925  */
926 static void
927 reset_d(struct bpf_d *d)
928 {
929 
930 	mtx_assert(&d->bd_mtx, MA_OWNED);
931 
932 	if ((d->bd_hbuf != NULL) &&
933 	    (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
934 		/* Free the hold buffer. */
935 		d->bd_fbuf = d->bd_hbuf;
936 		d->bd_hbuf = NULL;
937 		d->bd_hlen = 0;
938 		bpf_buf_reclaimed(d);
939 	}
940 	if (bpf_canwritebuf(d))
941 		d->bd_slen = 0;
942 	d->bd_rcount = 0;
943 	d->bd_dcount = 0;
944 	d->bd_fcount = 0;
945 	d->bd_wcount = 0;
946 	d->bd_wfcount = 0;
947 	d->bd_wdcount = 0;
948 	d->bd_zcopy = 0;
949 }
950 
951 /*
952  *  FIONREAD		Check for read packet available.
953  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
954  *  BIOCGBLEN		Get buffer len [for read()].
955  *  BIOCSETF		Set read filter.
956  *  BIOCSETFNR		Set read filter without resetting descriptor.
957  *  BIOCSETWF		Set write filter.
958  *  BIOCFLUSH		Flush read packet buffer.
959  *  BIOCPROMISC		Put interface into promiscuous mode.
960  *  BIOCGDLT		Get link layer type.
961  *  BIOCGETIF		Get interface name.
962  *  BIOCSETIF		Set interface.
963  *  BIOCSRTIMEOUT	Set read timeout.
964  *  BIOCGRTIMEOUT	Get read timeout.
965  *  BIOCGSTATS		Get packet stats.
966  *  BIOCIMMEDIATE	Set immediate mode.
967  *  BIOCVERSION		Get filter language version.
968  *  BIOCGHDRCMPLT	Get "header already complete" flag
969  *  BIOCSHDRCMPLT	Set "header already complete" flag
970  *  BIOCGDIRECTION	Get packet direction flag
971  *  BIOCSDIRECTION	Set packet direction flag
972  *  BIOCLOCK		Set "locked" flag
973  *  BIOCFEEDBACK	Set packet feedback mode.
974  *  BIOCSETZBUF		Set current zero-copy buffer locations.
975  *  BIOCGETZMAX		Get maximum zero-copy buffer size.
976  *  BIOCROTZBUF		Force rotation of zero-copy buffer
977  *  BIOCSETBUFMODE	Set buffer mode.
978  *  BIOCGETBUFMODE	Get current buffer mode.
979  */
980 /* ARGSUSED */
981 static	int
982 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
983     struct thread *td)
984 {
985 	struct bpf_d *d;
986 	int error;
987 
988 	error = devfs_get_cdevpriv((void **)&d);
989 	if (error != 0)
990 		return (error);
991 
992 	/*
993 	 * Refresh PID associated with this descriptor.
994 	 */
995 	BPFD_LOCK(d);
996 	d->bd_pid = td->td_proc->p_pid;
997 	if (d->bd_state == BPF_WAITING)
998 		callout_stop(&d->bd_callout);
999 	d->bd_state = BPF_IDLE;
1000 	BPFD_UNLOCK(d);
1001 
1002 	if (d->bd_locked == 1) {
1003 		switch (cmd) {
1004 		case BIOCGBLEN:
1005 		case BIOCFLUSH:
1006 		case BIOCGDLT:
1007 		case BIOCGDLTLIST:
1008 		case BIOCGETIF:
1009 		case BIOCGRTIMEOUT:
1010 		case BIOCGSTATS:
1011 		case BIOCVERSION:
1012 		case BIOCGRSIG:
1013 		case BIOCGHDRCMPLT:
1014 		case BIOCFEEDBACK:
1015 		case FIONREAD:
1016 		case BIOCLOCK:
1017 		case BIOCSRTIMEOUT:
1018 		case BIOCIMMEDIATE:
1019 		case TIOCGPGRP:
1020 		case BIOCROTZBUF:
1021 			break;
1022 		default:
1023 			return (EPERM);
1024 		}
1025 	}
1026 	CURVNET_SET(TD_TO_VNET(td));
1027 	switch (cmd) {
1028 
1029 	default:
1030 		error = EINVAL;
1031 		break;
1032 
1033 	/*
1034 	 * Check for read packet available.
1035 	 */
1036 	case FIONREAD:
1037 		{
1038 			int n;
1039 
1040 			BPFD_LOCK(d);
1041 			n = d->bd_slen;
1042 			if (d->bd_hbuf)
1043 				n += d->bd_hlen;
1044 			BPFD_UNLOCK(d);
1045 
1046 			*(int *)addr = n;
1047 			break;
1048 		}
1049 
1050 	case SIOCGIFADDR:
1051 		{
1052 			struct ifnet *ifp;
1053 
1054 			if (d->bd_bif == NULL)
1055 				error = EINVAL;
1056 			else {
1057 				ifp = d->bd_bif->bif_ifp;
1058 				error = (*ifp->if_ioctl)(ifp, cmd, addr);
1059 			}
1060 			break;
1061 		}
1062 
1063 	/*
1064 	 * Get buffer len [for read()].
1065 	 */
1066 	case BIOCGBLEN:
1067 		*(u_int *)addr = d->bd_bufsize;
1068 		break;
1069 
1070 	/*
1071 	 * Set buffer length.
1072 	 */
1073 	case BIOCSBLEN:
1074 		error = bpf_ioctl_sblen(d, (u_int *)addr);
1075 		break;
1076 
1077 	/*
1078 	 * Set link layer read filter.
1079 	 */
1080 	case BIOCSETF:
1081 	case BIOCSETFNR:
1082 	case BIOCSETWF:
1083 		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1084 		break;
1085 
1086 	/*
1087 	 * Flush read packet buffer.
1088 	 */
1089 	case BIOCFLUSH:
1090 		BPFD_LOCK(d);
1091 		reset_d(d);
1092 		BPFD_UNLOCK(d);
1093 		break;
1094 
1095 	/*
1096 	 * Put interface into promiscuous mode.
1097 	 */
1098 	case BIOCPROMISC:
1099 		if (d->bd_bif == NULL) {
1100 			/*
1101 			 * No interface attached yet.
1102 			 */
1103 			error = EINVAL;
1104 			break;
1105 		}
1106 		if (d->bd_promisc == 0) {
1107 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
1108 			if (error == 0)
1109 				d->bd_promisc = 1;
1110 		}
1111 		break;
1112 
1113 	/*
1114 	 * Get current data link type.
1115 	 */
1116 	case BIOCGDLT:
1117 		if (d->bd_bif == NULL)
1118 			error = EINVAL;
1119 		else
1120 			*(u_int *)addr = d->bd_bif->bif_dlt;
1121 		break;
1122 
1123 	/*
1124 	 * Get a list of supported data link types.
1125 	 */
1126 	case BIOCGDLTLIST:
1127 		if (d->bd_bif == NULL)
1128 			error = EINVAL;
1129 		else
1130 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1131 		break;
1132 
1133 	/*
1134 	 * Set data link type.
1135 	 */
1136 	case BIOCSDLT:
1137 		if (d->bd_bif == NULL)
1138 			error = EINVAL;
1139 		else
1140 			error = bpf_setdlt(d, *(u_int *)addr);
1141 		break;
1142 
1143 	/*
1144 	 * Get interface name.
1145 	 */
1146 	case BIOCGETIF:
1147 		if (d->bd_bif == NULL)
1148 			error = EINVAL;
1149 		else {
1150 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
1151 			struct ifreq *const ifr = (struct ifreq *)addr;
1152 
1153 			strlcpy(ifr->ifr_name, ifp->if_xname,
1154 			    sizeof(ifr->ifr_name));
1155 		}
1156 		break;
1157 
1158 	/*
1159 	 * Set interface.
1160 	 */
1161 	case BIOCSETIF:
1162 		error = bpf_setif(d, (struct ifreq *)addr);
1163 		break;
1164 
1165 	/*
1166 	 * Set read timeout.
1167 	 */
1168 	case BIOCSRTIMEOUT:
1169 		{
1170 			struct timeval *tv = (struct timeval *)addr;
1171 
1172 			/*
1173 			 * Subtract 1 tick from tvtohz() since this isn't
1174 			 * a one-shot timer.
1175 			 */
1176 			if ((error = itimerfix(tv)) == 0)
1177 				d->bd_rtout = tvtohz(tv) - 1;
1178 			break;
1179 		}
1180 
1181 	/*
1182 	 * Get read timeout.
1183 	 */
1184 	case BIOCGRTIMEOUT:
1185 		{
1186 			struct timeval *tv = (struct timeval *)addr;
1187 
1188 			tv->tv_sec = d->bd_rtout / hz;
1189 			tv->tv_usec = (d->bd_rtout % hz) * tick;
1190 			break;
1191 		}
1192 
1193 	/*
1194 	 * Get packet stats.
1195 	 */
1196 	case BIOCGSTATS:
1197 		{
1198 			struct bpf_stat *bs = (struct bpf_stat *)addr;
1199 
1200 			/* XXXCSJP overflow */
1201 			bs->bs_recv = d->bd_rcount;
1202 			bs->bs_drop = d->bd_dcount;
1203 			break;
1204 		}
1205 
1206 	/*
1207 	 * Set immediate mode.
1208 	 */
1209 	case BIOCIMMEDIATE:
1210 		d->bd_immediate = *(u_int *)addr;
1211 		break;
1212 
1213 	case BIOCVERSION:
1214 		{
1215 			struct bpf_version *bv = (struct bpf_version *)addr;
1216 
1217 			bv->bv_major = BPF_MAJOR_VERSION;
1218 			bv->bv_minor = BPF_MINOR_VERSION;
1219 			break;
1220 		}
1221 
1222 	/*
1223 	 * Get "header already complete" flag
1224 	 */
1225 	case BIOCGHDRCMPLT:
1226 		*(u_int *)addr = d->bd_hdrcmplt;
1227 		break;
1228 
1229 	/*
1230 	 * Set "header already complete" flag
1231 	 */
1232 	case BIOCSHDRCMPLT:
1233 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1234 		break;
1235 
1236 	/*
1237 	 * Get packet direction flag
1238 	 */
1239 	case BIOCGDIRECTION:
1240 		*(u_int *)addr = d->bd_direction;
1241 		break;
1242 
1243 	/*
1244 	 * Set packet direction flag
1245 	 */
1246 	case BIOCSDIRECTION:
1247 		{
1248 			u_int	direction;
1249 
1250 			direction = *(u_int *)addr;
1251 			switch (direction) {
1252 			case BPF_D_IN:
1253 			case BPF_D_INOUT:
1254 			case BPF_D_OUT:
1255 				d->bd_direction = direction;
1256 				break;
1257 			default:
1258 				error = EINVAL;
1259 			}
1260 		}
1261 		break;
1262 
1263 	case BIOCFEEDBACK:
1264 		d->bd_feedback = *(u_int *)addr;
1265 		break;
1266 
1267 	case BIOCLOCK:
1268 		d->bd_locked = 1;
1269 		break;
1270 
1271 	case FIONBIO:		/* Non-blocking I/O */
1272 		break;
1273 
1274 	case FIOASYNC:		/* Send signal on receive packets */
1275 		d->bd_async = *(int *)addr;
1276 		break;
1277 
1278 	case FIOSETOWN:
1279 		error = fsetown(*(int *)addr, &d->bd_sigio);
1280 		break;
1281 
1282 	case FIOGETOWN:
1283 		*(int *)addr = fgetown(&d->bd_sigio);
1284 		break;
1285 
1286 	/* This is deprecated, FIOSETOWN should be used instead. */
1287 	case TIOCSPGRP:
1288 		error = fsetown(-(*(int *)addr), &d->bd_sigio);
1289 		break;
1290 
1291 	/* This is deprecated, FIOGETOWN should be used instead. */
1292 	case TIOCGPGRP:
1293 		*(int *)addr = -fgetown(&d->bd_sigio);
1294 		break;
1295 
1296 	case BIOCSRSIG:		/* Set receive signal */
1297 		{
1298 			u_int sig;
1299 
1300 			sig = *(u_int *)addr;
1301 
1302 			if (sig >= NSIG)
1303 				error = EINVAL;
1304 			else
1305 				d->bd_sig = sig;
1306 			break;
1307 		}
1308 	case BIOCGRSIG:
1309 		*(u_int *)addr = d->bd_sig;
1310 		break;
1311 
1312 	case BIOCGETBUFMODE:
1313 		*(u_int *)addr = d->bd_bufmode;
1314 		break;
1315 
1316 	case BIOCSETBUFMODE:
1317 		/*
1318 		 * Allow the buffering mode to be changed as long as we
1319 		 * haven't yet committed to a particular mode.  Our
1320 		 * definition of commitment, for now, is whether or not a
1321 		 * buffer has been allocated or an interface attached, since
1322 		 * that's the point where things get tricky.
1323 		 */
1324 		switch (*(u_int *)addr) {
1325 		case BPF_BUFMODE_BUFFER:
1326 			break;
1327 
1328 		case BPF_BUFMODE_ZBUF:
1329 			if (bpf_zerocopy_enable)
1330 				break;
1331 			/* FALLSTHROUGH */
1332 
1333 		default:
1334 			return (EINVAL);
1335 		}
1336 
1337 		BPFD_LOCK(d);
1338 		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
1339 		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
1340 			BPFD_UNLOCK(d);
1341 			return (EBUSY);
1342 		}
1343 		d->bd_bufmode = *(u_int *)addr;
1344 		BPFD_UNLOCK(d);
1345 		break;
1346 
1347 	case BIOCGETZMAX:
1348 		return (bpf_ioctl_getzmax(td, d, (size_t *)addr));
1349 
1350 	case BIOCSETZBUF:
1351 		return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));
1352 
1353 	case BIOCROTZBUF:
1354 		return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr));
1355 	}
1356 	CURVNET_RESTORE();
1357 	return (error);
1358 }
1359 
1360 /*
1361  * Set d's packet filter program to fp.  If this file already has a filter,
1362  * free it and replace it.  Returns EINVAL for bogus requests.
1363  */
1364 static int
1365 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1366 {
1367 	struct bpf_insn *fcode, *old;
1368 	u_int wfilter, flen, size;
1369 #ifdef BPF_JITTER
1370 	bpf_jit_filter *ofunc;
1371 #endif
1372 
1373 	if (cmd == BIOCSETWF) {
1374 		old = d->bd_wfilter;
1375 		wfilter = 1;
1376 #ifdef BPF_JITTER
1377 		ofunc = NULL;
1378 #endif
1379 	} else {
1380 		wfilter = 0;
1381 		old = d->bd_rfilter;
1382 #ifdef BPF_JITTER
1383 		ofunc = d->bd_bfilter;
1384 #endif
1385 	}
1386 	if (fp->bf_insns == NULL) {
1387 		if (fp->bf_len != 0)
1388 			return (EINVAL);
1389 		BPFD_LOCK(d);
1390 		if (wfilter)
1391 			d->bd_wfilter = NULL;
1392 		else {
1393 			d->bd_rfilter = NULL;
1394 #ifdef BPF_JITTER
1395 			d->bd_bfilter = NULL;
1396 #endif
1397 			if (cmd == BIOCSETF)
1398 				reset_d(d);
1399 		}
1400 		BPFD_UNLOCK(d);
1401 		if (old != NULL)
1402 			free((caddr_t)old, M_BPF);
1403 #ifdef BPF_JITTER
1404 		if (ofunc != NULL)
1405 			bpf_destroy_jit_filter(ofunc);
1406 #endif
1407 		return (0);
1408 	}
1409 	flen = fp->bf_len;
1410 	if (flen > bpf_maxinsns)
1411 		return (EINVAL);
1412 
1413 	size = flen * sizeof(*fp->bf_insns);
1414 	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
1415 	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
1416 	    bpf_validate(fcode, (int)flen)) {
1417 		BPFD_LOCK(d);
1418 		if (wfilter)
1419 			d->bd_wfilter = fcode;
1420 		else {
1421 			d->bd_rfilter = fcode;
1422 #ifdef BPF_JITTER
1423 			d->bd_bfilter = bpf_jitter(fcode, flen);
1424 #endif
1425 			if (cmd == BIOCSETF)
1426 				reset_d(d);
1427 		}
1428 		BPFD_UNLOCK(d);
1429 		if (old != NULL)
1430 			free((caddr_t)old, M_BPF);
1431 #ifdef BPF_JITTER
1432 		if (ofunc != NULL)
1433 			bpf_destroy_jit_filter(ofunc);
1434 #endif
1435 
1436 		return (0);
1437 	}
1438 	free((caddr_t)fcode, M_BPF);
1439 	return (EINVAL);
1440 }
1441 
1442 /*
1443  * Detach a file from its current interface (if attached at all) and attach
1444  * to the interface indicated by the name stored in ifr.
1445  * Return an errno or 0.
1446  */
1447 static int
1448 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1449 {
1450 	struct bpf_if *bp;
1451 	struct ifnet *theywant;
1452 
1453 	theywant = ifunit(ifr->ifr_name);
1454 	if (theywant == NULL || theywant->if_bpf == NULL)
1455 		return (ENXIO);
1456 
1457 	bp = theywant->if_bpf;
1458 
1459 	/*
1460 	 * Behavior here depends on the buffering model.  If we're using
1461 	 * kernel memory buffers, then we can allocate them here.  If we're
1462 	 * using zero-copy, then the user process must have registered
1463 	 * buffers by the time we get here.  If not, return an error.
1464 	 *
1465 	 * XXXRW: There are locking issues here with multi-threaded use: what
1466 	 * if two threads try to set the interface at once?
1467 	 */
1468 	switch (d->bd_bufmode) {
1469 	case BPF_BUFMODE_BUFFER:
1470 		if (d->bd_sbuf == NULL)
1471 			bpf_buffer_alloc(d);
1472 		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
1473 		break;
1474 
1475 	case BPF_BUFMODE_ZBUF:
1476 		if (d->bd_sbuf == NULL)
1477 			return (EINVAL);
1478 		break;
1479 
1480 	default:
1481 		panic("bpf_setif: bufmode %d", d->bd_bufmode);
1482 	}
1483 	if (bp != d->bd_bif) {
1484 		if (d->bd_bif)
1485 			/*
1486 			 * Detach if attached to something else.
1487 			 */
1488 			bpf_detachd(d);
1489 
1490 		bpf_attachd(d, bp);
1491 	}
1492 	BPFD_LOCK(d);
1493 	reset_d(d);
1494 	BPFD_UNLOCK(d);
1495 	return (0);
1496 }
1497 
1498 /*
1499  * Support for select() and poll() system calls
1500  *
1501  * Return true iff the specific operation will not block indefinitely.
1502  * Otherwise, return false but make a note that a selwakeup() must be done.
1503  */
1504 static int
1505 bpfpoll(struct cdev *dev, int events, struct thread *td)
1506 {
1507 	struct bpf_d *d;
1508 	int revents;
1509 
1510 	if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
1511 		return (events &
1512 		    (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
1513 
1514 	/*
1515 	 * Refresh PID associated with this descriptor.
1516 	 */
1517 	revents = events & (POLLOUT | POLLWRNORM);
1518 	BPFD_LOCK(d);
1519 	d->bd_pid = td->td_proc->p_pid;
1520 	if (events & (POLLIN | POLLRDNORM)) {
1521 		if (bpf_ready(d))
1522 			revents |= events & (POLLIN | POLLRDNORM);
1523 		else {
1524 			selrecord(td, &d->bd_sel);
1525 			/* Start the read timeout if necessary. */
1526 			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1527 				callout_reset(&d->bd_callout, d->bd_rtout,
1528 				    bpf_timed_out, d);
1529 				d->bd_state = BPF_WAITING;
1530 			}
1531 		}
1532 	}
1533 	BPFD_UNLOCK(d);
1534 	return (revents);
1535 }
1536 
1537 /*
1538  * Support for kevent() system call.  Register EVFILT_READ filters and
1539  * reject all others.
1540  */
1541 int
1542 bpfkqfilter(struct cdev *dev, struct knote *kn)
1543 {
1544 	struct bpf_d *d;
1545 
1546 	if (devfs_get_cdevpriv((void **)&d) != 0 ||
1547 	    kn->kn_filter != EVFILT_READ)
1548 		return (1);
1549 
1550 	/*
1551 	 * Refresh PID associated with this descriptor.
1552 	 */
1553 	BPFD_LOCK(d);
1554 	d->bd_pid = curthread->td_proc->p_pid;
1555 	kn->kn_fop = &bpfread_filtops;
1556 	kn->kn_hook = d;
1557 	knlist_add(&d->bd_sel.si_note, kn, 1);
1558 	BPFD_UNLOCK(d);
1559 
1560 	return (0);
1561 }
1562 
1563 static void
1564 filt_bpfdetach(struct knote *kn)
1565 {
1566 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1567 
1568 	knlist_remove(&d->bd_sel.si_note, kn, 0);
1569 }
1570 
1571 static int
1572 filt_bpfread(struct knote *kn, long hint)
1573 {
1574 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1575 	int ready;
1576 
1577 	BPFD_LOCK_ASSERT(d);
1578 	ready = bpf_ready(d);
1579 	if (ready) {
1580 		kn->kn_data = d->bd_slen;
1581 		if (d->bd_hbuf)
1582 			kn->kn_data += d->bd_hlen;
1583 	} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1584 		callout_reset(&d->bd_callout, d->bd_rtout,
1585 		    bpf_timed_out, d);
1586 		d->bd_state = BPF_WAITING;
1587 	}
1588 
1589 	return (ready);
1590 }
1591 
1592 /*
1593  * Incoming linkage from device drivers.  Process the packet pkt, of length
1594  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1595  * by each process' filter, and if accepted, stashed into the corresponding
1596  * buffer.
1597  */
1598 void
1599 bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
1600 {
1601 	struct bpf_d *d;
1602 #ifdef BPF_JITTER
1603 	bpf_jit_filter *bf;
1604 #endif
1605 	u_int slen;
1606 	int gottime;
1607 	struct timeval tv;
1608 
1609 	gottime = 0;
1610 	BPFIF_LOCK(bp);
1611 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1612 		BPFD_LOCK(d);
1613 		++d->bd_rcount;
1614 		/*
1615 		 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
1616 		 * way for the caller to indiciate to us whether this packet
1617 		 * is inbound or outbound.  In the bpf_mtap() routines, we use
1618 		 * the interface pointers on the mbuf to figure it out.
1619 		 */
1620 #ifdef BPF_JITTER
1621 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
1622 		if (bf != NULL)
1623 			slen = (*(bf->func))(pkt, pktlen, pktlen);
1624 		else
1625 #endif
1626 		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
1627 		if (slen != 0) {
1628 			d->bd_fcount++;
1629 			if (!gottime) {
1630 				microtime(&tv);
1631 				gottime = 1;
1632 			}
1633 #ifdef MAC
1634 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1635 #endif
1636 				catchpacket(d, pkt, pktlen, slen,
1637 				    bpf_append_bytes, &tv);
1638 		}
1639 		BPFD_UNLOCK(d);
1640 	}
1641 	BPFIF_UNLOCK(bp);
1642 }
1643 
/*
 * Evaluates to true when descriptor d should NOT see a packet whose receive
 * interface is r while tapping interface i: either d wants only inbound
 * traffic (BPF_D_IN) and r differs from i, or d wants only outbound traffic
 * (BPF_D_OUT) and r equals i.  The mtap routines skip the descriptor when
 * this is true.
 */
#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
1647 
1648 /*
1649  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1650  */
1651 void
1652 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
1653 {
1654 	struct bpf_d *d;
1655 #ifdef BPF_JITTER
1656 	bpf_jit_filter *bf;
1657 #endif
1658 	u_int pktlen, slen;
1659 	int gottime;
1660 	struct timeval tv;
1661 
1662 	/* Skip outgoing duplicate packets. */
1663 	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
1664 		m->m_flags &= ~M_PROMISC;
1665 		return;
1666 	}
1667 
1668 	gottime = 0;
1669 
1670 	pktlen = m_length(m, NULL);
1671 
1672 	BPFIF_LOCK(bp);
1673 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1674 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
1675 			continue;
1676 		BPFD_LOCK(d);
1677 		++d->bd_rcount;
1678 #ifdef BPF_JITTER
1679 		bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
1680 		/* XXX We cannot handle multiple mbufs. */
1681 		if (bf != NULL && m->m_next == NULL)
1682 			slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
1683 		else
1684 #endif
1685 		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
1686 		if (slen != 0) {
1687 			d->bd_fcount++;
1688 			if (!gottime) {
1689 				microtime(&tv);
1690 				gottime = 1;
1691 			}
1692 #ifdef MAC
1693 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1694 #endif
1695 				catchpacket(d, (u_char *)m, pktlen, slen,
1696 				    bpf_append_mbuf, &tv);
1697 		}
1698 		BPFD_UNLOCK(d);
1699 	}
1700 	BPFIF_UNLOCK(bp);
1701 }
1702 
1703 /*
1704  * Incoming linkage from device drivers, when packet is in
1705  * an mbuf chain and to be prepended by a contiguous header.
1706  */
1707 void
1708 bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
1709 {
1710 	struct mbuf mb;
1711 	struct bpf_d *d;
1712 	u_int pktlen, slen;
1713 	int gottime;
1714 	struct timeval tv;
1715 
1716 	/* Skip outgoing duplicate packets. */
1717 	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
1718 		m->m_flags &= ~M_PROMISC;
1719 		return;
1720 	}
1721 
1722 	gottime = 0;
1723 
1724 	pktlen = m_length(m, NULL);
1725 	/*
1726 	 * Craft on-stack mbuf suitable for passing to bpf_filter.
1727 	 * Note that we cut corners here; we only setup what's
1728 	 * absolutely needed--this mbuf should never go anywhere else.
1729 	 */
1730 	mb.m_next = m;
1731 	mb.m_data = data;
1732 	mb.m_len = dlen;
1733 	pktlen += dlen;
1734 
1735 	BPFIF_LOCK(bp);
1736 	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
1737 		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
1738 			continue;
1739 		BPFD_LOCK(d);
1740 		++d->bd_rcount;
1741 		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
1742 		if (slen != 0) {
1743 			d->bd_fcount++;
1744 			if (!gottime) {
1745 				microtime(&tv);
1746 				gottime = 1;
1747 			}
1748 #ifdef MAC
1749 			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
1750 #endif
1751 				catchpacket(d, (u_char *)&mb, pktlen, slen,
1752 				    bpf_append_mbuf, &tv);
1753 		}
1754 		BPFD_UNLOCK(d);
1755 	}
1756 	BPFIF_UNLOCK(bp);
1757 }
1758 
1759 #undef	BPF_CHECK_DIRECTION
1760 
1761 /*
1762  * Move the packet data from interface memory (pkt) into the
1763  * store buffer.  "cpfn" is the routine called to do the actual data
1764  * transfer.  bcopy is passed in to copy contiguous chunks, while
1765  * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
1766  * pkt is really an mbuf.
1767  */
1768 static void
1769 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1770     void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
1771     struct timeval *tv)
1772 {
1773 	struct bpf_hdr hdr;
1774 	int totlen, curlen;
1775 	int hdrlen = d->bd_bif->bif_hdrlen;
1776 	int do_wakeup = 0;
1777 
1778 	BPFD_LOCK_ASSERT(d);
1779 
1780 	/*
1781 	 * Detect whether user space has released a buffer back to us, and if
1782 	 * so, move it from being a hold buffer to a free buffer.  This may
1783 	 * not be the best place to do it (for example, we might only want to
1784 	 * run this check if we need the space), but for now it's a reliable
1785 	 * spot to do it.
1786 	 */
1787 	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
1788 		d->bd_fbuf = d->bd_hbuf;
1789 		d->bd_hbuf = NULL;
1790 		d->bd_hlen = 0;
1791 		bpf_buf_reclaimed(d);
1792 	}
1793 
1794 	/*
1795 	 * Figure out how many bytes to move.  If the packet is
1796 	 * greater or equal to the snapshot length, transfer that
1797 	 * much.  Otherwise, transfer the whole packet (unless
1798 	 * we hit the buffer size limit).
1799 	 */
1800 	totlen = hdrlen + min(snaplen, pktlen);
1801 	if (totlen > d->bd_bufsize)
1802 		totlen = d->bd_bufsize;
1803 
1804 	/*
1805 	 * Round up the end of the previous packet to the next longword.
1806 	 *
1807 	 * Drop the packet if there's no room and no hope of room
1808 	 * If the packet would overflow the storage buffer or the storage
1809 	 * buffer is considered immutable by the buffer model, try to rotate
1810 	 * the buffer and wakeup pending processes.
1811 	 */
1812 	curlen = BPF_WORDALIGN(d->bd_slen);
1813 	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
1814 		if (d->bd_fbuf == NULL) {
1815 			/*
1816 			 * There's no room in the store buffer, and no
1817 			 * prospect of room, so drop the packet.  Notify the
1818 			 * buffer model.
1819 			 */
1820 			bpf_buffull(d);
1821 			++d->bd_dcount;
1822 			return;
1823 		}
1824 		ROTATE_BUFFERS(d);
1825 		do_wakeup = 1;
1826 		curlen = 0;
1827 	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
1828 		/*
1829 		 * Immediate mode is set, or the read timeout has already
1830 		 * expired during a select call.  A packet arrived, so the
1831 		 * reader should be woken up.
1832 		 */
1833 		do_wakeup = 1;
1834 
1835 	/*
1836 	 * Append the bpf header.  Note we append the actual header size, but
1837 	 * move forward the length of the header plus padding.
1838 	 */
1839 	bzero(&hdr, sizeof(hdr));
1840 	hdr.bh_tstamp = *tv;
1841 	hdr.bh_datalen = pktlen;
1842 	hdr.bh_hdrlen = hdrlen;
1843 	hdr.bh_caplen = totlen - hdrlen;
1844 	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
1845 
1846 	/*
1847 	 * Copy the packet data into the store buffer and update its length.
1848 	 */
1849 	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
1850 	d->bd_slen = curlen + totlen;
1851 
1852 	if (do_wakeup)
1853 		bpf_wakeup(d);
1854 }
1855 
1856 /*
1857  * Free buffers currently in use by a descriptor.
1858  * Called on close.
1859  */
1860 static void
1861 bpf_freed(struct bpf_d *d)
1862 {
1863 
1864 	/*
1865 	 * We don't need to lock out interrupts since this descriptor has
1866 	 * been detached from its interface and it yet hasn't been marked
1867 	 * free.
1868 	 */
1869 	bpf_free(d);
1870 	if (d->bd_rfilter != NULL) {
1871 		free((caddr_t)d->bd_rfilter, M_BPF);
1872 #ifdef BPF_JITTER
1873 		if (d->bd_bfilter != NULL)
1874 			bpf_destroy_jit_filter(d->bd_bfilter);
1875 #endif
1876 	}
1877 	if (d->bd_wfilter != NULL)
1878 		free((caddr_t)d->bd_wfilter, M_BPF);
1879 	mtx_destroy(&d->bd_mtx);
1880 }
1881 
/*
 * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
 * fixed size of the link header (variable length headers not yet supported).
 *
 * Convenience wrapper around bpfattach2() that stores the resulting bpf_if
 * pointer in ifp->if_bpf.
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}
1892 
1893 /*
1894  * Attach an interface to bpf.  ifp is a pointer to the structure
1895  * defining the interface to be attached, dlt is the link layer type,
1896  * and hdrlen is the fixed size of the link header (variable length
1897  * headers are not yet supporrted).
1898  */
1899 void
1900 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1901 {
1902 	struct bpf_if *bp;
1903 
1904 	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1905 	if (bp == NULL)
1906 		panic("bpfattach");
1907 
1908 	LIST_INIT(&bp->bif_dlist);
1909 	bp->bif_ifp = ifp;
1910 	bp->bif_dlt = dlt;
1911 	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1912 	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
1913 	*driverp = bp;
1914 
1915 	mtx_lock(&bpf_mtx);
1916 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
1917 	mtx_unlock(&bpf_mtx);
1918 
1919 	/*
1920 	 * Compute the length of the bpf header.  This is not necessarily
1921 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1922 	 * that the network layer header begins on a longword boundary (for
1923 	 * performance reasons and to alleviate alignment restrictions).
1924 	 */
1925 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1926 
1927 	if (bootverbose)
1928 		if_printf(ifp, "bpf attached\n");
1929 }
1930 
1931 /*
1932  * Detach bpf from an interface.  This involves detaching each descriptor
1933  * associated with the interface, and leaving bd_bif NULL.  Notify each
1934  * descriptor as it's detached so that any sleepers wake up and get
1935  * ENXIO.
1936  */
1937 void
1938 bpfdetach(struct ifnet *ifp)
1939 {
1940 	struct bpf_if	*bp;
1941 	struct bpf_d	*d;
1942 
1943 	/* Locate BPF interface information */
1944 	mtx_lock(&bpf_mtx);
1945 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1946 		if (ifp == bp->bif_ifp)
1947 			break;
1948 	}
1949 
1950 	/* Interface wasn't attached */
1951 	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
1952 		mtx_unlock(&bpf_mtx);
1953 		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
1954 		return;
1955 	}
1956 
1957 	LIST_REMOVE(bp, bif_next);
1958 	mtx_unlock(&bpf_mtx);
1959 
1960 	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
1961 		bpf_detachd(d);
1962 		BPFD_LOCK(d);
1963 		bpf_wakeup(d);
1964 		BPFD_UNLOCK(d);
1965 	}
1966 
1967 	mtx_destroy(&bp->bif_mtx);
1968 	free(bp, M_BPF);
1969 }
1970 
1971 /*
1972  * Get a list of available data link type of the interface.
1973  */
1974 static int
1975 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1976 {
1977 	int n, error;
1978 	struct ifnet *ifp;
1979 	struct bpf_if *bp;
1980 
1981 	ifp = d->bd_bif->bif_ifp;
1982 	n = 0;
1983 	error = 0;
1984 	mtx_lock(&bpf_mtx);
1985 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
1986 		if (bp->bif_ifp != ifp)
1987 			continue;
1988 		if (bfl->bfl_list != NULL) {
1989 			if (n >= bfl->bfl_len) {
1990 				mtx_unlock(&bpf_mtx);
1991 				return (ENOMEM);
1992 			}
1993 			error = copyout(&bp->bif_dlt,
1994 			    bfl->bfl_list + n, sizeof(u_int));
1995 		}
1996 		n++;
1997 	}
1998 	mtx_unlock(&bpf_mtx);
1999 	bfl->bfl_len = n;
2000 	return (error);
2001 }
2002 
2003 /*
2004  * Set the data link type of a BPF instance.
2005  */
2006 static int
2007 bpf_setdlt(struct bpf_d *d, u_int dlt)
2008 {
2009 	int error, opromisc;
2010 	struct ifnet *ifp;
2011 	struct bpf_if *bp;
2012 
2013 	if (d->bd_bif->bif_dlt == dlt)
2014 		return (0);
2015 	ifp = d->bd_bif->bif_ifp;
2016 	mtx_lock(&bpf_mtx);
2017 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2018 		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2019 			break;
2020 	}
2021 	mtx_unlock(&bpf_mtx);
2022 	if (bp != NULL) {
2023 		opromisc = d->bd_promisc;
2024 		bpf_detachd(d);
2025 		bpf_attachd(d, bp);
2026 		BPFD_LOCK(d);
2027 		reset_d(d);
2028 		BPFD_UNLOCK(d);
2029 		if (opromisc) {
2030 			error = ifpromisc(bp->bif_ifp, 1);
2031 			if (error)
2032 				if_printf(bp->bif_ifp,
2033 					"bpf_setdlt: ifpromisc failed (%d)\n",
2034 					error);
2035 			else
2036 				d->bd_promisc = 1;
2037 		}
2038 	}
2039 	return (bp == NULL ? EINVAL : 0);
2040 }
2041 
2042 static void
2043 bpf_drvinit(void *unused)
2044 {
2045 	struct cdev *dev;
2046 
2047 	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
2048 	LIST_INIT(&bpf_iflist);
2049 
2050 	dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
2051 	/* For compatibility */
2052 	make_dev_alias(dev, "bpf0");
2053 }
2054 
2055 /*
2056  * Zero out the various packet counters associated with all of the bpf
2057  * descriptors.  At some point, we will probably want to get a bit more
2058  * granular and allow the user to specify descriptors to be zeroed.
2059  */
2060 static void
2061 bpf_zero_counters(void)
2062 {
2063 	struct bpf_if *bp;
2064 	struct bpf_d *bd;
2065 
2066 	mtx_lock(&bpf_mtx);
2067 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2068 		BPFIF_LOCK(bp);
2069 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2070 			BPFD_LOCK(bd);
2071 			bd->bd_rcount = 0;
2072 			bd->bd_dcount = 0;
2073 			bd->bd_fcount = 0;
2074 			bd->bd_wcount = 0;
2075 			bd->bd_wfcount = 0;
2076 			bd->bd_zcopy = 0;
2077 			BPFD_UNLOCK(bd);
2078 		}
2079 		BPFIF_UNLOCK(bp);
2080 	}
2081 	mtx_unlock(&bpf_mtx);
2082 }
2083 
2084 static void
2085 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
2086 {
2087 
2088 	bzero(d, sizeof(*d));
2089 	BPFD_LOCK_ASSERT(bd);
2090 	d->bd_structsize = sizeof(*d);
2091 	d->bd_immediate = bd->bd_immediate;
2092 	d->bd_promisc = bd->bd_promisc;
2093 	d->bd_hdrcmplt = bd->bd_hdrcmplt;
2094 	d->bd_direction = bd->bd_direction;
2095 	d->bd_feedback = bd->bd_feedback;
2096 	d->bd_async = bd->bd_async;
2097 	d->bd_rcount = bd->bd_rcount;
2098 	d->bd_dcount = bd->bd_dcount;
2099 	d->bd_fcount = bd->bd_fcount;
2100 	d->bd_sig = bd->bd_sig;
2101 	d->bd_slen = bd->bd_slen;
2102 	d->bd_hlen = bd->bd_hlen;
2103 	d->bd_bufsize = bd->bd_bufsize;
2104 	d->bd_pid = bd->bd_pid;
2105 	strlcpy(d->bd_ifname,
2106 	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
2107 	d->bd_locked = bd->bd_locked;
2108 	d->bd_wcount = bd->bd_wcount;
2109 	d->bd_wdcount = bd->bd_wdcount;
2110 	d->bd_wfcount = bd->bd_wfcount;
2111 	d->bd_zcopy = bd->bd_zcopy;
2112 	d->bd_bufmode = bd->bd_bufmode;
2113 }
2114 
2115 static int
2116 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
2117 {
2118 	struct xbpf_d *xbdbuf, *xbd, zerostats;
2119 	int index, error;
2120 	struct bpf_if *bp;
2121 	struct bpf_d *bd;
2122 
2123 	/*
2124 	 * XXX This is not technically correct. It is possible for non
2125 	 * privileged users to open bpf devices. It would make sense
2126 	 * if the users who opened the devices were able to retrieve
2127 	 * the statistics for them, too.
2128 	 */
2129 	error = priv_check(req->td, PRIV_NET_BPF);
2130 	if (error)
2131 		return (error);
2132 	/*
2133 	 * Check to see if the user is requesting that the counters be
2134 	 * zeroed out.  Explicitly check that the supplied data is zeroed,
2135 	 * as we aren't allowing the user to set the counters currently.
2136 	 */
2137 	if (req->newptr != NULL) {
2138 		if (req->newlen != sizeof(zerostats))
2139 			return (EINVAL);
2140 		bzero(&zerostats, sizeof(zerostats));
2141 		xbd = req->newptr;
2142 		if (bcmp(xbd, &zerostats, sizeof(*xbd)) != 0)
2143 			return (EINVAL);
2144 		bpf_zero_counters();
2145 		return (0);
2146 	}
2147 	if (req->oldptr == NULL)
2148 		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
2149 	if (bpf_bpfd_cnt == 0)
2150 		return (SYSCTL_OUT(req, 0, 0));
2151 	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
2152 	mtx_lock(&bpf_mtx);
2153 	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
2154 		mtx_unlock(&bpf_mtx);
2155 		free(xbdbuf, M_BPF);
2156 		return (ENOMEM);
2157 	}
2158 	index = 0;
2159 	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
2160 		BPFIF_LOCK(bp);
2161 		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
2162 			xbd = &xbdbuf[index++];
2163 			BPFD_LOCK(bd);
2164 			bpfstats_fill_xbpf(xbd, bd);
2165 			BPFD_UNLOCK(bd);
2166 		}
2167 		BPFIF_UNLOCK(bp);
2168 	}
2169 	mtx_unlock(&bpf_mtx);
2170 	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
2171 	free(xbdbuf, M_BPF);
2172 	return (error);
2173 }
2174 
/* Register bpf_drvinit() to run at the SI_SUB_DRIVERS initialization stage. */
SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);
2176 
2177 #else /* !DEV_BPF && !NETGRAPH_BPF */
2178 /*
2179  * NOP stubs to allow bpf-using drivers to load and function.
2180  *
2181  * A 'better' implementation would allow the core bpf functionality
2182  * to be loaded at runtime.
2183  */
/* Placeholder bpf_if that the stub bpfattach2() hands out to every driver. */
static struct bpf_if bp_null;
2185 
/* Stub: without bpf support compiled in, tapped packets are ignored. */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
}
2190 
/* Stub: without bpf support compiled in, tapped mbuf chains are ignored. */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
}
2195 
/* Stub: the extra header and the mbuf chain are both ignored. */
void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
}
2200 
/* Stub: forwards to the stub bpfattach2(), targeting ifp->if_bpf. */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}
2207 
/*
 * Stub: no per-interface state is created; the driver's pointer is set to
 * the shared bp_null placeholder.  ifp, dlt and hdrlen are unused.
 */
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	*driverp = &bp_null;
}
2214 
/* Stub: nothing was allocated by the stub attach, so nothing to release. */
void
bpfdetach(struct ifnet *ifp)
{
}
2219 
/*
 * Stub: ignores the program and the packet and always returns -1 converted
 * to u_int (the largest u_int value), matching the "no filter" result.
 */
u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}
2225 
/* Stub: every filter program is reported as invalid (returns 0). */
int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;		/* false */
}
2231 
2232 #endif /* !DEV_BPF && !NETGRAPH_BPF */
2233