xref: /freebsd/sys/net/bpf.c (revision cacdd70cc751fb68dec4b86c5e5b8c969b6e26ef)
1 /*-
2  * Copyright (c) 1990, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from the Stanford/CMU enet packet filter,
6  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8  * Berkeley Laboratory.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)bpf.c	8.4 (Berkeley) 1/9/95
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include "opt_bpf.h"
41 #include "opt_mac.h"
42 #include "opt_netgraph.h"
43 
44 #include <sys/types.h>
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/conf.h>
48 #include <sys/fcntl.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/time.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/signalvar.h>
55 #include <sys/filio.h>
56 #include <sys/sockio.h>
57 #include <sys/ttycom.h>
58 #include <sys/uio.h>
59 
60 #include <sys/event.h>
61 #include <sys/file.h>
62 #include <sys/poll.h>
63 #include <sys/proc.h>
64 
65 #include <sys/socket.h>
66 
67 #include <net/if.h>
68 #include <net/bpf.h>
69 #include <net/bpf_buffer.h>
70 #ifdef BPF_JITTER
71 #include <net/bpf_jitter.h>
72 #endif
73 #include <net/bpf_zerocopy.h>
74 #include <net/bpfdesc.h>
75 
76 #include <netinet/in.h>
77 #include <netinet/if_ether.h>
78 #include <sys/kernel.h>
79 #include <sys/sysctl.h>
80 
81 #include <net80211/ieee80211_freebsd.h>
82 
83 #include <security/mac/mac_framework.h>
84 
/* malloc(9) type under which all BPF allocations are accounted. */
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");

#if defined(DEV_BPF) || defined(NETGRAPH_BPF)

#define PRINET  26			/* interruptible */

/*
 * bpf_iflist is a list of BPF interface structures, each corresponding to a
 * specific DLT.  The same network interface might have several BPF interface
 * structures registered by different layers in the stack (i.e., 802.11
 * frames, ethernet frames, etc).
 */
static LIST_HEAD(, bpf_if)	bpf_iflist;
static struct mtx	bpf_mtx;		/* bpf global lock */
static int		bpf_bpfd_cnt;		/* attached descriptor count */

/* Forward declarations for the static routines defined in this file. */
static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
static void	bpf_detachd(struct bpf_d *);
static void	bpf_freed(struct bpf_d *);
static int	bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
		    struct sockaddr *, int *, struct bpf_insn *);
static int	bpf_setif(struct bpf_d *, struct ifreq *);
static void	bpf_timed_out(void *);
static __inline void
		bpf_wakeup(struct bpf_d *);
static void	catchpacket(struct bpf_d *, u_char *, u_int, u_int,
		    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
		    struct timeval *);
static void	reset_d(struct bpf_d *);
static int	 bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
static int	bpf_setdlt(struct bpf_d *, u_int);
static void	filt_bpfdetach(struct knote *);
static int	filt_bpfread(struct knote *, long);
static void	bpf_drvinit(void *);
static void	bpf_clone(void *, struct ucred *, char *, int, struct cdev **);
static int	bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);

/* net.bpf sysctl tree: tunables plus a statistics export handler. */
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
static int bpf_maxinsns = BPF_MAXINSNS;
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
    &bpf_maxinsns, 0, "Maximum bpf program instructions");
static int bpf_zerocopy_enable = 0;
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
    &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");

/* Character-device entry points implemented below. */
static	d_open_t	bpfopen;
static	d_close_t	bpfclose;
static	d_read_t	bpfread;
static	d_write_t	bpfwrite;
static	d_ioctl_t	bpfioctl;
static	d_poll_t	bpfpoll;
static	d_kqfilter_t	bpfkqfilter;

/*
 * /dev/bpf* switch table.  D_TRACKCLOSE requests a d_close callback on
 * every close(2) so per-open state can be torn down reliably.
 */
static struct cdevsw bpf_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_TRACKCLOSE,
	.d_open =	bpfopen,
	.d_close =	bpfclose,
	.d_read =	bpfread,
	.d_write =	bpfwrite,
	.d_ioctl =	bpfioctl,
	.d_poll =	bpfpoll,
	.d_name =	"bpf",
	.d_kqfilter =	bpfkqfilter,
};
153 
154 static struct filterops bpfread_filtops =
155 	{ 1, NULL, filt_bpfdetach, filt_bpfread };
156 
157 /*
158  * Wrapper functions for various buffering methods.  If the set of buffer
159  * modes expands, we will probably want to introduce a switch data structure
160  * similar to protosw, et.
161  */
162 static void
163 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
164     u_int len)
165 {
166 
167 	BPFD_LOCK_ASSERT(d);
168 
169 	switch (d->bd_bufmode) {
170 	case BPF_BUFMODE_BUFFER:
171 		return (bpf_buffer_append_bytes(d, buf, offset, src, len));
172 
173 	case BPF_BUFMODE_ZBUF:
174 		d->bd_zcopy++;
175 		return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
176 
177 	default:
178 		panic("bpf_buf_append_bytes");
179 	}
180 }
181 
182 static void
183 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
184     u_int len)
185 {
186 
187 	BPFD_LOCK_ASSERT(d);
188 
189 	switch (d->bd_bufmode) {
190 	case BPF_BUFMODE_BUFFER:
191 		return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
192 
193 	case BPF_BUFMODE_ZBUF:
194 		d->bd_zcopy++;
195 		return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
196 
197 	default:
198 		panic("bpf_buf_append_mbuf");
199 	}
200 }
201 
202 /*
203  * This function gets called when the free buffer is re-assigned.
204  */
205 static void
206 bpf_buf_reclaimed(struct bpf_d *d)
207 {
208 
209 	BPFD_LOCK_ASSERT(d);
210 
211 	switch (d->bd_bufmode) {
212 	case BPF_BUFMODE_BUFFER:
213 		return;
214 
215 	case BPF_BUFMODE_ZBUF:
216 		bpf_zerocopy_buf_reclaimed(d);
217 		return;
218 
219 	default:
220 		panic("bpf_buf_reclaimed");
221 	}
222 }
223 
224 /*
225  * If the buffer mechanism has a way to decide that a held buffer can be made
226  * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
227  * returned if the buffer can be discarded, (0) is returned if it cannot.
228  */
229 static int
230 bpf_canfreebuf(struct bpf_d *d)
231 {
232 
233 	BPFD_LOCK_ASSERT(d);
234 
235 	switch (d->bd_bufmode) {
236 	case BPF_BUFMODE_ZBUF:
237 		return (bpf_zerocopy_canfreebuf(d));
238 	}
239 	return (0);
240 }
241 
242 /*
243  * Allow the buffer model to indicate that the current store buffer is
244  * immutable, regardless of the appearance of space.  Return (1) if the
245  * buffer is writable, and (0) if not.
246  */
247 static int
248 bpf_canwritebuf(struct bpf_d *d)
249 {
250 
251 	BPFD_LOCK_ASSERT(d);
252 
253 	switch (d->bd_bufmode) {
254 	case BPF_BUFMODE_ZBUF:
255 		return (bpf_zerocopy_canwritebuf(d));
256 	}
257 	return (1);
258 }
259 
260 /*
261  * Notify buffer model that an attempt to write to the store buffer has
262  * resulted in a dropped packet, in which case the buffer may be considered
263  * full.
264  */
265 static void
266 bpf_buffull(struct bpf_d *d)
267 {
268 
269 	BPFD_LOCK_ASSERT(d);
270 
271 	switch (d->bd_bufmode) {
272 	case BPF_BUFMODE_ZBUF:
273 		bpf_zerocopy_buffull(d);
274 		break;
275 	}
276 }
277 
278 /*
279  * Notify the buffer model that a buffer has moved into the hold position.
280  */
281 void
282 bpf_bufheld(struct bpf_d *d)
283 {
284 
285 	BPFD_LOCK_ASSERT(d);
286 
287 	switch (d->bd_bufmode) {
288 	case BPF_BUFMODE_ZBUF:
289 		bpf_zerocopy_bufheld(d);
290 		break;
291 	}
292 }
293 
294 static void
295 bpf_free(struct bpf_d *d)
296 {
297 
298 	switch (d->bd_bufmode) {
299 	case BPF_BUFMODE_BUFFER:
300 		return (bpf_buffer_free(d));
301 
302 	case BPF_BUFMODE_ZBUF:
303 		return (bpf_zerocopy_free(d));
304 
305 	default:
306 		panic("bpf_buf_free");
307 	}
308 }
309 
310 static int
311 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
312 {
313 
314 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
315 		return (EOPNOTSUPP);
316 	return (bpf_buffer_uiomove(d, buf, len, uio));
317 }
318 
319 static int
320 bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
321 {
322 
323 	if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
324 		return (EOPNOTSUPP);
325 	return (bpf_buffer_ioctl_sblen(d, i));
326 }
327 
328 static int
329 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
330 {
331 
332 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
333 		return (EOPNOTSUPP);
334 	return (bpf_zerocopy_ioctl_getzmax(td, d, i));
335 }
336 
337 static int
338 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
339 {
340 
341 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
342 		return (EOPNOTSUPP);
343 	return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
344 }
345 
346 static int
347 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
348 {
349 
350 	if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
351 		return (EOPNOTSUPP);
352 	return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
353 }
354 
355 /*
356  * General BPF functions.
357  */
/*
 * Copy a userland write into a freshly allocated mbuf, build a
 * link-layer sockaddr for the interface output routine, and run the
 * descriptor's write filter over the data.  On success, *mp holds the
 * mbuf (ownership passes to the caller) and *hdrlen is the number of
 * link-header bytes the caller must strip before transmission.
 * Returns 0 or an errno; on failure the mbuf is freed here.
 */
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
    struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
{
	const struct ieee80211_bpf_params *p;
	struct ether_header *eh;
	struct mbuf *m;
	int error;
	int len;
	int hlen;
	int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_IMPLINK;
		hlen = 0;
		break;

	case DLT_RAW:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_NULL:
		/*
		 * null interface types require a 4 byte pseudo header which
		 * corresponds to the address family of the packet.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * en atm driver requires 4-byte atm pseudo header.
		 * though it isn't standard, vpi:vci needs to be
		 * specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12;	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 4;	/* This should match PPP_HDRLEN */
		break;

	case DLT_IEEE802_11:		/* IEEE 802.11 wireless */
		sockp->sa_family = AF_IEEE80211;
		hlen = 0;
		break;

	case DLT_IEEE802_11_RADIO:	/* IEEE 802.11 wireless w/ phy params */
		sockp->sa_family = AF_IEEE80211;
		sockp->sa_len = 12;	/* XXX != 0 */
		hlen = sizeof(struct ieee80211_bpf_params);
		break;

	default:
		return (EIO);
	}

	len = uio->uio_resid;

	/*
	 * NOTE(review): if len < hlen, len - hlen is a negative int that
	 * is converted to a large unsigned value when compared against
	 * if_mtu, so short writes are rejected here with EMSGSIZE rather
	 * than falling through — confirm this is the intended errno.
	 */
	if (len - hlen > ifp->if_mtu)
		return (EMSGSIZE);

	/* Cap single writes at the largest jumbo cluster we can get. */
	if ((unsigned)len > MJUM16BYTES)
		return (EIO);

	/* Pick the smallest mbuf/cluster size that fits the payload. */
	if (len <= MHLEN)
		MGETHDR(m, M_WAIT, MT_DATA);
	else if (len <= MCLBYTES)
		m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
#if (MJUMPAGESIZE > MCLBYTES)
		    len <= MJUMPAGESIZE ? MJUMPAGESIZE :
#endif
		    (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	*mp = m;

	/* Packet must at least contain the link-level header. */
	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}

	error = uiomove(mtod(m, u_char *), len, uio);
	if (error)
		goto bad;

	/* Reject packets that the descriptor's write filter drops. */
	slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
	if (slen == 0) {
		error = EPERM;
		goto bad;
	}

	/* Check for multicast destination */
	switch (linktype) {
	case DLT_EN10MB:
		eh = mtod(m, struct ether_header *);
		if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
			if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN) == 0)
				m->m_flags |= M_BCAST;
			else
				m->m_flags |= M_MCAST;
		}
		break;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (sockp->sa_family == AF_IEEE80211) {
			/*
			 * Collect true length from the parameter header
			 * NB: sockp is known to be zero'd so if we do a
			 *     short copy unspecified parameters will be
			 *     zero.
			 * NB: packet may not be aligned after stripping
			 *     bpf params
			 * XXX check ibp_vers
			 */
			p = mtod(m, const struct ieee80211_bpf_params *);
			hlen = p->ibp_len;
			if (hlen > sizeof(sockp->sa_data)) {
				error = EINVAL;
				goto bad;
			}
		}
		bcopy(m->m_data, sockp->sa_data, hlen);
	}
	*hdrlen = hlen;

	return (0);
bad:
	m_freem(m);
	return (error);
}
522 
523 /*
524  * Attach file to the bpf interface, i.e. make d listen on bp.
525  */
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */
	BPFIF_LOCK(bp);
	d->bd_bif = bp;
	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);

	/*
	 * NOTE(review): bpf_bpfd_cnt is modified here under BPFIF_LOCK but
	 * decremented in bpf_detachd() under a possibly different bif lock;
	 * global consistency presumably relies on callers holding bpf_mtx —
	 * confirm.
	 */
	bpf_bpfd_cnt++;
	BPFIF_UNLOCK(bp);
}
541 
542 /*
543  * Detach a file from its interface.
544  */
static void
bpf_detachd(struct bpf_d *d)
{
	int error;
	struct bpf_if *bp;
	struct ifnet *ifp;

	bp = d->bd_bif;
	BPFIF_LOCK(bp);
	BPFD_LOCK(d);
	ifp = d->bd_bif->bif_ifp;

	/*
	 * Remove d from the interface's descriptor list.
	 */
	LIST_REMOVE(d, bd_next);

	bpf_bpfd_cnt--;
	d->bd_bif = NULL;
	BPFD_UNLOCK(d);
	BPFIF_UNLOCK(bp);

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 *
	 * NOTE(review): bd_promisc is read and cleared after both locks
	 * have been dropped; presumably serialized by the caller holding
	 * bpf_mtx — confirm.
	 */
	if (d->bd_promisc) {
		d->bd_promisc = 0;
		error = ifpromisc(ifp, 0);
		if (error != 0 && error != ENXIO) {
			/*
			 * ENXIO can happen if a pccard is unplugged
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			if_printf(bp->bif_ifp,
				"bpf_detach: ifpromisc failed (%d)\n", error);
		}
	}
}
586 
587 /*
588  * Open ethernet device.  Returns ENXIO for illegal minor device number,
589  * EBUSY if file is open by another process.
590  */
591 /* ARGSUSED */
static	int
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d;

	mtx_lock(&bpf_mtx);
	d = dev->si_drv1;
	/*
	 * Each minor can be opened by only one process.  If the requested
	 * minor is in use, return EBUSY.
	 */
	if (d != NULL) {
		mtx_unlock(&bpf_mtx);
		return (EBUSY);
	}
	/*
	 * Reserve the minor with a sentinel so a racing open sees it busy
	 * while we allocate the real descriptor without holding bpf_mtx.
	 */
	dev->si_drv1 = (struct bpf_d *)~0;	/* mark device in use */
	mtx_unlock(&bpf_mtx);

	if ((dev->si_flags & SI_NAMED) == 0)
		make_dev(&bpf_cdevsw, minor(dev), UID_ROOT, GID_WHEEL, 0600,
		    "bpf%d", dev2unit(dev));
	/* M_WAITOK may sleep, which is why bpf_mtx was dropped above. */
	MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
	dev->si_drv1 = d;

	/*
	 * For historical reasons, perform a one-time initialization call to
	 * the buffer routines, even though we're not yet committed to a
	 * particular buffer method.
	 */
	bpf_buffer_init(d);
	d->bd_bufmode = BPF_BUFMODE_BUFFER;
	d->bd_sig = SIGIO;		/* default signal for FIOASYNC */
	d->bd_direction = BPF_D_INOUT;	/* default; see BIOCSDIRECTION */
	d->bd_pid = td->td_proc->p_pid;
#ifdef MAC
	mac_bpfdesc_init(d);
	mac_bpfdesc_create(td->td_ucred, d);
#endif
	mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
	callout_init(&d->bd_callout, CALLOUT_MPSAFE);
	knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);

	return (0);
}
636 
637 /*
638  * Close the descriptor by detaching it from its interface,
639  * deallocating its buffers, and marking it free.
640  */
641 /* ARGSUSED */
static	int
bpfclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	struct bpf_d *d = dev->si_drv1;

	/* Cancel any pending read timeout before tearing down state. */
	BPFD_LOCK(d);
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	d->bd_state = BPF_IDLE;
	BPFD_UNLOCK(d);
	funsetown(&d->bd_sigio);
	/* bpf_mtx serializes the detach against attach/ioctl paths. */
	mtx_lock(&bpf_mtx);
	if (d->bd_bif)
		bpf_detachd(d);
	mtx_unlock(&bpf_mtx);
	selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
	mac_bpfdesc_destroy(d);
#endif /* MAC */
	knlist_destroy(&d->bd_sel.si_note);
	bpf_freed(d);		/* release buffers, filters, and the mutex */
	dev->si_drv1 = NULL;
	free(d, M_BPF);

	return (0);
}
668 
669 /*
670  *  bpfread - read next chunk of packets from buffers
671  */
static	int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	int timed_out;
	int error;

	/*
	 * Restrict application to use a buffer the same size as
	 * as kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize)
		return (EINVAL);

	BPFD_LOCK(d);
	d->bd_pid = curthread->td_proc->p_pid;
	/* read(2) is only meaningful for the classic buffer model. */
	if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
		BPFD_UNLOCK(d);
		return (EOPNOTSUPP);
	}
	if (d->bd_state == BPF_WAITING)
		callout_stop(&d->bd_callout);
	timed_out = (d->bd_state == BPF_TIMED_OUT);
	d->bd_state = BPF_IDLE;
	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}

		/*
		 * No data is available, check to see if the bpf device
		 * is still pointed at a real interface.  If not, return
		 * ENXIO so that the userland process knows to rebind
		 * it before using it again.
		 */
		if (d->bd_bif == NULL) {
			BPFD_UNLOCK(d);
			return (ENXIO);
		}

		if (ioflag & O_NONBLOCK) {
			BPFD_UNLOCK(d);
			return (EWOULDBLOCK);
		}
		/* Sleep releases bd_mtx; bd_rtout of 0 sleeps indefinitely. */
		error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
		     "bpf", d->bd_rtout);
		if (error == EINTR || error == ERESTART) {
			BPFD_UNLOCK(d);
			return (error);
		}
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				BPFD_UNLOCK(d);
				return (0);
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	BPFD_UNLOCK(d);

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 *
	 * XXXRW: More synchronization needed here: what if a second thread
	 * issues a read on the same fd at the same time?  Don't want this
	 * getting invalidated.
	 */
	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

	/* Return the drained hold buffer to the free slot. */
	BPFD_LOCK(d);
	d->bd_fbuf = d->bd_hbuf;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	bpf_buf_reclaimed(d);
	BPFD_UNLOCK(d);

	return (error);
}
780 
781 /*
782  * If there are processes sleeping on this descriptor, wake them up.
783  */
static __inline void
bpf_wakeup(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);
	/* A pending read timeout is no longer needed once data is ready. */
	if (d->bd_state == BPF_WAITING) {
		callout_stop(&d->bd_callout);
		d->bd_state = BPF_IDLE;
	}
	wakeup(d);
	/* Deliver the configured signal if async I/O (FIOASYNC) is on. */
	if (d->bd_async && d->bd_sig && d->bd_sigio)
		pgsigio(&d->bd_sigio, d->bd_sig, 0);

	/* Notify select/poll waiters and kqueue listeners. */
	selwakeuppri(&d->bd_sel, PRINET);
	KNOTE_LOCKED(&d->bd_sel.si_note, 0);
}
800 
801 static void
802 bpf_timed_out(void *arg)
803 {
804 	struct bpf_d *d = (struct bpf_d *)arg;
805 
806 	BPFD_LOCK(d);
807 	if (d->bd_state == BPF_WAITING) {
808 		d->bd_state = BPF_TIMED_OUT;
809 		if (d->bd_slen != 0)
810 			bpf_wakeup(d);
811 	}
812 	BPFD_UNLOCK(d);
813 }
814 
815 static int
816 bpf_ready(struct bpf_d *d)
817 {
818 
819 	BPFD_LOCK_ASSERT(d);
820 
821 	if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
822 		return (1);
823 	if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
824 	    d->bd_slen != 0)
825 		return (1);
826 	return (0);
827 }
828 
/*
 * write(2) on a BPF descriptor: inject a packet on the attached
 * interface.  The uio data is copied into an mbuf by bpf_movein(),
 * filtered by the write filter, optionally duplicated back into the
 * input path (BIOCFEEDBACK), and handed to the interface output
 * routine.  Write statistics (bd_w*) are updated along the way.
 */
static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d = dev->si_drv1;
	struct ifnet *ifp;
	struct mbuf *m, *mc;
	struct sockaddr dst;
	int error, hlen;

	/*
	 * NOTE(review): bd_pid and the bd_w* counters are updated here
	 * without holding the descriptor lock; presumably counter drift
	 * is tolerated — confirm.
	 */
	d->bd_pid = curthread->td_proc->p_pid;
	d->bd_wcount++;
	if (d->bd_bif == NULL) {
		d->bd_wdcount++;
		return (ENXIO);
	}

	ifp = d->bd_bif->bif_ifp;

	if ((ifp->if_flags & IFF_UP) == 0) {
		d->bd_wdcount++;
		return (ENETDOWN);
	}

	if (uio->uio_resid == 0) {
		d->bd_wdcount++;
		return (0);
	}

	bzero(&dst, sizeof(dst));
	m = NULL;
	hlen = 0;
	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
	    &m, &dst, &hlen, d->bd_wfilter);
	if (error) {
		d->bd_wdcount++;
		return (error);
	}
	d->bd_wfcount++;
	if (d->bd_hdrcmplt)
		dst.sa_family = pseudo_AF_HDRCMPLT;

	if (d->bd_feedback) {
		/* Duplicate the packet so a copy can re-enter the stack. */
		mc = m_dup(m, M_DONTWAIT);
		if (mc != NULL)
			mc->m_pkthdr.rcvif = ifp;
		/* Set M_PROMISC for outgoing packets to be discarded. */
		if (d->bd_direction == BPF_D_INOUT)
			m->m_flags |= M_PROMISC;
	} else
		mc = NULL;

	/* Strip the link-level header bpf_movein left at the front. */
	m->m_pkthdr.len -= hlen;
	m->m_len -= hlen;
	m->m_data += hlen;	/* XXX */

#ifdef MAC
	BPFD_LOCK(d);
	mac_bpfdesc_create_mbuf(d, m);
	if (mc != NULL)
		mac_bpfdesc_create_mbuf(d, mc);
	BPFD_UNLOCK(d);
#endif

	/* if_output consumes m regardless of the outcome. */
	error = (*ifp->if_output)(ifp, m, &dst, NULL);
	if (error)
		d->bd_wdcount++;

	/* Feed the duplicate back in only if the transmit succeeded. */
	if (mc != NULL) {
		if (error == 0)
			(*ifp->if_input)(ifp, mc);
		else
			m_freem(mc);
	}

	return (error);
}
905 
906 /*
907  * Reset a descriptor by flushing its packet buffer and clearing the
908  * receive and drop counts.
909  */
910 static void
911 reset_d(struct bpf_d *d)
912 {
913 
914 	mtx_assert(&d->bd_mtx, MA_OWNED);
915 	if (d->bd_hbuf) {
916 		/* Free the hold buffer. */
917 		d->bd_fbuf = d->bd_hbuf;
918 		d->bd_hbuf = NULL;
919 		bpf_buf_reclaimed(d);
920 	}
921 	d->bd_slen = 0;
922 	d->bd_hlen = 0;
923 	d->bd_rcount = 0;
924 	d->bd_dcount = 0;
925 	d->bd_fcount = 0;
926 	d->bd_wcount = 0;
927 	d->bd_wfcount = 0;
928 	d->bd_wdcount = 0;
929 	d->bd_zcopy = 0;
930 }
931 
932 /*
933  *  FIONREAD		Check for read packet available.
934  *  SIOCGIFADDR		Get interface address - convenient hook to driver.
935  *  BIOCGBLEN		Get buffer len [for read()].
936  *  BIOCSETF		Set read filter.
937  *  BIOCSETFNR		Set read filter without resetting descriptor.
938  *  BIOCSETWF		Set write filter.
939  *  BIOCFLUSH		Flush read packet buffer.
940  *  BIOCPROMISC		Put interface into promiscuous mode.
941  *  BIOCGDLT		Get link layer type.
942  *  BIOCGETIF		Get interface name.
943  *  BIOCSETIF		Set interface.
944  *  BIOCSRTIMEOUT	Set read timeout.
945  *  BIOCGRTIMEOUT	Get read timeout.
946  *  BIOCGSTATS		Get packet stats.
947  *  BIOCIMMEDIATE	Set immediate mode.
948  *  BIOCVERSION		Get filter language version.
949  *  BIOCGHDRCMPLT	Get "header already complete" flag
950  *  BIOCSHDRCMPLT	Set "header already complete" flag
951  *  BIOCGDIRECTION	Get packet direction flag
952  *  BIOCSDIRECTION	Set packet direction flag
953  *  BIOCLOCK		Set "locked" flag
954  *  BIOCFEEDBACK	Set packet feedback mode.
955  *  BIOCSETZBUF		Set current zero-copy buffer locations.
956  *  BIOCGETZMAX		Get maximum zero-copy buffer size.
957  *  BIOCROTZBUF		Force rotation of zero-copy buffer
958  *  BIOCSETBUFMODE	Set buffer mode.
959  *  BIOCGETBUFMODE	Get current buffer mode.
960  */
961 /* ARGSUSED */
962 static	int
963 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
964     struct thread *td)
965 {
966 	struct bpf_d *d = dev->si_drv1;
967 	int error = 0;
968 
969 	/*
970 	 * Refresh PID associated with this descriptor.
971 	 */
972 	BPFD_LOCK(d);
973 	d->bd_pid = td->td_proc->p_pid;
974 	if (d->bd_state == BPF_WAITING)
975 		callout_stop(&d->bd_callout);
976 	d->bd_state = BPF_IDLE;
977 	BPFD_UNLOCK(d);
978 
979 	if (d->bd_locked == 1) {
980 		switch (cmd) {
981 		case BIOCGBLEN:
982 		case BIOCFLUSH:
983 		case BIOCGDLT:
984 		case BIOCGDLTLIST:
985 		case BIOCGETIF:
986 		case BIOCGRTIMEOUT:
987 		case BIOCGSTATS:
988 		case BIOCVERSION:
989 		case BIOCGRSIG:
990 		case BIOCGHDRCMPLT:
991 		case BIOCFEEDBACK:
992 		case FIONREAD:
993 		case BIOCLOCK:
994 		case BIOCSRTIMEOUT:
995 		case BIOCIMMEDIATE:
996 		case TIOCGPGRP:
997 		case BIOCROTZBUF:
998 			break;
999 		default:
1000 			return (EPERM);
1001 		}
1002 	}
1003 	switch (cmd) {
1004 
1005 	default:
1006 		error = EINVAL;
1007 		break;
1008 
1009 	/*
1010 	 * Check for read packet available.
1011 	 */
1012 	case FIONREAD:
1013 		{
1014 			int n;
1015 
1016 			BPFD_LOCK(d);
1017 			n = d->bd_slen;
1018 			if (d->bd_hbuf)
1019 				n += d->bd_hlen;
1020 			BPFD_UNLOCK(d);
1021 
1022 			*(int *)addr = n;
1023 			break;
1024 		}
1025 
1026 	case SIOCGIFADDR:
1027 		{
1028 			struct ifnet *ifp;
1029 
1030 			if (d->bd_bif == NULL)
1031 				error = EINVAL;
1032 			else {
1033 				ifp = d->bd_bif->bif_ifp;
1034 				error = (*ifp->if_ioctl)(ifp, cmd, addr);
1035 			}
1036 			break;
1037 		}
1038 
1039 	/*
1040 	 * Get buffer len [for read()].
1041 	 */
1042 	case BIOCGBLEN:
1043 		*(u_int *)addr = d->bd_bufsize;
1044 		break;
1045 
1046 	/*
1047 	 * Set buffer length.
1048 	 */
1049 	case BIOCSBLEN:
1050 		error = bpf_ioctl_sblen(d, (u_int *)addr);
1051 		break;
1052 
1053 	/*
1054 	 * Set link layer read filter.
1055 	 */
1056 	case BIOCSETF:
1057 	case BIOCSETFNR:
1058 	case BIOCSETWF:
1059 		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1060 		break;
1061 
1062 	/*
1063 	 * Flush read packet buffer.
1064 	 */
1065 	case BIOCFLUSH:
1066 		BPFD_LOCK(d);
1067 		reset_d(d);
1068 		BPFD_UNLOCK(d);
1069 		break;
1070 
1071 	/*
1072 	 * Put interface into promiscuous mode.
1073 	 */
1074 	case BIOCPROMISC:
1075 		if (d->bd_bif == NULL) {
1076 			/*
1077 			 * No interface attached yet.
1078 			 */
1079 			error = EINVAL;
1080 			break;
1081 		}
1082 		if (d->bd_promisc == 0) {
1083 			error = ifpromisc(d->bd_bif->bif_ifp, 1);
1084 			if (error == 0)
1085 				d->bd_promisc = 1;
1086 		}
1087 		break;
1088 
1089 	/*
1090 	 * Get current data link type.
1091 	 */
1092 	case BIOCGDLT:
1093 		if (d->bd_bif == NULL)
1094 			error = EINVAL;
1095 		else
1096 			*(u_int *)addr = d->bd_bif->bif_dlt;
1097 		break;
1098 
1099 	/*
1100 	 * Get a list of supported data link types.
1101 	 */
1102 	case BIOCGDLTLIST:
1103 		if (d->bd_bif == NULL)
1104 			error = EINVAL;
1105 		else
1106 			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1107 		break;
1108 
1109 	/*
1110 	 * Set data link type.
1111 	 */
1112 	case BIOCSDLT:
1113 		if (d->bd_bif == NULL)
1114 			error = EINVAL;
1115 		else
1116 			error = bpf_setdlt(d, *(u_int *)addr);
1117 		break;
1118 
1119 	/*
1120 	 * Get interface name.
1121 	 */
1122 	case BIOCGETIF:
1123 		if (d->bd_bif == NULL)
1124 			error = EINVAL;
1125 		else {
1126 			struct ifnet *const ifp = d->bd_bif->bif_ifp;
1127 			struct ifreq *const ifr = (struct ifreq *)addr;
1128 
1129 			strlcpy(ifr->ifr_name, ifp->if_xname,
1130 			    sizeof(ifr->ifr_name));
1131 		}
1132 		break;
1133 
1134 	/*
1135 	 * Set interface.
1136 	 */
1137 	case BIOCSETIF:
1138 		error = bpf_setif(d, (struct ifreq *)addr);
1139 		break;
1140 
1141 	/*
1142 	 * Set read timeout.
1143 	 */
1144 	case BIOCSRTIMEOUT:
1145 		{
1146 			struct timeval *tv = (struct timeval *)addr;
1147 
1148 			/*
1149 			 * Subtract 1 tick from tvtohz() since this isn't
1150 			 * a one-shot timer.
1151 			 */
1152 			if ((error = itimerfix(tv)) == 0)
1153 				d->bd_rtout = tvtohz(tv) - 1;
1154 			break;
1155 		}
1156 
1157 	/*
1158 	 * Get read timeout.
1159 	 */
1160 	case BIOCGRTIMEOUT:
1161 		{
1162 			struct timeval *tv = (struct timeval *)addr;
1163 
1164 			tv->tv_sec = d->bd_rtout / hz;
1165 			tv->tv_usec = (d->bd_rtout % hz) * tick;
1166 			break;
1167 		}
1168 
1169 	/*
1170 	 * Get packet stats.
1171 	 */
1172 	case BIOCGSTATS:
1173 		{
1174 			struct bpf_stat *bs = (struct bpf_stat *)addr;
1175 
1176 			/* XXXCSJP overflow */
1177 			bs->bs_recv = d->bd_rcount;
1178 			bs->bs_drop = d->bd_dcount;
1179 			break;
1180 		}
1181 
1182 	/*
1183 	 * Set immediate mode.
1184 	 */
1185 	case BIOCIMMEDIATE:
1186 		d->bd_immediate = *(u_int *)addr;
1187 		break;
1188 
1189 	case BIOCVERSION:
1190 		{
1191 			struct bpf_version *bv = (struct bpf_version *)addr;
1192 
1193 			bv->bv_major = BPF_MAJOR_VERSION;
1194 			bv->bv_minor = BPF_MINOR_VERSION;
1195 			break;
1196 		}
1197 
1198 	/*
1199 	 * Get "header already complete" flag
1200 	 */
1201 	case BIOCGHDRCMPLT:
1202 		*(u_int *)addr = d->bd_hdrcmplt;
1203 		break;
1204 
1205 	/*
1206 	 * Set "header already complete" flag
1207 	 */
1208 	case BIOCSHDRCMPLT:
1209 		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1210 		break;
1211 
1212 	/*
1213 	 * Get packet direction flag
1214 	 */
1215 	case BIOCGDIRECTION:
1216 		*(u_int *)addr = d->bd_direction;
1217 		break;
1218 
1219 	/*
1220 	 * Set packet direction flag
1221 	 */
1222 	case BIOCSDIRECTION:
1223 		{
1224 			u_int	direction;
1225 
1226 			direction = *(u_int *)addr;
1227 			switch (direction) {
1228 			case BPF_D_IN:
1229 			case BPF_D_INOUT:
1230 			case BPF_D_OUT:
1231 				d->bd_direction = direction;
1232 				break;
1233 			default:
1234 				error = EINVAL;
1235 			}
1236 		}
1237 		break;
1238 
1239 	case BIOCFEEDBACK:
1240 		d->bd_feedback = *(u_int *)addr;
1241 		break;
1242 
1243 	case BIOCLOCK:
1244 		d->bd_locked = 1;
1245 		break;
1246 
1247 	case FIONBIO:		/* Non-blocking I/O */
1248 		break;
1249 
1250 	case FIOASYNC:		/* Send signal on receive packets */
1251 		d->bd_async = *(int *)addr;
1252 		break;
1253 
1254 	case FIOSETOWN:
1255 		error = fsetown(*(int *)addr, &d->bd_sigio);
1256 		break;
1257 
1258 	case FIOGETOWN:
1259 		*(int *)addr = fgetown(&d->bd_sigio);
1260 		break;
1261 
1262 	/* This is deprecated, FIOSETOWN should be used instead. */
1263 	case TIOCSPGRP:
1264 		error = fsetown(-(*(int *)addr), &d->bd_sigio);
1265 		break;
1266 
1267 	/* This is deprecated, FIOGETOWN should be used instead. */
1268 	case TIOCGPGRP:
1269 		*(int *)addr = -fgetown(&d->bd_sigio);
1270 		break;
1271 
1272 	case BIOCSRSIG:		/* Set receive signal */
1273 		{
1274 			u_int sig;
1275 
1276 			sig = *(u_int *)addr;
1277 
1278 			if (sig >= NSIG)
1279 				error = EINVAL;
1280 			else
1281 				d->bd_sig = sig;
1282 			break;
1283 		}
1284 	case BIOCGRSIG:
1285 		*(u_int *)addr = d->bd_sig;
1286 		break;
1287 
1288 	case BIOCGETBUFMODE:
1289 		*(u_int *)addr = d->bd_bufmode;
1290 		break;
1291 
1292 	case BIOCSETBUFMODE:
1293 		/*
1294 		 * Allow the buffering mode to be changed as long as we
1295 		 * haven't yet committed to a particular mode.  Our
1296 		 * definition of commitment, for now, is whether or not a
1297 		 * buffer has been allocated or an interface attached, since
1298 		 * that's the point where things get tricky.
1299 		 */
1300 		switch (*(u_int *)addr) {
1301 		case BPF_BUFMODE_BUFFER:
1302 			break;
1303 
1304 		case BPF_BUFMODE_ZBUF:
1305 			if (bpf_zerocopy_enable)
1306 				break;
1307 			/* FALLSTHROUGH */
1308 
1309 		default:
1310 			return (EINVAL);
1311 		}
1312 
1313 		BPFD_LOCK(d);
1314 		if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
1315 		    d->bd_fbuf != NULL || d->bd_bif != NULL) {
1316 			BPFD_UNLOCK(d);
1317 			return (EBUSY);
1318 		}
1319 		d->bd_bufmode = *(u_int *)addr;
1320 		BPFD_UNLOCK(d);
1321 		break;
1322 
1323 	case BIOCGETZMAX:
1324 		return (bpf_ioctl_getzmax(td, d, (size_t *)addr));
1325 
1326 	case BIOCSETZBUF:
1327 		return (bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr));
1328 
1329 	case BIOCROTZBUF:
1330 		return (bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr));
1331 	}
1332 	return (error);
1333 }
1334 
1335 /*
1336  * Set d's packet filter program to fp.  If this file already has a filter,
1337  * free it and replace it.  Returns EINVAL for bogus requests.
1338  */
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_insn *fcode, *old;
	u_int wfilter, flen, size;
#ifdef BPF_JITTER
	bpf_jit_filter *ofunc;
#endif

	/*
	 * Decide which slot is being replaced: BIOCSETWF targets the write
	 * filter; any other command replaces the read filter (and, with
	 * BPF_JITTER, its compiled counterpart).  Remember the displaced
	 * pointers so they can be freed after the descriptor lock is
	 * dropped.
	 */
	if (cmd == BIOCSETWF) {
		old = d->bd_wfilter;
		wfilter = 1;
#ifdef BPF_JITTER
		ofunc = NULL;
#endif
	} else {
		wfilter = 0;
		old = d->bd_rfilter;
#ifdef BPF_JITTER
		ofunc = d->bd_bfilter;
#endif
	}
	/*
	 * A NULL instruction list with zero length clears the filter;
	 * NULL with a non-zero length is a malformed request.
	 */
	if (fp->bf_insns == NULL) {
		if (fp->bf_len != 0)
			return (EINVAL);
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = NULL;
		else {
			d->bd_rfilter = NULL;
#ifdef BPF_JITTER
			d->bd_bfilter = NULL;
#endif
			/* Flush buffered state only for BIOCSETF. */
			if (cmd == BIOCSETF)
				reset_d(d);
		}
		BPFD_UNLOCK(d);
		/* Free the old filter outside the descriptor lock. */
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif
		return (0);
	}
	flen = fp->bf_len;
	if (flen > bpf_maxinsns)
		return (EINVAL);

	/*
	 * Copy the program in from user space and install it only if it
	 * passes bpf_validate(); otherwise the old filter stays in place
	 * and the caller gets EINVAL.
	 */
	size = flen * sizeof(*fp->bf_insns);
	fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
	if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
	    bpf_validate(fcode, (int)flen)) {
		BPFD_LOCK(d);
		if (wfilter)
			d->bd_wfilter = fcode;
		else {
			d->bd_rfilter = fcode;
#ifdef BPF_JITTER
			/*
			 * bpf_jitter() appears able to leave this NULL
			 * (consumers NULL-check bd_bfilter before use).
			 */
			d->bd_bfilter = bpf_jitter(fcode, flen);
#endif
			if (cmd == BIOCSETF)
				reset_d(d);
		}
		BPFD_UNLOCK(d);
		if (old != NULL)
			free((caddr_t)old, M_BPF);
#ifdef BPF_JITTER
		if (ofunc != NULL)
			bpf_destroy_jit_filter(ofunc);
#endif

		return (0);
	}
	free((caddr_t)fcode, M_BPF);
	return (EINVAL);
}
1416 
1417 /*
1418  * Detach a file from its current interface (if attached at all) and attach
1419  * to the interface indicated by the name stored in ifr.
1420  * Return an errno or 0.
1421  */
static int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	struct ifnet *theywant;

	/* Resolve the interface by name; it must have bpf attached. */
	theywant = ifunit(ifr->ifr_name);
	if (theywant == NULL || theywant->if_bpf == NULL)
		return (ENXIO);

	bp = theywant->if_bpf;

	/*
	 * Behavior here depends on the buffering model.  If we're using
	 * kernel memory buffers, then we can allocate them here.  If we're
	 * using zero-copy, then the user process must have registered
	 * buffers by the time we get here.  If not, return an error.
	 *
	 * XXXRW: There are locking issues here with multi-threaded use: what
	 * if two threads try to set the interface at once?
	 */
	switch (d->bd_bufmode) {
	case BPF_BUFMODE_BUFFER:
		if (d->bd_sbuf == NULL)
			bpf_buffer_alloc(d);
		KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
		break;

	case BPF_BUFMODE_ZBUF:
		if (d->bd_sbuf == NULL)
			return (EINVAL);
		break;

	default:
		panic("bpf_setif: bufmode %d", d->bd_bufmode);
	}
	/* Move the descriptor only if the target actually differs. */
	if (bp != d->bd_bif) {
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		bpf_attachd(d, bp);
	}
	/* Reset per-descriptor state for the (possibly new) attachment. */
	BPFD_LOCK(d);
	reset_d(d);
	BPFD_UNLOCK(d);
	return (0);
}
1472 
1473 /*
1474  * Support for select() and poll() system calls
1475  *
1476  * Return true iff the specific operation will not block indefinitely.
1477  * Otherwise, return false but make a note that a selwakeup() must be done.
1478  */
1479 static int
1480 bpfpoll(struct cdev *dev, int events, struct thread *td)
1481 {
1482 	struct bpf_d *d;
1483 	int revents;
1484 
1485 	d = dev->si_drv1;
1486 	if (d->bd_bif == NULL)
1487 		return (ENXIO);
1488 
1489 	/*
1490 	 * Refresh PID associated with this descriptor.
1491 	 */
1492 	revents = events & (POLLOUT | POLLWRNORM);
1493 	BPFD_LOCK(d);
1494 	d->bd_pid = td->td_proc->p_pid;
1495 	if (events & (POLLIN | POLLRDNORM)) {
1496 		if (bpf_ready(d))
1497 			revents |= events & (POLLIN | POLLRDNORM);
1498 		else {
1499 			selrecord(td, &d->bd_sel);
1500 			/* Start the read timeout if necessary. */
1501 			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1502 				callout_reset(&d->bd_callout, d->bd_rtout,
1503 				    bpf_timed_out, d);
1504 				d->bd_state = BPF_WAITING;
1505 			}
1506 		}
1507 	}
1508 	BPFD_UNLOCK(d);
1509 	return (revents);
1510 }
1511 
1512 /*
1513  * Support for kevent() system call.  Register EVFILT_READ filters and
1514  * reject all others.
1515  */
1516 int
1517 bpfkqfilter(struct cdev *dev, struct knote *kn)
1518 {
1519 	struct bpf_d *d = (struct bpf_d *)dev->si_drv1;
1520 
1521 	if (kn->kn_filter != EVFILT_READ)
1522 		return (1);
1523 
1524 	/*
1525 	 * Refresh PID associated with this descriptor.
1526 	 */
1527 	BPFD_LOCK(d);
1528 	d->bd_pid = curthread->td_proc->p_pid;
1529 	kn->kn_fop = &bpfread_filtops;
1530 	kn->kn_hook = d;
1531 	knlist_add(&d->bd_sel.si_note, kn, 1);
1532 	BPFD_UNLOCK(d);
1533 
1534 	return (0);
1535 }
1536 
static void
filt_bpfdetach(struct knote *kn)
{
	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;

	/* Undo the knlist_add() performed in bpfkqfilter(). */
	knlist_remove(&d->bd_sel.si_note, kn, 0);
}
1544 
1545 static int
1546 filt_bpfread(struct knote *kn, long hint)
1547 {
1548 	struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
1549 	int ready;
1550 
1551 	BPFD_LOCK_ASSERT(d);
1552 	ready = bpf_ready(d);
1553 	if (ready) {
1554 		kn->kn_data = d->bd_slen;
1555 		if (d->bd_hbuf)
1556 			kn->kn_data += d->bd_hlen;
1557 	}
1558 	else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1559 		callout_reset(&d->bd_callout, d->bd_rtout,
1560 		    bpf_timed_out, d);
1561 		d->bd_state = BPF_WAITING;
1562 	}
1563 
1564 	return (ready);
1565 }
1566 
1567 /*
1568  * Incoming linkage from device drivers.  Process the packet pkt, of length
1569  * pktlen, which is stored in a contiguous buffer.  The packet is parsed
1570  * by each process' filter, and if accepted, stashed into the corresponding
1571  * buffer.
1572  */
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	u_int slen;
	int gottime;
	struct timeval tv;

	/*
	 * Take the timestamp lazily: microtime() runs at most once, and
	 * only if some descriptor's filter actually matches the packet.
	 */
	gottime = 0;
	BPFIF_LOCK(bp);
	/* Offer the packet to every descriptor listening on this interface. */
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		BPFD_LOCK(d);
		++d->bd_rcount;
		/*
		 * NB: We don't call BPF_CHECK_DIRECTION() here since there is
		 * no way for the caller to indicate to us whether this packet
		 * is inbound or outbound.  In the bpf_mtap() routines, we use
		 * the interface pointers on the mbuf to figure it out.
		 */
#ifdef BPF_JITTER
		/* Prefer the JIT-compiled filter when one is available. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL)
			slen = (*(d->bd_bfilter->func))(pkt, pktlen, pktlen);
		else
#endif
		slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, pkt, pktlen, slen,
				    bpf_append_bytes, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
1614 
/*
 * Evaluate true when descriptor d's direction setting says to SKIP the
 * packet: r is the mbuf's receive interface (equal to i for inbound
 * packets) and i is the bpf-attached interface.  BPF_D_IN drops
 * locally-originated packets, BPF_D_OUT drops received ones.
 */
#define	BPF_CHECK_DIRECTION(d, r, i)				\
	    (((d)->bd_direction == BPF_D_IN && (r) != (i)) ||	\
	    ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
1618 
1619 /*
1620  * Incoming linkage from device drivers, when packet is in an mbuf chain.
1621  */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	/* Timestamp lazily: once, and only if some filter matches. */
	gottime = 0;

	pktlen = m_length(m, NULL);

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		/* Honor the descriptor's in/out direction setting. */
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
#ifdef BPF_JITTER
		/* XXX We cannot handle multiple mbufs. */
		if (bpf_jitter_enable != 0 && d->bd_bfilter != NULL &&
		    m->m_next == NULL)
			slen = (*(d->bd_bfilter->func))(mtod(m, u_char *),
			    pktlen, pktlen);
		else
#endif
		/*
		 * buflen 0 appears to signal bpf_filter() that "pkt" is an
		 * mbuf chain rather than a flat buffer — confirm against
		 * bpf_filter()'s contract.
		 */
		slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)m, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
1671 
1672 /*
1673  * Incoming linkage from device drivers, when packet is in
1674  * an mbuf chain and to be prepended by a contiguous header.
1675  */
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
	struct mbuf mb;
	struct bpf_d *d;
	u_int pktlen, slen;
	int gottime;
	struct timeval tv;

	/* Skip outgoing duplicate packets. */
	if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
		m->m_flags &= ~M_PROMISC;
		return;
	}

	/* Timestamp lazily: once, and only if some filter matches. */
	gottime = 0;

	pktlen = m_length(m, NULL);
	/*
	 * Craft on-stack mbuf suitable for passing to bpf_filter.
	 * Note that we cut corners here; we only setup what's
	 * absolutely needed--this mbuf should never go anywhere else.
	 * Only m_next/m_data/m_len are initialized; the filter and copy
	 * paths must not touch any other field of "mb".
	 */
	mb.m_next = m;
	mb.m_data = data;
	mb.m_len = dlen;
	pktlen += dlen;

	BPFIF_LOCK(bp);
	LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		/* Honor the descriptor's in/out direction setting. */
		if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
			continue;
		BPFD_LOCK(d);
		++d->bd_rcount;
		slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
		if (slen != 0) {
			d->bd_fcount++;
			if (!gottime) {
				microtime(&tv);
				gottime = 1;
			}
#ifdef MAC
			if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
				catchpacket(d, (u_char *)&mb, pktlen, slen,
				    bpf_append_mbuf, &tv);
		}
		BPFD_UNLOCK(d);
	}
	BPFIF_UNLOCK(bp);
}
1727 
1728 #undef	BPF_CHECK_DIRECTION
1729 
1730 /*
1731  * Move the packet data from interface memory (pkt) into the
1732  * store buffer.  "cpfn" is the routine called to do the actual data
1733  * transfer.  bcopy is passed in to copy contiguous chunks, while
1734  * bpf_append_mbuf is passed in to copy mbuf chains.  In the latter case,
1735  * pkt is really an mbuf.
1736  */
static void
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
    void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
    struct timeval *tv)
{
	struct bpf_hdr hdr;
	int totlen, curlen;
	int hdrlen = d->bd_bif->bif_hdrlen;
	int do_wakeup = 0;

	BPFD_LOCK_ASSERT(d);

	/*
	 * Detect whether user space has released a buffer back to us, and if
	 * so, move it from being a hold buffer to a free buffer.  This may
	 * not be the best place to do it (for example, we might only want to
	 * run this check if we need the space), but for now it's a reliable
	 * spot to do it.
	 */
	if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
		d->bd_hlen = 0;
		bpf_buf_reclaimed(d);
	}

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 *
	 * Drop the packet if there's no room and no hope of room
	 * If the packet would overflow the storage buffer or the storage
	 * buffer is considered immutable by the buffer model, try to rotate
	 * the buffer and wakeup pending processes.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
		if (d->bd_fbuf == NULL) {
			/*
			 * There's no room in the store buffer, and no
			 * prospect of room, so drop the packet.  Notify the
			 * buffer model.
			 */
			bpf_buffull(d);
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
		/*
		 * Immediate mode is set, or the read timeout has already
		 * expired during a select call.  A packet arrived, so the
		 * reader should be woken up.
		 */
		do_wakeup = 1;

	/*
	 * Append the bpf header.  Note we append the actual header size, but
	 * move forward the length of the header plus padding.
	 */
	bzero(&hdr, sizeof(hdr));
	hdr.bh_tstamp = *tv;
	hdr.bh_datalen = pktlen;
	hdr.bh_hdrlen = hdrlen;
	/* Capture length = total minus header; may be less than snaplen. */
	hdr.bh_caplen = totlen - hdrlen;
	bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));

	/*
	 * Copy the packet data into the store buffer and update its length.
	 * Note bd_slen includes the alignment padding added by
	 * BPF_WORDALIGN above.
	 */
	(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, hdr.bh_caplen);
	d->bd_slen = curlen + totlen;

	if (do_wakeup)
		bpf_wakeup(d);
}
1824 
1825 /*
1826  * Free buffers currently in use by a descriptor.
1827  * Called on close.
1828  */
1829 static void
1830 bpf_freed(struct bpf_d *d)
1831 {
1832 
1833 	/*
1834 	 * We don't need to lock out interrupts since this descriptor has
1835 	 * been detached from its interface and it yet hasn't been marked
1836 	 * free.
1837 	 */
1838 	bpf_free(d);
1839 	if (d->bd_rfilter) {
1840 		free((caddr_t)d->bd_rfilter, M_BPF);
1841 #ifdef BPF_JITTER
1842 		bpf_destroy_jit_filter(d->bd_bfilter);
1843 #endif
1844 	}
1845 	if (d->bd_wfilter)
1846 		free((caddr_t)d->bd_wfilter, M_BPF);
1847 	mtx_destroy(&d->bd_mtx);
1848 }
1849 
1850 /*
1851  * Attach an interface to bpf.  dlt is the link layer type; hdrlen is the
1852  * fixed size of the link header (variable length headers not yet supported).
1853  */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	/* Convenience wrapper: attach using the interface's default hook. */
	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}
1860 
1861 /*
1862  * Attach an interface to bpf.  ifp is a pointer to the structure
1863  * defining the interface to be attached, dlt is the link layer type,
1864  * and hdrlen is the fixed size of the link header (variable length
1865  * headers are not yet supporrted).
1866  */
1867 void
1868 bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
1869 {
1870 	struct bpf_if *bp;
1871 
1872 	bp = malloc(sizeof(*bp), M_BPF, M_NOWAIT | M_ZERO);
1873 	if (bp == NULL)
1874 		panic("bpfattach");
1875 
1876 	LIST_INIT(&bp->bif_dlist);
1877 	bp->bif_ifp = ifp;
1878 	bp->bif_dlt = dlt;
1879 	mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
1880 	KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
1881 	*driverp = bp;
1882 
1883 	mtx_lock(&bpf_mtx);
1884 	LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
1885 	mtx_unlock(&bpf_mtx);
1886 
1887 	/*
1888 	 * Compute the length of the bpf header.  This is not necessarily
1889 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1890 	 * that the network layer header begins on a longword boundary (for
1891 	 * performance reasons and to alleviate alignment restrictions).
1892 	 */
1893 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1894 
1895 	if (bootverbose)
1896 		if_printf(ifp, "bpf attached\n");
1897 }
1898 
1899 /*
1900  * Detach bpf from an interface.  This involves detaching each descriptor
1901  * associated with the interface, and leaving bd_bif NULL.  Notify each
1902  * descriptor as it's detached so that any sleepers wake up and get
1903  * ENXIO.
1904  */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if	*bp;
	struct bpf_d	*d;

	/* Locate BPF interface information */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (ifp == bp->bif_ifp)
			break;
	}

	/* Interface wasn't attached */
	if ((bp == NULL) || (bp->bif_ifp == NULL)) {
		mtx_unlock(&bpf_mtx);
		printf("bpfdetach: %s was not attached\n", ifp->if_xname);
		return;
	}

	/* Unlink from the global list before touching the descriptors. */
	LIST_REMOVE(bp, bif_next);
	mtx_unlock(&bpf_mtx);

	/*
	 * Detach each descriptor and wake it so that any blocked readers
	 * notice the interface has gone away.
	 */
	while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
		bpf_detachd(d);
		BPFD_LOCK(d);
		bpf_wakeup(d);
		BPFD_UNLOCK(d);
	}

	mtx_destroy(&bp->bif_mtx);
	free(bp, M_BPF);
}
1938 
1939 /*
1940  * Get a list of available data link type of the interface.
1941  */
static int
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
{
	int n, error;
	struct ifnet *ifp;
	struct bpf_if *bp;

	ifp = d->bd_bif->bif_ifp;
	n = 0;
	error = 0;
	mtx_lock(&bpf_mtx);
	/* Walk every bpf attachment of this ifp, one per DLT. */
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp != ifp)
			continue;
		/* With a NULL list the caller only wants the count. */
		if (bfl->bfl_list != NULL) {
			if (n >= bfl->bfl_len) {
				mtx_unlock(&bpf_mtx);
				return (ENOMEM);
			}
			/*
			 * NOTE(review): copyout() can fault (and sleep)
			 * while bpf_mtx, a non-sleepable mutex, is held.
			 * Consider staging the DLTs in a temporary buffer
			 * and copying out after the unlock.
			 */
			error = copyout(&bp->bif_dlt,
			    bfl->bfl_list + n, sizeof(u_int));
		}
		n++;
	}
	mtx_unlock(&bpf_mtx);
	/* Report how many DLTs exist (or were copied out). */
	bfl->bfl_len = n;
	return (error);
}
1970 
1971 /*
1972  * Set the data link type of a BPF instance.
1973  */
static int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	int error, opromisc;
	struct ifnet *ifp;
	struct bpf_if *bp;

	/* Nothing to do if the descriptor already uses this DLT. */
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	ifp = d->bd_bif->bif_ifp;
	/* Find another attachment of the same ifp offering that DLT. */
	mtx_lock(&bpf_mtx);
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
			break;
	}
	mtx_unlock(&bpf_mtx);
	/*
	 * NOTE(review): "bp" is dereferenced below after bpf_mtx has been
	 * dropped; a concurrent bpfdetach() could free it.  Confirm
	 * whether higher-level serialization prevents this.
	 */
	if (bp != NULL) {
		opromisc = d->bd_promisc;
		bpf_detachd(d);
		bpf_attachd(d, bp);
		BPFD_LOCK(d);
		reset_d(d);
		BPFD_UNLOCK(d);
		/* Restore promiscuous mode if it was set before the switch. */
		if (opromisc) {
			error = ifpromisc(bp->bif_ifp, 1);
			if (error)
				if_printf(bp->bif_ifp,
					"bpf_setdlt: ifpromisc failed (%d)\n",
					error);
			else
				d->bd_promisc = 1;
		}
	}
	return (bp == NULL ? EINVAL : 0);
}
2009 
2010 static void
2011 bpf_clone(void *arg, struct ucred *cred, char *name, int namelen,
2012     struct cdev **dev)
2013 {
2014 	int u;
2015 
2016 	if (*dev != NULL)
2017 		return;
2018 	if (dev_stdclone(name, NULL, "bpf", &u) != 1)
2019 		return;
2020 	*dev = make_dev(&bpf_cdevsw, unit2minor(u), UID_ROOT, GID_WHEEL, 0600,
2021 	    "bpf%d", u);
2022 	dev_ref(*dev);
2023 	(*dev)->si_flags |= SI_CHEAPCLONE;
2024 	return;
2025 }
2026 
2027 static void
2028 bpf_drvinit(void *unused)
2029 {
2030 
2031 	mtx_init(&bpf_mtx, "bpf global lock", NULL, MTX_DEF);
2032 	LIST_INIT(&bpf_iflist);
2033 	EVENTHANDLER_REGISTER(dev_clone, bpf_clone, 0, 1000);
2034 }
2035 
static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{

	/*
	 * Export a consistent snapshot of descriptor "bd" into the
	 * externalized structure "d" (consumed by bpf_stats_sysctl()).
	 * The caller must hold the descriptor lock.
	 */
	bzero(d, sizeof(*d));
	BPFD_LOCK_ASSERT(bd);
	/* Record our own size so consumers can detect layout changes. */
	d->bd_structsize = sizeof(*d);
	d->bd_immediate = bd->bd_immediate;
	d->bd_promisc = bd->bd_promisc;
	d->bd_hdrcmplt = bd->bd_hdrcmplt;
	d->bd_direction = bd->bd_direction;
	d->bd_feedback = bd->bd_feedback;
	d->bd_async = bd->bd_async;
	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_sig = bd->bd_sig;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	strlcpy(d->bd_ifname,
	    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	d->bd_locked = bd->bd_locked;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_wfcount = bd->bd_wfcount;
	d->bd_zcopy = bd->bd_zcopy;
	d->bd_bufmode = bd->bd_bufmode;
}
2066 
static int
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct xbpf_d *xbdbuf, *xbd;
	int index, error;
	struct bpf_if *bp;
	struct bpf_d *bd;

	/*
	 * XXX This is not technically correct. It is possible for non
	 * privileged users to open bpf devices. It would make sense
	 * if the users who opened the devices were able to retrieve
	 * the statistics for them, too.
	 */
	error = priv_check(req->td, PRIV_NET_BPF);
	if (error)
		return (error);
	/* Size probe: tell the caller how much buffer space is needed. */
	if (req->oldptr == NULL)
		return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
	if (bpf_bpfd_cnt == 0)
		return (SYSCTL_OUT(req, 0, 0));
	/*
	 * NOTE(review): req->oldlen is caller-supplied; a very large value
	 * results in an equally large M_WAITOK allocation here.  Consider
	 * clamping it to bpf_bpfd_cnt * sizeof(*xbd).
	 */
	xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
	mtx_lock(&bpf_mtx);
	/* Re-check under the lock: the descriptor count may have grown. */
	if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
		mtx_unlock(&bpf_mtx);
		free(xbdbuf, M_BPF);
		return (ENOMEM);
	}
	index = 0;
	/* Snapshot every descriptor on every attached interface. */
	LIST_FOREACH(bp, &bpf_iflist, bif_next) {
		BPFIF_LOCK(bp);
		LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
			xbd = &xbdbuf[index++];
			BPFD_LOCK(bd);
			bpfstats_fill_xbpf(xbd, bd);
			BPFD_UNLOCK(bd);
		}
		BPFIF_UNLOCK(bp);
	}
	mtx_unlock(&bpf_mtx);
	error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
	free(xbdbuf, M_BPF);
	return (error);
}
2111 
2112 SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,bpf_drvinit,NULL);
2113 
2114 #else /* !DEV_BPF && !NETGRAPH_BPF */
2115 /*
2116  * NOP stubs to allow bpf-using drivers to load and function.
2117  *
2118  * A 'better' implementation would allow the core bpf functionality
2119  * to be loaded at runtime.
2120  */
/* Placeholder bpf_if handed out by the stub bpfattach2(). */
static struct bpf_if bp_null;
2122 
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	/* BPF not compiled in: silently discard the tap. */
}
2127 
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	/* BPF not compiled in: silently discard the tap. */
}
2132 
void
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
{
	/* BPF not compiled in: silently discard the tap. */
}
2137 
/* Stub: route through bpfattach2() so if_bpf still gets initialized. */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{

	bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
}
2144 
void
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
{

	/* Point the driver at the shared do-nothing bpf_if. */
	*driverp = &bp_null;
}
2151 
void
bpfdetach(struct ifnet *ifp)
{
	/* Nothing to tear down in the stub implementation. */
}
2156 
/*
 * Stub filter: (u_int)-1 is the conventional "accept, snap the whole
 * packet" result.
 */
u_int
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
{
	return -1;	/* "no filter" behaviour */
}
2162 
/* Stub: reject every program, since no filtering machinery exists. */
int
bpf_validate(const struct bpf_insn *f, int len)
{
	return 0;		/* false */
}
2168 
2169 #endif /* !DEV_BPF && !NETGRAPH_BPF */
2170